d: Add test for PR d/108167 to the testsuite [PR108167]
[official-gcc.git] / gcc / tree-parloops.cc
blobdfb75c369d6d00d893ddd6fc28f189ec0d774711
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "fold-const.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-scalar-evolution.h"
48 #include "langhooks.h"
49 #include "tree-vectorizer.h"
50 #include "tree-hasher.h"
51 #include "tree-parloops.h"
52 #include "omp-general.h"
53 #include "omp-low.h"
54 #include "tree-ssa.h"
55 #include "tree-ssa-alias.h"
56 #include "tree-eh.h"
57 #include "gomp-constants.h"
58 #include "tree-dfa.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
69 The most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
84 -- handling of common reduction patterns for outer loops.
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
88 Reduction handling:
89 currently we use code inspired by vect_force_simple_reduction to detect
90 reduction patterns.
91 The code transformation will be introduced by an example.
94 parloop
96 int sum=1;
98 for (i = 0; i < N; i++)
100 x[i] = i + 3;
101 sum+=x[i];
105 gimple-like code:
106 header_bb:
108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
118 exit_bb:
120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
124 after reduction transformation (only relevant parts):
126 parloop
129 ....
132 # Storing the initial value given by the user. #
134 .paral_data_store.32.sum.27 = 1;
136 #pragma omp parallel num_threads(4)
138 #pragma omp for schedule(static)
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
144 # sum.27_29 = PHI <sum.27_11, 0>
146 sum.27_11 = D.1827_8 + sum.27_29;
148 GIMPLE_OMP_CONTINUE
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
152 GIMPLE_OMP_RETURN
154 # Creating the atomic operation is done at
155 create_call_for_reduction_1() #
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
162 GIMPLE_OMP_RETURN
164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
166 The value computed by the threads is loaded from the
167 shared struct. #
170 .paral_data_load.33_52 = &.paral_data_store.32;
171 sum_37 = .paral_data_load.33_52->sum.27;
172 sum_43 = D.1795_41 + sum_37;
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
184 /* Error reporting helper for parloops_is_simple_reduction below. GIMPLE
185 statement STMT is printed with a message MSG. */
187 static void
188 report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
190 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
193 /* DEF_STMT_INFO occurs in a loop that contains a potential reduction
194 operation. Return true if the results of DEF_STMT_INFO are something
195 that can be accumulated by such a reduction. */
197 static bool
198 parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
200 return (is_gimple_assign (def_stmt_info->stmt)
201 || is_gimple_call (def_stmt_info->stmt)
202 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
203 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
204 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
205 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
208 /* Detect SLP reduction of the form:
210 #a1 = phi <a5, a0>
211 a2 = operation (a1)
212 a3 = operation (a2)
213 a4 = operation (a3)
214 a5 = operation (a4)
216 #a = phi <a5>
218 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
219 FIRST_STMT is the first reduction stmt in the chain
220 (a2 = operation (a1)).
222 Return TRUE if a reduction chain was detected. */
224 static bool
225 parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
226 gimple *first_stmt)
228 class loop *loop = (gimple_bb (phi))->loop_father;
229 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
230 enum tree_code code;
231 gimple *loop_use_stmt = NULL;
232 stmt_vec_info use_stmt_info;
233 tree lhs;
234 imm_use_iterator imm_iter;
235 use_operand_p use_p;
236 int nloop_uses, size = 0, n_out_of_loop_uses;
237 bool found = false;
239 if (loop != vect_loop)
240 return false;
242 auto_vec<stmt_vec_info, 8> reduc_chain;
243 lhs = PHI_RESULT (phi);
244 code = gimple_assign_rhs_code (first_stmt);
245 while (1)
247 nloop_uses = 0;
248 n_out_of_loop_uses = 0;
249 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
251 gimple *use_stmt = USE_STMT (use_p);
252 if (is_gimple_debug (use_stmt))
253 continue;
255 /* Check if we got back to the reduction phi. */
256 if (use_stmt == phi)
258 loop_use_stmt = use_stmt;
259 found = true;
260 break;
263 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
265 loop_use_stmt = use_stmt;
266 nloop_uses++;
268 else
269 n_out_of_loop_uses++;
271 /* There are can be either a single use in the loop or two uses in
272 phi nodes. */
273 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
274 return false;
277 if (found)
278 break;
280 /* We reached a statement with no loop uses. */
281 if (nloop_uses == 0)
282 return false;
284 /* This is a loop exit phi, and we haven't reached the reduction phi. */
285 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
286 return false;
288 if (!is_gimple_assign (loop_use_stmt)
289 || code != gimple_assign_rhs_code (loop_use_stmt)
290 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
291 return false;
293 /* Insert USE_STMT into reduction chain. */
294 use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
295 reduc_chain.safe_push (use_stmt_info);
297 lhs = gimple_assign_lhs (loop_use_stmt);
298 size++;
301 if (!found || loop_use_stmt != phi || size < 2)
302 return false;
304 /* Swap the operands, if needed, to make the reduction operand be the second
305 operand. */
306 lhs = PHI_RESULT (phi);
307 for (unsigned i = 0; i < reduc_chain.length (); ++i)
309 gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
310 if (gimple_assign_rhs2 (next_stmt) == lhs)
312 tree op = gimple_assign_rhs1 (next_stmt);
313 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
315 /* Check that the other def is either defined in the loop
316 ("vect_internal_def"), or it's an induction (defined by a
317 loop-header phi-node). */
318 if (def_stmt_info
319 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
320 && parloops_valid_reduction_input_p (def_stmt_info))
322 lhs = gimple_assign_lhs (next_stmt);
323 continue;
326 return false;
328 else
330 tree op = gimple_assign_rhs2 (next_stmt);
331 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
333 /* Check that the other def is either defined in the loop
334 ("vect_internal_def"), or it's an induction (defined by a
335 loop-header phi-node). */
336 if (def_stmt_info
337 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
338 && parloops_valid_reduction_input_p (def_stmt_info))
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_NOTE, vect_location,
342 "swapping oprnds: %G", (gimple *) next_stmt);
344 swap_ssa_operands (next_stmt,
345 gimple_assign_rhs1_ptr (next_stmt),
346 gimple_assign_rhs2_ptr (next_stmt));
347 update_stmt (next_stmt);
349 else
350 return false;
353 lhs = gimple_assign_lhs (next_stmt);
356 /* Build up the actual chain. */
357 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
359 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
360 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
362 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
363 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
365 /* Save the chain for further analysis in SLP detection. */
366 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
367 REDUC_GROUP_SIZE (reduc_chain[0]) = size;
369 return true;
372 /* Return true if we need an in-order reduction for operation CODE
373 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
374 overflow must wrap. */
376 static bool
377 parloops_needs_fold_left_reduction_p (tree type, tree_code code,
378 bool need_wrapping_integral_overflow)
380 /* CHECKME: check for !flag_finite_math_only too? */
381 if (SCALAR_FLOAT_TYPE_P (type))
382 switch (code)
384 case MIN_EXPR:
385 case MAX_EXPR:
386 return false;
388 default:
389 return !flag_associative_math;
392 if (INTEGRAL_TYPE_P (type))
394 if (!operation_no_trapping_overflow (type, code))
395 return true;
396 if (need_wrapping_integral_overflow
397 && !TYPE_OVERFLOW_WRAPS (type)
398 && operation_can_overflow (code))
399 return true;
400 return false;
403 if (SAT_FIXED_POINT_TYPE_P (type))
404 return true;
406 return false;
410 /* Function parloops_is_simple_reduction
412 (1) Detect a cross-iteration def-use cycle that represents a simple
413 reduction computation. We look for the following pattern:
415 loop_header:
416 a1 = phi < a0, a2 >
417 a3 = ...
418 a2 = operation (a3, a1)
422 a3 = ...
423 loop_header:
424 a1 = phi < a0, a2 >
425 a2 = operation (a3, a1)
427 such that:
428 1. operation is commutative and associative and it is safe to
429 change the order of the computation
430 2. no uses for a2 in the loop (a2 is used out of the loop)
431 3. no uses of a1 in the loop besides the reduction operation
432 4. no uses of a1 outside the loop.
434 Conditions 1,4 are tested here.
435 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
437 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
438 nested cycles.
440 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
441 reductions:
443 a1 = phi < a0, a2 >
444 inner loop (def of a3)
445 a2 = phi < a3 >
447 (4) Detect condition expressions, ie:
448 for (int i = 0; i < N; i++)
449 if (a[i] < val)
450 ret_val = a[i];
454 static stmt_vec_info
455 parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
456 bool *double_reduc,
457 bool need_wrapping_integral_overflow,
458 enum vect_reduction_type *v_reduc_type)
460 gphi *phi = as_a <gphi *> (phi_info->stmt);
461 class loop *loop = (gimple_bb (phi))->loop_father;
462 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
463 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
464 gimple *phi_use_stmt = NULL;
465 enum tree_code orig_code, code;
466 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
467 tree type;
468 tree name;
469 imm_use_iterator imm_iter;
470 use_operand_p use_p;
471 bool phi_def;
473 *double_reduc = false;
474 *v_reduc_type = TREE_CODE_REDUCTION;
476 tree phi_name = PHI_RESULT (phi);
477 /* ??? If there are no uses of the PHI result the inner loop reduction
478 won't be detected as possibly double-reduction by vectorizable_reduction
479 because that tries to walk the PHI arg from the preheader edge which
480 can be constant. See PR60382. */
481 if (has_zero_uses (phi_name))
482 return NULL;
483 unsigned nphi_def_loop_uses = 0;
484 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
486 gimple *use_stmt = USE_STMT (use_p);
487 if (is_gimple_debug (use_stmt))
488 continue;
490 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 "intermediate value used outside loop.\n");
496 return NULL;
499 nphi_def_loop_uses++;
500 phi_use_stmt = use_stmt;
503 edge latch_e = loop_latch_edge (loop);
504 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
505 if (TREE_CODE (loop_arg) != SSA_NAME)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 "reduction: not ssa_name: %T\n", loop_arg);
510 return NULL;
513 stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
514 if (!def_stmt_info
515 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
516 return NULL;
518 if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
520 name = gimple_assign_lhs (def_stmt);
521 phi_def = false;
523 else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
525 name = PHI_RESULT (def_stmt);
526 phi_def = true;
528 else
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532 "reduction: unhandled reduction operation: %G",
533 def_stmt_info->stmt);
534 return NULL;
537 unsigned nlatch_def_loop_uses = 0;
538 auto_vec<gphi *, 3> lcphis;
539 bool inner_loop_of_double_reduc = false;
540 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
542 gimple *use_stmt = USE_STMT (use_p);
543 if (is_gimple_debug (use_stmt))
544 continue;
545 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
546 nlatch_def_loop_uses++;
547 else
549 /* We can have more than one loop-closed PHI. */
550 lcphis.safe_push (as_a <gphi *> (use_stmt));
551 if (nested_in_vect_loop
552 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
553 == vect_double_reduction_def))
554 inner_loop_of_double_reduc = true;
558 /* If this isn't a nested cycle or if the nested cycle reduction value
559 is used ouside of the inner loop we cannot handle uses of the reduction
560 value. */
561 if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
562 && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
566 "reduction used in loop.\n");
567 return NULL;
570 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
571 defined in the inner loop. */
572 if (phi_def)
574 gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
575 op1 = PHI_ARG_DEF (def_stmt, 0);
577 if (gimple_phi_num_args (def_stmt) != 1
578 || TREE_CODE (op1) != SSA_NAME)
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
582 "unsupported phi node definition.\n");
584 return NULL;
587 gimple *def1 = SSA_NAME_DEF_STMT (op1);
588 if (gimple_bb (def1)
589 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
590 && loop->inner
591 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
592 && is_gimple_assign (def1)
593 && is_a <gphi *> (phi_use_stmt)
594 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
596 if (dump_enabled_p ())
597 report_ploop_op (MSG_NOTE, def_stmt,
598 "detected double reduction: ");
600 *double_reduc = true;
601 return def_stmt_info;
604 return NULL;
607 /* If we are vectorizing an inner reduction we are executing that
608 in the original order only in case we are not dealing with a
609 double reduction. */
610 bool check_reduction = true;
611 if (flow_loop_nested_p (vect_loop, loop))
613 gphi *lcphi;
614 unsigned i;
615 check_reduction = false;
616 FOR_EACH_VEC_ELT (lcphis, i, lcphi)
617 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
619 gimple *use_stmt = USE_STMT (use_p);
620 if (is_gimple_debug (use_stmt))
621 continue;
622 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
623 check_reduction = true;
627 gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
628 code = orig_code = gimple_assign_rhs_code (def_stmt);
630 if (nested_in_vect_loop && !check_reduction)
632 /* FIXME: Even for non-reductions code generation is funneled
633 through vectorizable_reduction for the stmt defining the
634 PHI latch value. So we have to artificially restrict ourselves
635 for the supported operations. */
636 switch (get_gimple_rhs_class (code))
638 case GIMPLE_BINARY_RHS:
639 case GIMPLE_TERNARY_RHS:
640 break;
641 default:
642 /* Not supported by vectorizable_reduction. */
643 if (dump_enabled_p ())
644 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
645 "nested cycle: not handled operation: ");
646 return NULL;
648 if (dump_enabled_p ())
649 report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
650 return def_stmt_info;
653 /* We can handle "res -= x[i]", which is non-associative by
654 simply rewriting this into "res += -x[i]". Avoid changing
655 gimple instruction for the first simple tests and only do this
656 if we're allowed to change code at all. */
657 if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
658 code = PLUS_EXPR;
660 if (code == COND_EXPR)
662 if (! nested_in_vect_loop)
663 *v_reduc_type = COND_REDUCTION;
665 op3 = gimple_assign_rhs1 (def_stmt);
666 if (COMPARISON_CLASS_P (op3))
668 op4 = TREE_OPERAND (op3, 1);
669 op3 = TREE_OPERAND (op3, 0);
671 if (op3 == phi_name || op4 == phi_name)
673 if (dump_enabled_p ())
674 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
675 "reduction: condition depends on previous"
676 " iteration: ");
677 return NULL;
680 op1 = gimple_assign_rhs2 (def_stmt);
681 op2 = gimple_assign_rhs3 (def_stmt);
683 else if (!commutative_tree_code (code) || !associative_tree_code (code))
685 if (dump_enabled_p ())
686 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
687 "reduction: not commutative/associative: ");
688 return NULL;
690 else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
692 op1 = gimple_assign_rhs1 (def_stmt);
693 op2 = gimple_assign_rhs2 (def_stmt);
695 else
697 if (dump_enabled_p ())
698 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
699 "reduction: not handled operation: ");
700 return NULL;
703 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
705 if (dump_enabled_p ())
706 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
707 "reduction: both uses not ssa_names: ");
709 return NULL;
712 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
713 if ((TREE_CODE (op1) == SSA_NAME
714 && !types_compatible_p (type,TREE_TYPE (op1)))
715 || (TREE_CODE (op2) == SSA_NAME
716 && !types_compatible_p (type, TREE_TYPE (op2)))
717 || (op3 && TREE_CODE (op3) == SSA_NAME
718 && !types_compatible_p (type, TREE_TYPE (op3)))
719 || (op4 && TREE_CODE (op4) == SSA_NAME
720 && !types_compatible_p (type, TREE_TYPE (op4))))
722 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "reduction: multiple types: operation type: "
726 "%T, operands types: %T,%T",
727 type, TREE_TYPE (op1), TREE_TYPE (op2));
728 if (op3)
729 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
731 if (op4)
732 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
733 dump_printf (MSG_NOTE, "\n");
736 return NULL;
739 /* Check whether it's ok to change the order of the computation.
740 Generally, when vectorizing a reduction we change the order of the
741 computation. This may change the behavior of the program in some
742 cases, so we need to check that this is ok. One exception is when
743 vectorizing an outer-loop: the inner-loop is executed sequentially,
744 and therefore vectorizing reductions in the inner-loop during
745 outer-loop vectorization is safe. */
746 if (check_reduction
747 && *v_reduc_type == TREE_CODE_REDUCTION
748 && parloops_needs_fold_left_reduction_p (type, code,
749 need_wrapping_integral_overflow))
750 *v_reduc_type = FOLD_LEFT_REDUCTION;
752 /* Reduction is safe. We're dealing with one of the following:
753 1) integer arithmetic and no trapv
754 2) floating point arithmetic, and special flags permit this optimization
755 3) nested cycle (i.e., outer loop vectorization). */
756 stmt_vec_info def1_info = loop_info->lookup_def (op1);
757 stmt_vec_info def2_info = loop_info->lookup_def (op2);
758 if (code != COND_EXPR && !def1_info && !def2_info)
760 if (dump_enabled_p ())
761 report_ploop_op (MSG_NOTE, def_stmt,
762 "reduction: no defs for operands: ");
763 return NULL;
766 /* Check that one def is the reduction def, defined by PHI,
767 the other def is either defined in the loop ("vect_internal_def"),
768 or it's an induction (defined by a loop-header phi-node). */
770 if (def2_info
771 && def2_info->stmt == phi
772 && (code == COND_EXPR
773 || !def1_info
774 || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
775 || parloops_valid_reduction_input_p (def1_info)))
777 if (dump_enabled_p ())
778 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
779 return def_stmt_info;
782 if (def1_info
783 && def1_info->stmt == phi
784 && (code == COND_EXPR
785 || !def2_info
786 || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
787 || parloops_valid_reduction_input_p (def2_info)))
789 if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
791 /* Check if we can swap operands (just for simplicity - so that
792 the rest of the code can assume that the reduction variable
793 is always the last (second) argument). */
794 if (code == COND_EXPR)
796 /* Swap cond_expr by inverting the condition. */
797 tree cond_expr = gimple_assign_rhs1 (def_stmt);
798 enum tree_code invert_code = ERROR_MARK;
799 enum tree_code cond_code = TREE_CODE (cond_expr);
801 if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
803 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
804 invert_code = invert_tree_comparison (cond_code, honor_nans);
806 if (invert_code != ERROR_MARK)
808 TREE_SET_CODE (cond_expr, invert_code);
809 swap_ssa_operands (def_stmt,
810 gimple_assign_rhs2_ptr (def_stmt),
811 gimple_assign_rhs3_ptr (def_stmt));
813 else
815 if (dump_enabled_p ())
816 report_ploop_op (MSG_NOTE, def_stmt,
817 "detected reduction: cannot swap operands "
818 "for cond_expr");
819 return NULL;
822 else
823 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
824 gimple_assign_rhs2_ptr (def_stmt));
826 if (dump_enabled_p ())
827 report_ploop_op (MSG_NOTE, def_stmt,
828 "detected reduction: need to swap operands: ");
830 else
832 if (dump_enabled_p ())
833 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
836 return def_stmt_info;
839 /* Try to find SLP reduction chain. */
840 if (! nested_in_vect_loop
841 && code != COND_EXPR
842 && orig_code != MINUS_EXPR
843 && parloops_is_slp_reduction (loop_info, phi, def_stmt))
845 if (dump_enabled_p ())
846 report_ploop_op (MSG_NOTE, def_stmt,
847 "reduction: detected reduction chain: ");
849 return def_stmt_info;
852 /* Look for the expression computing loop_arg from loop PHI result. */
853 if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
854 return def_stmt_info;
856 if (dump_enabled_p ())
858 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
859 "reduction: unknown pattern: ");
862 return NULL;
865 /* Wrapper around vect_is_simple_reduction, which will modify code
866 in-place if it enables detection of more reductions. Arguments
867 as there. */
869 stmt_vec_info
870 parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
871 bool *double_reduc,
872 bool need_wrapping_integral_overflow)
874 enum vect_reduction_type v_reduc_type;
875 stmt_vec_info def_info
876 = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
877 need_wrapping_integral_overflow,
878 &v_reduc_type);
879 if (def_info)
881 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
882 STMT_VINFO_REDUC_DEF (phi_info) = def_info;
883 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
884 STMT_VINFO_REDUC_DEF (def_info) = phi_info;
886 return def_info;
889 /* Minimal number of iterations of a loop that should be executed in each
890 thread. */
891 #define MIN_PER_THREAD param_parloops_min_per_thread
893 /* Element of the hashtable, representing a
894 reduction in the current loop. */
895 struct reduction_info
897 gimple *reduc_stmt; /* reduction statement. */
898 gimple *reduc_phi; /* The phi node defining the reduction. */
899 enum tree_code reduction_code;/* code for the reduction operation. */
900 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
901 result. */
902 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
903 of the reduction variable when existing the loop. */
904 tree initial_value; /* The initial value of the reduction var before entering the loop. */
905 tree field; /* the name of the field in the parloop data structure intended for reduction. */
906 tree reduc_addr; /* The address of the reduction variable for
907 openacc reductions. */
908 tree init; /* reduction initialization value. */
909 gphi *new_phi; /* (helper field) Newly created phi node whose result
910 will be passed to the atomic operation. Represents
911 the local result each thread computed for the reduction
912 operation. */
915 /* Reduction info hashtable helpers. */
917 struct reduction_hasher : free_ptr_hash <reduction_info>
919 static inline hashval_t hash (const reduction_info *);
920 static inline bool equal (const reduction_info *, const reduction_info *);
923 /* Equality and hash functions for hashtab code. */
925 inline bool
926 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
928 return (a->reduc_phi == b->reduc_phi);
931 inline hashval_t
932 reduction_hasher::hash (const reduction_info *a)
934 return a->reduc_version;
937 typedef hash_table<reduction_hasher> reduction_info_table_type;
940 static struct reduction_info *
941 reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
943 struct reduction_info tmpred, *red;
945 if (reduction_list->is_empty () || phi == NULL)
946 return NULL;
948 if (gimple_uid (phi) == (unsigned int)-1
949 || gimple_uid (phi) == 0)
950 return NULL;
952 tmpred.reduc_phi = phi;
953 tmpred.reduc_version = gimple_uid (phi);
954 red = reduction_list->find (&tmpred);
955 gcc_assert (red == NULL || red->reduc_phi == phi);
957 return red;
960 /* Element of hashtable of names to copy. */
962 struct name_to_copy_elt
964 unsigned version; /* The version of the name to copy. */
965 tree new_name; /* The new name used in the copy. */
966 tree field; /* The field of the structure used to pass the
967 value. */
970 /* Name copies hashtable helpers. */
972 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
974 static inline hashval_t hash (const name_to_copy_elt *);
975 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
978 /* Equality and hash functions for hashtab code. */
980 inline bool
981 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
983 return a->version == b->version;
986 inline hashval_t
987 name_to_copy_hasher::hash (const name_to_copy_elt *a)
989 return (hashval_t) a->version;
992 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
994 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
995 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
996 represents the denominator for every element in the matrix. */
997 typedef struct lambda_trans_matrix_s
999 lambda_matrix matrix;
1000 int rowsize;
1001 int colsize;
1002 int denominator;
1003 } *lambda_trans_matrix;
1004 #define LTM_MATRIX(T) ((T)->matrix)
1005 #define LTM_ROWSIZE(T) ((T)->rowsize)
1006 #define LTM_COLSIZE(T) ((T)->colsize)
1007 #define LTM_DENOMINATOR(T) ((T)->denominator)
1009 /* Allocate a new transformation matrix. */
1011 static lambda_trans_matrix
1012 lambda_trans_matrix_new (int colsize, int rowsize,
1013 struct obstack * lambda_obstack)
1015 lambda_trans_matrix ret;
1017 ret = (lambda_trans_matrix)
1018 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
1019 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
1020 LTM_ROWSIZE (ret) = rowsize;
1021 LTM_COLSIZE (ret) = colsize;
1022 LTM_DENOMINATOR (ret) = 1;
1023 return ret;
1026 /* Multiply a vector VEC by a matrix MAT.
1027 MAT is an M*N matrix, and VEC is a vector with length N. The result
1028 is stored in DEST which must be a vector of length M. */
1030 static void
1031 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
1032 lambda_vector vec, lambda_vector dest)
1034 int i, j;
1036 lambda_vector_clear (dest, m);
1037 for (i = 0; i < m; i++)
1038 for (j = 0; j < n; j++)
1039 dest[i] += matrix[i][j] * vec[j];
1042 /* Return true if TRANS is a legal transformation matrix that respects
1043 the dependence vectors in DISTS and DIRS. The conservative answer
1044 is false.
1046 "Wolfe proves that a unimodular transformation represented by the
1047 matrix T is legal when applied to a loop nest with a set of
1048 lexicographically non-negative distance vectors RDG if and only if
1049 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1050 i.e.: if and only if it transforms the lexicographically positive
1051 distance vectors to lexicographically positive vectors. Note that
1052 a unimodular matrix must transform the zero vector (and only it) to
1053 the zero vector." S.Muchnick. */
1055 static bool
1056 lambda_transform_legal_p (lambda_trans_matrix trans,
1057 int nb_loops,
1058 vec<ddr_p> dependence_relations)
1060 unsigned int i, j;
1061 lambda_vector distres;
1062 struct data_dependence_relation *ddr;
1064 gcc_assert (LTM_COLSIZE (trans) == nb_loops
1065 && LTM_ROWSIZE (trans) == nb_loops);
1067 /* When there are no dependences, the transformation is correct. */
1068 if (dependence_relations.length () == 0)
1069 return true;
1071 ddr = dependence_relations[0];
1072 if (ddr == NULL)
1073 return true;
1075 /* When there is an unknown relation in the dependence_relations, we
1076 know that it is no worth looking at this loop nest: give up. */
1077 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1078 return false;
1080 distres = lambda_vector_new (nb_loops);
1082 /* For each distance vector in the dependence graph. */
1083 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
1085 /* Don't care about relations for which we know that there is no
1086 dependence, nor about read-read (aka. output-dependences):
1087 these data accesses can happen in any order. */
1088 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1089 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1090 continue;
1092 /* Conservatively answer: "this transformation is not valid". */
1093 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1094 return false;
1096 /* If the dependence could not be captured by a distance vector,
1097 conservatively answer that the transform is not valid. */
1098 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1099 return false;
1101 /* Compute trans.dist_vect */
1102 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
1104 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1105 DDR_DIST_VECT (ddr, j), distres);
1107 if (!lambda_vector_lexico_pos (distres, nb_loops))
1108 return false;
1111 return true;
1114 /* Data dependency analysis. Returns true if the iterations of LOOP
1115 are independent on each other (that is, if we can execute them
1116 in parallel). */
1118 static bool
1119 loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
1121 vec<ddr_p> dependence_relations;
1122 vec<data_reference_p> datarefs;
1123 lambda_trans_matrix trans;
1124 bool ret = false;
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1128 fprintf (dump_file, "Considering loop %d\n", loop->num);
1129 if (!loop->inner)
1130 fprintf (dump_file, "loop is innermost\n");
1131 else
1132 fprintf (dump_file, "loop NOT innermost\n");
1135 /* Check for problems with dependences. If the loop can be reversed,
1136 the iterations are independent. */
1137 auto_vec<loop_p, 3> loop_nest;
1138 datarefs.create (10);
1139 dependence_relations.create (100);
1140 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
1141 &dependence_relations))
1143 if (dump_file && (dump_flags & TDF_DETAILS))
1144 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
1145 ret = false;
1146 goto end;
1148 if (dump_file && (dump_flags & TDF_DETAILS))
1149 dump_data_dependence_relations (dump_file, dependence_relations);
1151 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
1152 LTM_MATRIX (trans)[0][0] = -1;
1154 if (lambda_transform_legal_p (trans, 1, dependence_relations))
1156 ret = true;
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file, " SUCCESS: may be parallelized\n");
1160 else if (dump_file && (dump_flags & TDF_DETAILS))
1161 fprintf (dump_file,
1162 " FAILED: data dependencies exist across iterations\n");
1164 end:
1165 free_dependence_relations (dependence_relations);
1166 free_data_refs (datarefs);
1168 return ret;
1171 /* Return true when LOOP contains basic blocks marked with the
1172 BB_IRREDUCIBLE_LOOP flag. */
1174 static inline bool
1175 loop_has_blocks_with_irreducible_flag (class loop *loop)
1177 unsigned i;
1178 basic_block *bbs = get_loop_body_in_dom_order (loop);
1179 bool res = true;
1181 for (i = 0; i < loop->num_nodes; i++)
1182 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
1183 goto end;
1185 res = false;
1186 end:
1187 free (bbs);
1188 return res;
1191 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
1192 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
1193 to their addresses that can be reused. The address of OBJ is known to
1194 be invariant in the whole function. Other needed statements are placed
1195 right before GSI. */
1197 static tree
1198 take_address_of (tree obj, tree type, edge entry,
1199 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
1201 int uid;
1202 tree *var_p, name, addr;
1203 gassign *stmt;
1204 gimple_seq stmts;
1206 /* Since the address of OBJ is invariant, the trees may be shared.
1207 Avoid rewriting unrelated parts of the code. */
1208 obj = unshare_expr (obj);
1209 for (var_p = &obj;
1210 handled_component_p (*var_p);
1211 var_p = &TREE_OPERAND (*var_p, 0))
1212 continue;
1214 /* Canonicalize the access to base on a MEM_REF. */
1215 if (DECL_P (*var_p))
1216 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
1218 /* Assign a canonical SSA name to the address of the base decl used
1219 in the address and share it for all accesses and addresses based
1220 on it. */
1221 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1222 int_tree_map elt;
1223 elt.uid = uid;
1224 int_tree_map *slot = decl_address->find_slot (elt,
1225 gsi == NULL
1226 ? NO_INSERT
1227 : INSERT);
1228 if (!slot || !slot->to)
1230 if (gsi == NULL)
1231 return NULL;
1232 addr = TREE_OPERAND (*var_p, 0);
1233 const char *obj_name
1234 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1235 if (obj_name)
1236 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
1237 else
1238 name = make_ssa_name (TREE_TYPE (addr));
1239 stmt = gimple_build_assign (name, addr);
1240 gsi_insert_on_edge_immediate (entry, stmt);
1242 slot->uid = uid;
1243 slot->to = name;
1245 else
1246 name = slot->to;
1248 /* Express the address in terms of the canonical SSA name. */
1249 TREE_OPERAND (*var_p, 0) = name;
1250 if (gsi == NULL)
1251 return build_fold_addr_expr_with_type (obj, type);
1253 name = force_gimple_operand (build_addr (obj),
1254 &stmts, true, NULL_TREE);
1255 if (!gimple_seq_empty_p (stmts))
1256 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1258 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
1260 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
1261 NULL_TREE);
1262 if (!gimple_seq_empty_p (stmts))
1263 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1266 return name;
1269 static tree
1270 reduc_stmt_res (gimple *stmt)
1272 return (gimple_code (stmt) == GIMPLE_PHI
1273 ? gimple_phi_result (stmt)
1274 : gimple_assign_lhs (stmt));
1277 /* Callback for htab_traverse. Create the initialization statement
1278 for reduction described in SLOT, and place it at the preheader of
1279 the loop described in DATA. */
1282 initialize_reductions (reduction_info **slot, class loop *loop)
1284 tree init;
1285 tree type, arg;
1286 edge e;
1288 struct reduction_info *const reduc = *slot;
1290 /* Create initialization in preheader:
1291 reduction_variable = initialization value of reduction. */
1293 /* In the phi node at the header, replace the argument coming
1294 from the preheader with the reduction initialization value. */
1296 /* Initialize the reduction. */
1297 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1298 init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
1299 reduc->reduction_code, type);
1300 reduc->init = init;
1302 /* Replace the argument representing the initialization value
1303 with the initialization value for the reduction (neutral
1304 element for the particular operation, e.g. 0 for PLUS_EXPR,
1305 1 for MULT_EXPR, etc).
1306 Keep the old value in a new variable "reduction_initial",
1307 that will be taken in consideration after the parallel
1308 computing is done. */
1310 e = loop_preheader_edge (loop);
1311 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
1312 /* Create new variable to hold the initial value. */
1314 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
1315 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
1316 reduc->initial_value = arg;
1317 return 1;
1320 struct elv_data
1322 struct walk_stmt_info info;
1323 edge entry;
1324 int_tree_htab_type *decl_address;
1325 gimple_stmt_iterator *gsi;
1326 bool changed;
1327 bool reset;
1330 /* Eliminates references to local variables in *TP out of the single
1331 entry single exit region starting at DTA->ENTRY.
1332 DECL_ADDRESS contains addresses of the references that had their
1333 address taken already. If the expression is changed, CHANGED is
1334 set to true. Callback for walk_tree. */
1336 static tree
1337 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
1339 struct elv_data *const dta = (struct elv_data *) data;
1340 tree t = *tp, var, addr, addr_type, type, obj;
1342 if (DECL_P (t))
1344 *walk_subtrees = 0;
1346 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
1347 return NULL_TREE;
1349 type = TREE_TYPE (t);
1350 addr_type = build_pointer_type (type);
1351 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
1352 dta->gsi);
1353 if (dta->gsi == NULL && addr == NULL_TREE)
1355 dta->reset = true;
1356 return NULL_TREE;
1359 *tp = build_simple_mem_ref (addr);
1361 dta->changed = true;
1362 return NULL_TREE;
1365 if (TREE_CODE (t) == ADDR_EXPR)
1367 /* ADDR_EXPR may appear in two contexts:
1368 -- as a gimple operand, when the address taken is a function invariant
1369 -- as gimple rhs, when the resulting address in not a function
1370 invariant
1371 We do not need to do anything special in the latter case (the base of
1372 the memory reference whose address is taken may be replaced in the
1373 DECL_P case). The former case is more complicated, as we need to
1374 ensure that the new address is still a gimple operand. Thus, it
1375 is not sufficient to replace just the base of the memory reference --
1376 we need to move the whole computation of the address out of the
1377 loop. */
1378 if (!is_gimple_val (t))
1379 return NULL_TREE;
1381 *walk_subtrees = 0;
1382 obj = TREE_OPERAND (t, 0);
1383 var = get_base_address (obj);
1384 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
1385 return NULL_TREE;
1387 addr_type = TREE_TYPE (t);
1388 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
1389 dta->gsi);
1390 if (dta->gsi == NULL && addr == NULL_TREE)
1392 dta->reset = true;
1393 return NULL_TREE;
1395 *tp = addr;
1397 dta->changed = true;
1398 return NULL_TREE;
1401 if (!EXPR_P (t))
1402 *walk_subtrees = 0;
1404 return NULL_TREE;
1407 /* Moves the references to local variables in STMT at *GSI out of the single
1408 entry single exit region starting at ENTRY. DECL_ADDRESS contains
1409 addresses of the references that had their address taken
1410 already. */
1412 static void
1413 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
1414 int_tree_htab_type *decl_address)
1416 struct elv_data dta;
1417 gimple *stmt = gsi_stmt (*gsi);
1419 memset (&dta.info, '\0', sizeof (dta.info));
1420 dta.entry = entry;
1421 dta.decl_address = decl_address;
1422 dta.changed = false;
1423 dta.reset = false;
1425 if (gimple_debug_bind_p (stmt))
1427 dta.gsi = NULL;
1428 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
1429 eliminate_local_variables_1, &dta.info, NULL);
1430 if (dta.reset)
1432 gimple_debug_bind_reset_value (stmt);
1433 dta.changed = true;
1436 else if (gimple_clobber_p (stmt))
1438 unlink_stmt_vdef (stmt);
1439 stmt = gimple_build_nop ();
1440 gsi_replace (gsi, stmt, false);
1441 dta.changed = true;
1443 else
1445 dta.gsi = gsi;
1446 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
1449 if (dta.changed)
1450 update_stmt (stmt);
1453 /* Eliminates the references to local variables from the single entry
1454 single exit region between the ENTRY and EXIT edges.
1456 This includes:
1457 1) Taking address of a local variable -- these are moved out of the
1458 region (and temporary variable is created to hold the address if
1459 necessary).
1461 2) Dereferencing a local variable -- these are replaced with indirect
1462 references. */
1464 static void
1465 eliminate_local_variables (edge entry, edge exit)
1467 basic_block bb;
1468 auto_vec<basic_block, 3> body;
1469 unsigned i;
1470 gimple_stmt_iterator gsi;
1471 bool has_debug_stmt = false;
1472 int_tree_htab_type decl_address (10);
1473 basic_block entry_bb = entry->src;
1474 basic_block exit_bb = exit->dest;
1476 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1478 FOR_EACH_VEC_ELT (body, i, bb)
1479 if (bb != entry_bb && bb != exit_bb)
1481 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1482 if (is_gimple_debug (gsi_stmt (gsi)))
1484 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1485 has_debug_stmt = true;
1487 else
1488 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1491 if (has_debug_stmt)
1492 FOR_EACH_VEC_ELT (body, i, bb)
1493 if (bb != entry_bb && bb != exit_bb)
1494 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1495 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1496 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1499 /* Returns true if expression EXPR is not defined between ENTRY and
1500 EXIT, i.e. if all its operands are defined outside of the region. */
1502 static bool
1503 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
1505 basic_block entry_bb = entry->src;
1506 basic_block exit_bb = exit->dest;
1507 basic_block def_bb;
1509 if (is_gimple_min_invariant (expr))
1510 return true;
1512 if (TREE_CODE (expr) == SSA_NAME)
1514 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1515 if (def_bb
1516 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
1517 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
1518 return false;
1520 return true;
1523 return false;
1526 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
1527 The copies are stored to NAME_COPIES, if NAME was already duplicated,
1528 its duplicate stored in NAME_COPIES is returned.
1530 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
1531 duplicated, storing the copies in DECL_COPIES. */
1533 static tree
1534 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
1535 int_tree_htab_type *decl_copies,
1536 bool copy_name_p)
1538 tree copy, var, var_copy;
1539 unsigned idx, uid, nuid;
1540 struct int_tree_map ielt;
1541 struct name_to_copy_elt elt, *nelt;
1542 name_to_copy_elt **slot;
1543 int_tree_map *dslot;
1545 if (TREE_CODE (name) != SSA_NAME)
1546 return name;
1548 idx = SSA_NAME_VERSION (name);
1549 elt.version = idx;
1550 slot = name_copies->find_slot_with_hash (&elt, idx,
1551 copy_name_p ? INSERT : NO_INSERT);
1552 if (slot && *slot)
1553 return (*slot)->new_name;
1555 if (copy_name_p)
1557 copy = duplicate_ssa_name (name, NULL);
1558 nelt = XNEW (struct name_to_copy_elt);
1559 nelt->version = idx;
1560 nelt->new_name = copy;
1561 nelt->field = NULL_TREE;
1562 *slot = nelt;
1564 else
1566 gcc_assert (!slot);
1567 copy = name;
1570 var = SSA_NAME_VAR (name);
1571 if (!var)
1572 return copy;
1574 uid = DECL_UID (var);
1575 ielt.uid = uid;
1576 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
1577 if (!dslot->to)
1579 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
1580 DECL_NOT_GIMPLE_REG_P (var_copy) = DECL_NOT_GIMPLE_REG_P (var);
1581 dslot->uid = uid;
1582 dslot->to = var_copy;
1584 /* Ensure that when we meet this decl next time, we won't duplicate
1585 it again. */
1586 nuid = DECL_UID (var_copy);
1587 ielt.uid = nuid;
1588 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
1589 gcc_assert (!dslot->to);
1590 dslot->uid = nuid;
1591 dslot->to = var_copy;
1593 else
1594 var_copy = dslot->to;
1596 replace_ssa_name_symbol (copy, var_copy);
1597 return copy;
1600 /* Finds the ssa names used in STMT that are defined outside the
1601 region between ENTRY and EXIT and replaces such ssa names with
1602 their duplicates. The duplicates are stored to NAME_COPIES. Base
1603 decls of all ssa names used in STMT (including those defined in
1604 LOOP) are replaced with the new temporary variables; the
1605 replacement decls are stored in DECL_COPIES. */
1607 static void
1608 separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
1609 name_to_copy_table_type *name_copies,
1610 int_tree_htab_type *decl_copies)
1612 use_operand_p use;
1613 def_operand_p def;
1614 ssa_op_iter oi;
1615 tree name, copy;
1616 bool copy_name_p;
1618 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
1620 name = DEF_FROM_PTR (def);
1621 gcc_assert (TREE_CODE (name) == SSA_NAME);
1622 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1623 false);
1624 gcc_assert (copy == name);
1627 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1629 name = USE_FROM_PTR (use);
1630 if (TREE_CODE (name) != SSA_NAME)
1631 continue;
1633 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
1634 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1635 copy_name_p);
1636 SET_USE (use, copy);
1640 /* Finds the ssa names used in STMT that are defined outside the
1641 region between ENTRY and EXIT and replaces such ssa names with
1642 their duplicates. The duplicates are stored to NAME_COPIES. Base
1643 decls of all ssa names used in STMT (including those defined in
1644 LOOP) are replaced with the new temporary variables; the
1645 replacement decls are stored in DECL_COPIES. */
1647 static bool
1648 separate_decls_in_region_debug (gimple *stmt,
1649 name_to_copy_table_type *name_copies,
1650 int_tree_htab_type *decl_copies)
1652 use_operand_p use;
1653 ssa_op_iter oi;
1654 tree var, name;
1655 struct int_tree_map ielt;
1656 struct name_to_copy_elt elt;
1657 name_to_copy_elt **slot;
1658 int_tree_map *dslot;
1660 if (gimple_debug_bind_p (stmt))
1661 var = gimple_debug_bind_get_var (stmt);
1662 else if (gimple_debug_source_bind_p (stmt))
1663 var = gimple_debug_source_bind_get_var (stmt);
1664 else
1665 return true;
1666 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
1667 return true;
1668 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
1669 ielt.uid = DECL_UID (var);
1670 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
1671 if (!dslot)
1672 return true;
1673 if (gimple_debug_bind_p (stmt))
1674 gimple_debug_bind_set_var (stmt, dslot->to);
1675 else if (gimple_debug_source_bind_p (stmt))
1676 gimple_debug_source_bind_set_var (stmt, dslot->to);
1678 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1680 name = USE_FROM_PTR (use);
1681 if (TREE_CODE (name) != SSA_NAME)
1682 continue;
1684 elt.version = SSA_NAME_VERSION (name);
1685 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
1686 if (!slot)
1688 gimple_debug_bind_reset_value (stmt);
1689 update_stmt (stmt);
1690 break;
1693 SET_USE (use, (*slot)->new_name);
1696 return false;
1699 /* Callback for htab_traverse. Adds a field corresponding to the reduction
1700 specified in SLOT. The type is passed in DATA. */
1703 add_field_for_reduction (reduction_info **slot, tree type)
1706 struct reduction_info *const red = *slot;
1707 tree var = reduc_stmt_res (red->reduc_stmt);
1708 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1709 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1711 insert_field_into_struct (type, field);
1713 red->field = field;
1715 return 1;
1718 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1719 described in SLOT. The type is passed in DATA. */
1722 add_field_for_name (name_to_copy_elt **slot, tree type)
1724 struct name_to_copy_elt *const elt = *slot;
1725 tree name = ssa_name (elt->version);
1726 tree field = build_decl (UNKNOWN_LOCATION,
1727 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1728 TREE_TYPE (name));
1730 insert_field_into_struct (type, field);
1731 elt->field = field;
1733 return 1;
1736 /* Callback for htab_traverse. A local result is the intermediate result
1737 computed by a single
1738 thread, or the initial value in case no iteration was executed.
1739 This function creates a phi node reflecting these values.
1740 The phi's result will be stored in NEW_PHI field of the
1741 reduction's data structure. */
1744 create_phi_for_local_result (reduction_info **slot, class loop *loop)
1746 struct reduction_info *const reduc = *slot;
1747 edge e;
1748 gphi *new_phi;
1749 basic_block store_bb, continue_bb;
1750 tree local_res;
1751 location_t locus;
1753 /* STORE_BB is the block where the phi
1754 should be stored. It is the destination of the loop exit.
1755 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1756 continue_bb = single_pred (loop->latch);
1757 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1759 /* STORE_BB has two predecessors. One coming from the loop
1760 (the reduction's result is computed at the loop),
1761 and another coming from a block preceding the loop,
1762 when no iterations
1763 are executed (the initial value should be taken). */
1764 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1765 e = EDGE_PRED (store_bb, 1);
1766 else
1767 e = EDGE_PRED (store_bb, 0);
1768 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1769 local_res = copy_ssa_name (lhs);
1770 locus = gimple_location (reduc->reduc_stmt);
1771 new_phi = create_phi_node (local_res, store_bb);
1772 add_phi_arg (new_phi, reduc->init, e, locus);
1773 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1774 reduc->new_phi = new_phi;
1776 return 1;
1779 struct clsn_data
1781 tree store;
1782 tree load;
1784 basic_block store_bb;
1785 basic_block load_bb;
1788 /* Callback for htab_traverse. Create an atomic instruction for the
1789 reduction described in SLOT.
1790 DATA annotates the place in memory the atomic operation relates to,
1791 and the basic block it needs to be generated in. */
1794 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1796 struct reduction_info *const reduc = *slot;
1797 gimple_stmt_iterator gsi;
1798 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1799 tree load_struct;
1800 basic_block bb;
1801 basic_block new_bb;
1802 edge e;
1803 tree t, addr, ref, x;
1804 tree tmp_load, name;
1805 gimple *load;
1807 if (reduc->reduc_addr == NULL_TREE)
1809 load_struct = build_simple_mem_ref (clsn_data->load);
1810 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1812 addr = build_addr (t);
1814 else
1816 /* Set the address for the atomic store. */
1817 addr = reduc->reduc_addr;
1819 /* Remove the non-atomic store '*addr = sum'. */
1820 tree res = PHI_RESULT (reduc->keep_res);
1821 use_operand_p use_p;
1822 gimple *stmt;
1823 bool single_use_p = single_imm_use (res, &use_p, &stmt);
1824 gcc_assert (single_use_p);
1825 replace_uses_by (gimple_vdef (stmt),
1826 gimple_vuse (stmt));
1827 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1828 gsi_remove (&gsi, true);
1831 /* Create phi node. */
1832 bb = clsn_data->load_bb;
1834 gsi = gsi_last_bb (bb);
1835 e = split_block (bb, gsi_stmt (gsi));
1836 new_bb = e->dest;
1838 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1839 tmp_load = make_ssa_name (tmp_load);
1840 load = gimple_build_omp_atomic_load (tmp_load, addr,
1841 OMP_MEMORY_ORDER_RELAXED);
1842 SSA_NAME_DEF_STMT (tmp_load) = load;
1843 gsi = gsi_start_bb (new_bb);
1844 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1846 e = split_block (new_bb, load);
1847 new_bb = e->dest;
1848 gsi = gsi_start_bb (new_bb);
1849 ref = tmp_load;
1850 x = fold_build2 (reduc->reduction_code,
1851 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1852 PHI_RESULT (reduc->new_phi));
1854 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1855 GSI_CONTINUE_LINKING);
1857 gimple *store = gimple_build_omp_atomic_store (name,
1858 OMP_MEMORY_ORDER_RELAXED);
1859 gsi_insert_after (&gsi, store, GSI_NEW_STMT);
1860 return 1;
1863 /* Create the atomic operation at the join point of the threads.
1864 REDUCTION_LIST describes the reductions in the LOOP.
1865 LD_ST_DATA describes the shared data structure where
1866 shared data is stored in and loaded from. */
1867 static void
1868 create_call_for_reduction (class loop *loop,
1869 reduction_info_table_type *reduction_list,
1870 struct clsn_data *ld_st_data)
1872 reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
1873 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1874 basic_block continue_bb = single_pred (loop->latch);
1875 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1876 reduction_list
1877 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1880 /* Callback for htab_traverse. Loads the final reduction value at the
1881 join point of all threads, and inserts it in the right place. */
1884 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1886 struct reduction_info *const red = *slot;
1887 gimple *stmt;
1888 gimple_stmt_iterator gsi;
1889 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1890 tree load_struct;
1891 tree name;
1892 tree x;
1894 /* If there's no exit phi, the result of the reduction is unused. */
1895 if (red->keep_res == NULL)
1896 return 1;
1898 gsi = gsi_after_labels (clsn_data->load_bb);
1899 load_struct = build_simple_mem_ref (clsn_data->load);
1900 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1901 NULL_TREE);
1903 x = load_struct;
1904 name = PHI_RESULT (red->keep_res);
1905 stmt = gimple_build_assign (name, x);
1907 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1909 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1910 !gsi_end_p (gsi); gsi_next (&gsi))
1911 if (gsi_stmt (gsi) == red->keep_res)
1913 remove_phi_node (&gsi, false);
1914 return 1;
1916 gcc_unreachable ();
1919 /* Load the reduction result that was stored in LD_ST_DATA.
1920 REDUCTION_LIST describes the list of reductions that the
1921 loads should be generated for. */
1922 static void
1923 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1924 struct clsn_data *ld_st_data)
1926 gimple_stmt_iterator gsi;
1927 tree t;
1928 gimple *stmt;
1930 gsi = gsi_after_labels (ld_st_data->load_bb);
1931 t = build_fold_addr_expr (ld_st_data->store);
1932 stmt = gimple_build_assign (ld_st_data->load, t);
1934 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1936 reduction_list
1937 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1941 /* Callback for htab_traverse. Store the neutral value for the
1942 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1943 1 for MULT_EXPR, etc. into the reduction field.
1944 The reduction is specified in SLOT. The store information is
1945 passed in DATA. */
1948 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1950 struct reduction_info *const red = *slot;
1951 tree t;
1952 gimple *stmt;
1953 gimple_stmt_iterator gsi;
1954 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1956 gsi = gsi_last_bb (clsn_data->store_bb);
1957 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1958 stmt = gimple_build_assign (t, red->initial_value);
1959 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1961 return 1;
1964 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1965 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1966 specified in SLOT. */
1969 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1970 struct clsn_data *clsn_data)
1972 struct name_to_copy_elt *const elt = *slot;
1973 tree t;
1974 gimple *stmt;
1975 gimple_stmt_iterator gsi;
1976 tree type = TREE_TYPE (elt->new_name);
1977 tree load_struct;
1979 gsi = gsi_last_bb (clsn_data->store_bb);
1980 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1981 stmt = gimple_build_assign (t, ssa_name (elt->version));
1982 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1984 gsi = gsi_last_bb (clsn_data->load_bb);
1985 load_struct = build_simple_mem_ref (clsn_data->load);
1986 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1987 stmt = gimple_build_assign (elt->new_name, t);
1988 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1990 return 1;
1993 /* Moves all the variables used in LOOP and defined outside of it (including
1994 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1995 name) to a structure created for this purpose. The code
1997 while (1)
1999 use (a);
2000 use (b);
2003 is transformed this way:
2005 bb0:
2006 old.a = a;
2007 old.b = b;
2009 bb1:
2010 a' = new->a;
2011 b' = new->b;
2012 while (1)
2014 use (a');
2015 use (b');
2018 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
2019 pointer `new' is intentionally not initialized (the loop will be split to a
2020 separate function later, and `new' will be initialized from its arguments).
2021 LD_ST_DATA holds information about the shared data structure used to pass
2022 information among the threads. It is initialized here, and
2023 gen_parallel_loop will pass it to create_call_for_reduction that
2024 needs this information. REDUCTION_LIST describes the reductions
2025 in LOOP. */
2027 static void
2028 separate_decls_in_region (edge entry, edge exit,
2029 reduction_info_table_type *reduction_list,
2030 tree *arg_struct, tree *new_arg_struct,
2031 struct clsn_data *ld_st_data)
2034 basic_block bb1 = split_edge (entry);
2035 basic_block bb0 = single_pred (bb1);
2036 name_to_copy_table_type name_copies (10);
2037 int_tree_htab_type decl_copies (10);
2038 unsigned i;
2039 tree type, type_name, nvar;
2040 gimple_stmt_iterator gsi;
2041 struct clsn_data clsn_data;
2042 auto_vec<basic_block, 3> body;
2043 basic_block bb;
2044 basic_block entry_bb = bb1;
2045 basic_block exit_bb = exit->dest;
2046 bool has_debug_stmt = false;
2048 entry = single_succ_edge (entry_bb);
2049 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2051 FOR_EACH_VEC_ELT (body, i, bb)
2053 if (bb != entry_bb && bb != exit_bb)
2055 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2056 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
2057 &name_copies, &decl_copies);
2059 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2061 gimple *stmt = gsi_stmt (gsi);
2063 if (is_gimple_debug (stmt))
2064 has_debug_stmt = true;
2065 else
2066 separate_decls_in_region_stmt (entry, exit, stmt,
2067 &name_copies, &decl_copies);
2072 /* Now process debug bind stmts. We must not create decls while
2073 processing debug stmts, so we defer their processing so as to
2074 make sure we will have debug info for as many variables as
2075 possible (all of those that were dealt with in the loop above),
2076 and discard those for which we know there's nothing we can
2077 do. */
2078 if (has_debug_stmt)
2079 FOR_EACH_VEC_ELT (body, i, bb)
2080 if (bb != entry_bb && bb != exit_bb)
2082 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2084 gimple *stmt = gsi_stmt (gsi);
2086 if (is_gimple_debug (stmt))
2088 if (separate_decls_in_region_debug (stmt, &name_copies,
2089 &decl_copies))
2091 gsi_remove (&gsi, true);
2092 continue;
2096 gsi_next (&gsi);
2100 if (name_copies.is_empty () && reduction_list->is_empty ())
2102 /* It may happen that there is nothing to copy (if there are only
2103 loop carried and external variables in the loop). */
2104 *arg_struct = NULL;
2105 *new_arg_struct = NULL;
2107 else
2109 /* Create the type for the structure to store the ssa names to. */
2110 type = lang_hooks.types.make_type (RECORD_TYPE);
2111 type_name = build_decl (UNKNOWN_LOCATION,
2112 TYPE_DECL, create_tmp_var_name (".paral_data"),
2113 type);
2114 TYPE_NAME (type) = type_name;
2116 name_copies.traverse <tree, add_field_for_name> (type);
2117 if (reduction_list && !reduction_list->is_empty ())
2119 /* Create the fields for reductions. */
2120 reduction_list->traverse <tree, add_field_for_reduction> (type);
2122 layout_type (type);
2124 /* Create the loads and stores. */
2125 *arg_struct = create_tmp_var (type, ".paral_data_store");
2126 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
2127 *new_arg_struct = make_ssa_name (nvar);
2129 ld_st_data->store = *arg_struct;
2130 ld_st_data->load = *new_arg_struct;
2131 ld_st_data->store_bb = bb0;
2132 ld_st_data->load_bb = bb1;
2134 name_copies
2135 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
2136 (ld_st_data);
2138 /* Load the calculation from memory (after the join of the threads). */
2140 if (reduction_list && !reduction_list->is_empty ())
2142 reduction_list
2143 ->traverse <struct clsn_data *, create_stores_for_reduction>
2144 (ld_st_data);
2145 clsn_data.load = make_ssa_name (nvar);
2146 clsn_data.load_bb = exit->dest;
2147 clsn_data.store = ld_st_data->store;
2148 create_final_loads_for_reduction (reduction_list, &clsn_data);
2153 /* Returns true if FN was created to run in parallel. */
2155 bool
2156 parallelized_function_p (tree fndecl)
2158 cgraph_node *node = cgraph_node::get (fndecl);
2159 gcc_assert (node != NULL);
2160 return node->parallelized_function;
2163 /* Creates and returns an empty function that will receive the body of
2164 a parallelized loop. */
2166 static tree
2167 create_loop_fn (location_t loc)
2169 char buf[100];
2170 char *tname;
2171 tree decl, type, name, t;
2172 struct function *act_cfun = cfun;
2173 static unsigned loopfn_num;
2175 loc = LOCATION_LOCUS (loc);
2176 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2177 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2178 clean_symbol_name (tname);
2179 name = get_identifier (tname);
2180 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2182 decl = build_decl (loc, FUNCTION_DECL, name, type);
2183 TREE_STATIC (decl) = 1;
2184 TREE_USED (decl) = 1;
2185 DECL_ARTIFICIAL (decl) = 1;
2186 DECL_IGNORED_P (decl) = 0;
2187 TREE_PUBLIC (decl) = 0;
2188 DECL_UNINLINABLE (decl) = 1;
2189 DECL_EXTERNAL (decl) = 0;
2190 DECL_CONTEXT (decl) = NULL_TREE;
2191 DECL_INITIAL (decl) = make_node (BLOCK);
2192 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2194 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
2195 DECL_ARTIFICIAL (t) = 1;
2196 DECL_IGNORED_P (t) = 1;
2197 DECL_RESULT (decl) = t;
2199 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
2200 ptr_type_node);
2201 DECL_ARTIFICIAL (t) = 1;
2202 DECL_ARG_TYPE (t) = ptr_type_node;
2203 DECL_CONTEXT (t) = decl;
2204 TREE_USED (t) = 1;
2205 DECL_ARGUMENTS (decl) = t;
2207 allocate_struct_function (decl, false);
2209 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2210 it. */
2211 set_cfun (act_cfun);
2213 return decl;
2216 /* Replace uses of NAME by VAL in block BB. */
2218 static void
2219 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2221 gimple *use_stmt;
2222 imm_use_iterator imm_iter;
2224 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2226 if (gimple_bb (use_stmt) != bb)
2227 continue;
2229 use_operand_p use_p;
2230 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2231 SET_USE (use_p, val);
2235 /* Do transformation from:
2237 <bb preheader>:
2239 goto <bb header>
2241 <bb header>:
2242 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2243 sum_a = PHI <sum_init (preheader), sum_b (latch)>
2245 use (ivtmp_a)
2247 sum_b = sum_a + sum_update
2249 if (ivtmp_a < n)
2250 goto <bb latch>;
2251 else
2252 goto <bb exit>;
2254 <bb latch>:
2255 ivtmp_b = ivtmp_a + 1;
2256 goto <bb header>
2258 <bb exit>:
2259 sum_z = PHI <sum_b (cond[1]), ...>
2261 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
2262 that's <bb header>.
2266 <bb preheader>:
2268 goto <bb newheader>
2270 <bb header>:
2271 ivtmp_a = PHI <ivtmp_c (latch)>
2272 sum_a = PHI <sum_c (latch)>
2274 use (ivtmp_a)
2276 sum_b = sum_a + sum_update
2278 goto <bb latch>;
2280 <bb newheader>:
2281 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2282 sum_c = PHI <sum_init (preheader), sum_b (latch)>
2283 if (ivtmp_c < n + 1)
2284 goto <bb header>;
2285 else
2286 goto <bb newexit>;
2288 <bb latch>:
2289 ivtmp_b = ivtmp_a + 1;
2290 goto <bb newheader>
2292 <bb newexit>:
2293 sum_y = PHI <sum_c (newheader)>
2295 <bb exit>:
2296 sum_z = PHI <sum_y (newexit), ...>
2299 In unified diff format:
2301 <bb preheader>:
2303 - goto <bb header>
2304 + goto <bb newheader>
2306 <bb header>:
2307 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2308 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
2309 + ivtmp_a = PHI <ivtmp_c (latch)>
2310 + sum_a = PHI <sum_c (latch)>
2312 use (ivtmp_a)
2314 sum_b = sum_a + sum_update
2316 - if (ivtmp_a < n)
2317 - goto <bb latch>;
2318 + goto <bb latch>;
2320 + <bb newheader>:
2321 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2322 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
2323 + if (ivtmp_c < n + 1)
2324 + goto <bb header>;
2325 else
2326 goto <bb exit>;
2328 <bb latch>:
2329 ivtmp_b = ivtmp_a + 1;
2330 - goto <bb header>
2331 + goto <bb newheader>
2333 + <bb newexit>:
2334 + sum_y = PHI <sum_c (newheader)>
2336 <bb exit>:
2337 - sum_z = PHI <sum_b (cond[1]), ...>
2338 + sum_z = PHI <sum_y (newexit), ...>
2340 Note: the example does not show any virtual phis, but these are handled more
2341 or less as reductions.
2344 Moves the exit condition of LOOP to the beginning of its header.
2345 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
2346 bound. */
2348 static void
2349 transform_to_exit_first_loop_alt (class loop *loop,
2350 reduction_info_table_type *reduction_list,
2351 tree bound)
2353 basic_block header = loop->header;
2354 basic_block latch = loop->latch;
2355 edge exit = single_dom_exit (loop);
2356 basic_block exit_block = exit->dest;
2357 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2358 tree control = gimple_cond_lhs (cond_stmt);
2359 edge e;
2361 /* Create the new_header block. */
2362 basic_block new_header = split_block_before_cond_jump (exit->src);
2363 edge edge_at_split = single_pred_edge (new_header);
2365 /* Redirect entry edge to new_header. */
2366 edge entry = loop_preheader_edge (loop);
2367 e = redirect_edge_and_branch (entry, new_header);
2368 gcc_assert (e == entry);
2370 /* Redirect post_inc_edge to new_header. */
2371 edge post_inc_edge = single_succ_edge (latch);
2372 e = redirect_edge_and_branch (post_inc_edge, new_header);
2373 gcc_assert (e == post_inc_edge);
2375 /* Redirect post_cond_edge to header. */
2376 edge post_cond_edge = single_pred_edge (latch);
2377 e = redirect_edge_and_branch (post_cond_edge, header);
2378 gcc_assert (e == post_cond_edge);
2380 /* Redirect edge_at_split to latch. */
2381 e = redirect_edge_and_branch (edge_at_split, latch);
2382 gcc_assert (e == edge_at_split);
2384 /* Set the new loop bound. */
2385 gimple_cond_set_rhs (cond_stmt, bound);
2386 update_stmt (cond_stmt);
2388 /* Repair the ssa. */
2389 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2390 edge_var_map *vm;
2391 gphi_iterator gsi;
2392 int i;
2393 for (gsi = gsi_start_phis (header), i = 0;
2394 !gsi_end_p (gsi) && v->iterate (i, &vm);
2395 gsi_next (&gsi), i++)
2397 gphi *phi = gsi.phi ();
2398 tree res_a = PHI_RESULT (phi);
2400 /* Create new phi. */
2401 tree res_c = copy_ssa_name (res_a, phi);
2402 gphi *nphi = create_phi_node (res_c, new_header);
2404 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
2405 replace_uses_in_bb_by (res_a, res_c, new_header);
2407 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
2408 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2410 /* Replace sum_b with sum_c in exit phi. */
2411 tree res_b = redirect_edge_var_map_def (vm);
2412 replace_uses_in_bb_by (res_b, res_c, exit_block);
2414 struct reduction_info *red = reduction_phi (reduction_list, phi);
2415 gcc_assert (virtual_operand_p (res_a)
2416 || res_a == control
2417 || red != NULL);
2419 if (red)
2421 /* Register the new reduction phi. */
2422 red->reduc_phi = nphi;
2423 gimple_set_uid (red->reduc_phi, red->reduc_version);
2426 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
2428 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
2429 flush_pending_stmts (entry);
2431 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
2432 flush_pending_stmts (post_inc_edge);
2435 basic_block new_exit_block = NULL;
2436 if (!single_pred_p (exit->dest))
2438 /* Create a new empty exit block, inbetween the new loop header and the
2439 old exit block. The function separate_decls_in_region needs this block
2440 to insert code that is active on loop exit, but not any other path. */
2441 new_exit_block = split_edge (exit);
2444 /* Insert and register the reduction exit phis. */
2445 for (gphi_iterator gsi = gsi_start_phis (exit_block);
2446 !gsi_end_p (gsi);
2447 gsi_next (&gsi))
2449 gphi *phi = gsi.phi ();
2450 gphi *nphi = NULL;
2451 tree res_z = PHI_RESULT (phi);
2452 tree res_c;
2454 if (new_exit_block != NULL)
2456 /* Now that we have a new exit block, duplicate the phi of the old
2457 exit block in the new exit block to preserve loop-closed ssa. */
2458 edge succ_new_exit_block = single_succ_edge (new_exit_block);
2459 edge pred_new_exit_block = single_pred_edge (new_exit_block);
2460 tree res_y = copy_ssa_name (res_z, phi);
2461 nphi = create_phi_node (res_y, new_exit_block);
2462 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2463 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2464 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2466 else
2467 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2469 if (virtual_operand_p (res_z))
2470 continue;
2472 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
2473 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2474 if (red != NULL)
2475 red->keep_res = (nphi != NULL
2476 ? nphi
2477 : phi);
2480 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2481 then we're still using some fields, so only bother about fields that are
2482 still used: header and latch.
2483 The loop has a new header bb, so we update it. The latch bb stays the
2484 same. */
2485 loop->header = new_header;
2487 /* Recalculate dominance info. */
2488 free_dominance_info (CDI_DOMINATORS);
2489 calculate_dominance_info (CDI_DOMINATORS);
2492 /* Tries to moves the exit condition of LOOP to the beginning of its header
2493 without duplication of the loop body. NIT is the number of iterations of the
2494 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
2495 transformation is successful. */
2497 static bool
2498 try_transform_to_exit_first_loop_alt (class loop *loop,
2499 reduction_info_table_type *reduction_list,
2500 tree nit)
2502 /* Check whether the latch contains a single statement. */
2503 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2504 return false;
2506 /* Check whether the latch contains no phis. */
2507 if (phi_nodes (loop->latch) != NULL)
2508 return false;
2510 /* Check whether the latch contains the loop iv increment. */
2511 edge back = single_succ_edge (loop->latch);
2512 edge exit = single_dom_exit (loop);
2513 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2514 tree control = gimple_cond_lhs (cond_stmt);
2515 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2516 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2517 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2518 return false;
2520 /* Check whether there's no code between the loop condition and the latch. */
2521 if (!single_pred_p (loop->latch)
2522 || single_pred (loop->latch) != exit->src)
2523 return false;
2525 tree alt_bound = NULL_TREE;
2526 tree nit_type = TREE_TYPE (nit);
2528 /* Figure out whether nit + 1 overflows. */
2529 if (TREE_CODE (nit) == INTEGER_CST)
2531 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
2533 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2534 nit, build_one_cst (nit_type));
2536 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
2537 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2538 return true;
2540 else
2542 /* Todo: Figure out if we can trigger this, if it's worth to handle
2543 optimally, and if we can handle it optimally. */
2544 return false;
2548 gcc_assert (TREE_CODE (nit) == SSA_NAME);
2550 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2551 iv with base 0 and step 1 that is incremented in the latch, like this:
2553 <bb header>:
2554 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2556 if (iv_1 < nit)
2557 goto <bb latch>;
2558 else
2559 goto <bb exit>;
2561 <bb latch>:
2562 iv_2 = iv_1 + 1;
2563 goto <bb header>;
2565 The range of iv_1 is [0, nit]. The latch edge is taken for
2566 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
2567 number of latch executions is equal to nit.
2569 The function max_loop_iterations gives us the maximum number of latch
2570 executions, so it gives us the maximum value of nit. */
2571 widest_int nit_max;
2572 if (!max_loop_iterations (loop, &nit_max))
2573 return false;
2575 /* Check if nit + 1 overflows. */
2576 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
2577 if (nit_max >= type_max)
2578 return false;
2580 gimple *def = SSA_NAME_DEF_STMT (nit);
2582 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
2583 if (def
2584 && is_gimple_assign (def)
2585 && gimple_assign_rhs_code (def) == PLUS_EXPR)
2587 tree op1 = gimple_assign_rhs1 (def);
2588 tree op2 = gimple_assign_rhs2 (def);
2589 if (integer_minus_onep (op1))
2590 alt_bound = op2;
2591 else if (integer_minus_onep (op2))
2592 alt_bound = op1;
2595 /* If not found, insert nit + 1. */
2596 if (alt_bound == NULL_TREE)
2598 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2599 build_int_cst_type (nit_type, 1));
2601 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2603 alt_bound
2604 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2605 GSI_CONTINUE_LINKING);
2608 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2609 return true;
2612 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
2613 number of iterations of the loop. REDUCTION_LIST describes the reductions in
2614 LOOP. */
2616 static void
2617 transform_to_exit_first_loop (class loop *loop,
2618 reduction_info_table_type *reduction_list,
2619 tree nit)
2621 basic_block *bbs, *nbbs, ex_bb, orig_header;
2622 unsigned n;
2623 bool ok;
2624 edge exit = single_dom_exit (loop), hpred;
2625 tree control, control_name, res, t;
2626 gphi *phi, *nphi;
2627 gassign *stmt;
2628 gcond *cond_stmt, *cond_nit;
2629 tree nit_1;
2631 split_block_after_labels (loop->header);
2632 orig_header = single_succ (loop->header);
2633 hpred = single_succ_edge (loop->header);
2635 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2636 control = gimple_cond_lhs (cond_stmt);
2637 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
2639 /* Make sure that we have phi nodes on exit for all loop header phis
2640 (create_parallel_loop requires that). */
2641 for (gphi_iterator gsi = gsi_start_phis (loop->header);
2642 !gsi_end_p (gsi);
2643 gsi_next (&gsi))
2645 phi = gsi.phi ();
2646 res = PHI_RESULT (phi);
2647 t = copy_ssa_name (res, phi);
2648 SET_PHI_RESULT (phi, t);
2649 nphi = create_phi_node (res, orig_header);
2650 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
2652 if (res == control)
2654 gimple_cond_set_lhs (cond_stmt, t);
2655 update_stmt (cond_stmt);
2656 control = t;
2660 bbs = get_loop_body_in_dom_order (loop);
2662 for (n = 0; bbs[n] != exit->src; n++)
2663 continue;
2664 nbbs = XNEWVEC (basic_block, n);
2665 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2666 bbs + 1, n, nbbs);
2667 gcc_assert (ok);
2668 free (bbs);
2669 ex_bb = nbbs[0];
2670 free (nbbs);
2672 /* Other than reductions, the only gimple reg that should be copied
2673 out of the loop is the control variable. */
2674 exit = single_dom_exit (loop);
2675 control_name = NULL_TREE;
2676 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2677 !gsi_end_p (gsi); )
2679 phi = gsi.phi ();
2680 res = PHI_RESULT (phi);
2681 if (virtual_operand_p (res))
2683 gsi_next (&gsi);
2684 continue;
2687 /* Check if it is a part of reduction. If it is,
2688 keep the phi at the reduction's keep_res field. The
2689 PHI_RESULT of this phi is the resulting value of the reduction
2690 variable when exiting the loop. */
2692 if (!reduction_list->is_empty ())
2694 struct reduction_info *red;
2696 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2697 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2698 if (red)
2700 red->keep_res = phi;
2701 gsi_next (&gsi);
2702 continue;
2705 gcc_assert (control_name == NULL_TREE
2706 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2707 control_name = res;
2708 remove_phi_node (&gsi, false);
2710 gcc_assert (control_name != NULL_TREE);
2712 /* Initialize the control variable to number of iterations
2713 according to the rhs of the exit condition. */
2714 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2715 cond_nit = as_a <gcond *> (last_stmt (exit->src));
2716 nit_1 = gimple_cond_rhs (cond_nit);
2717 nit_1 = force_gimple_operand_gsi (&gsi,
2718 fold_convert (TREE_TYPE (control_name), nit_1),
2719 false, NULL_TREE, false, GSI_SAME_STMT);
2720 stmt = gimple_build_assign (control_name, nit_1);
2721 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2724 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2725 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2726 NEW_DATA is the variable that should be initialized from the argument
2727 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2728 that number is to be determined later. */
2730 static void
2731 create_parallel_loop (class loop *loop, tree loop_fn, tree data,
2732 tree new_data, unsigned n_threads, location_t loc,
2733 bool oacc_kernels_p)
2735 gimple_stmt_iterator gsi;
2736 basic_block for_bb, ex_bb, continue_bb;
2737 tree t, param;
2738 gomp_parallel *omp_par_stmt;
2739 gimple *omp_return_stmt1, *omp_return_stmt2;
2740 gimple *phi;
2741 gcond *cond_stmt;
2742 gomp_for *for_stmt;
2743 gomp_continue *omp_cont_stmt;
2744 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2745 edge exit, nexit, guard, end, e;
2747 if (oacc_kernels_p)
2749 gcc_checking_assert (lookup_attribute ("oacc kernels",
2750 DECL_ATTRIBUTES (cfun->decl)));
2751 /* Indicate to later processing that this is a parallelized OpenACC
2752 kernels construct. */
2753 DECL_ATTRIBUTES (cfun->decl)
2754 = tree_cons (get_identifier ("oacc kernels parallelized"),
2755 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2757 else
2759 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2761 basic_block bb = loop_preheader_edge (loop)->src;
2762 basic_block paral_bb = single_pred (bb);
2763 gsi = gsi_last_bb (paral_bb);
2765 gcc_checking_assert (n_threads != 0);
2766 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2767 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2768 = build_int_cst (integer_type_node, n_threads);
2769 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2770 gimple_set_location (omp_par_stmt, loc);
2772 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2774 /* Initialize NEW_DATA. */
2775 if (data)
2777 gassign *assign_stmt;
2779 gsi = gsi_after_labels (bb);
2781 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2782 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2783 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2785 assign_stmt = gimple_build_assign (new_data,
2786 fold_convert (TREE_TYPE (new_data), param));
2787 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2790 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2791 bb = split_loop_exit_edge (single_dom_exit (loop));
2792 gsi = gsi_last_bb (bb);
2793 omp_return_stmt1 = gimple_build_omp_return (false);
2794 gimple_set_location (omp_return_stmt1, loc);
2795 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2798 /* Extract data for GIMPLE_OMP_FOR. */
2799 gcc_assert (loop->header == single_dom_exit (loop)->src);
2800 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2802 cvar = gimple_cond_lhs (cond_stmt);
2803 cvar_base = SSA_NAME_VAR (cvar);
2804 phi = SSA_NAME_DEF_STMT (cvar);
2805 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2806 initvar = copy_ssa_name (cvar);
2807 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2808 initvar);
2809 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2811 gsi = gsi_last_nondebug_bb (loop->latch);
2812 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2813 gsi_remove (&gsi, true);
2815 /* Prepare cfg. */
2816 for_bb = split_edge (loop_preheader_edge (loop));
2817 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2818 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2819 gcc_assert (exit == single_dom_exit (loop));
2821 guard = make_edge (for_bb, ex_bb, 0);
2822 /* FIXME: What is the probability? */
2823 guard->probability = profile_probability::guessed_never ();
2824 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2825 loop->latch = split_edge (single_succ_edge (loop->latch));
2826 single_pred_edge (loop->latch)->flags = 0;
2827 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2828 rescan_loop_exit (end, true, false);
2830 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2831 !gsi_end_p (gpi); gsi_next (&gpi))
2833 location_t locus;
2834 gphi *phi = gpi.phi ();
2835 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2836 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2838 /* If the exit phi is not connected to a header phi in the same loop, this
2839 value is not modified in the loop, and we're done with this phi. */
2840 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2841 && gimple_bb (def_stmt) == loop->header))
2843 locus = gimple_phi_arg_location_from_edge (phi, exit);
2844 add_phi_arg (phi, def, guard, locus);
2845 add_phi_arg (phi, def, end, locus);
2846 continue;
2849 gphi *stmt = as_a <gphi *> (def_stmt);
2850 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2851 locus = gimple_phi_arg_location_from_edge (stmt,
2852 loop_preheader_edge (loop));
2853 add_phi_arg (phi, def, guard, locus);
2855 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2856 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2857 add_phi_arg (phi, def, end, locus);
2859 e = redirect_edge_and_branch (exit, nexit->dest);
2860 PENDING_STMT (e) = NULL;
2862 /* Emit GIMPLE_OMP_FOR. */
2863 if (oacc_kernels_p)
2864 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2865 omp-offload.cc:execute_oacc_loop_designation. */
2866 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2867 else
2869 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2870 int chunk_size = param_parloops_chunk_size;
2871 switch (param_parloops_schedule)
2873 case PARLOOPS_SCHEDULE_STATIC:
2874 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2875 break;
2876 case PARLOOPS_SCHEDULE_DYNAMIC:
2877 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2878 break;
2879 case PARLOOPS_SCHEDULE_GUIDED:
2880 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2881 break;
2882 case PARLOOPS_SCHEDULE_AUTO:
2883 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2884 chunk_size = 0;
2885 break;
2886 case PARLOOPS_SCHEDULE_RUNTIME:
2887 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2888 chunk_size = 0;
2889 break;
2890 default:
2891 gcc_unreachable ();
2893 if (chunk_size != 0)
2894 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2895 = build_int_cst (integer_type_node, chunk_size);
2898 for_stmt = gimple_build_omp_for (NULL,
2899 (oacc_kernels_p
2900 ? GF_OMP_FOR_KIND_OACC_LOOP
2901 : GF_OMP_FOR_KIND_FOR),
2902 t, 1, NULL);
2904 gimple_cond_set_lhs (cond_stmt, cvar_base);
2905 type = TREE_TYPE (cvar);
2906 gimple_set_location (for_stmt, loc);
2907 gimple_omp_for_set_index (for_stmt, 0, initvar);
2908 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2909 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2910 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2911 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2912 cvar_base,
2913 build_int_cst (type, 1)));
2915 gsi = gsi_last_bb (for_bb);
2916 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2917 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2919 /* Emit GIMPLE_OMP_CONTINUE. */
2920 continue_bb = single_pred (loop->latch);
2921 gsi = gsi_last_bb (continue_bb);
2922 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2923 gimple_set_location (omp_cont_stmt, loc);
2924 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2925 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2927 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2928 gsi = gsi_last_bb (ex_bb);
2929 omp_return_stmt2 = gimple_build_omp_return (true);
2930 gimple_set_location (omp_return_stmt2, loc);
2931 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2933 /* After the above dom info is hosed. Re-compute it. */
2934 free_dominance_info (CDI_DOMINATORS);
2935 calculate_dominance_info (CDI_DOMINATORS);
2938 /* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
2939 virtual phi. */
2941 static unsigned int
2942 num_phis (basic_block bb, bool count_virtual_p)
2944 unsigned int nr_phis = 0;
2945 gphi_iterator gsi;
2946 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2948 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2949 continue;
2951 nr_phis++;
2954 return nr_phis;
2957 /* Generates code to execute the iterations of LOOP in N_THREADS
2958 threads in parallel, which can be 0 if that number is to be determined
2959 later.
2961 NITER describes number of iterations of LOOP.
2962 REDUCTION_LIST describes the reductions existent in the LOOP. */
2964 static void
2965 gen_parallel_loop (class loop *loop,
2966 reduction_info_table_type *reduction_list,
2967 unsigned n_threads, class tree_niter_desc *niter,
2968 bool oacc_kernels_p)
2970 tree many_iterations_cond, type, nit;
2971 tree arg_struct, new_arg_struct;
2972 gimple_seq stmts;
2973 edge entry, exit;
2974 struct clsn_data clsn_data;
2975 location_t loc;
2976 gimple *cond_stmt;
2977 unsigned int m_p_thread=2;
2979 /* From
2981 ---------------------------------------------------------------------
2982 loop
2984 IV = phi (INIT, IV + STEP)
2985 BODY1;
2986 if (COND)
2987 break;
2988 BODY2;
2990 ---------------------------------------------------------------------
2992 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2993 we generate the following code:
2995 ---------------------------------------------------------------------
2997 if (MAY_BE_ZERO
2998 || NITER < MIN_PER_THREAD * N_THREADS)
2999 goto original;
3001 BODY1;
3002 store all local loop-invariant variables used in body of the loop to DATA.
3003 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
3004 load the variables from DATA.
3005 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
3006 BODY2;
3007 BODY1;
3008 GIMPLE_OMP_CONTINUE;
3009 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
3010 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
3011 goto end;
3013 original:
3014 loop
3016 IV = phi (INIT, IV + STEP)
3017 BODY1;
3018 if (COND)
3019 break;
3020 BODY2;
3023 end:
3027 /* Create two versions of the loop -- in the old one, we know that the
3028 number of iterations is large enough, and we will transform it into the
3029 loop that will be split to loop_fn, the new one will be used for the
3030 remaining iterations. */
3032 /* We should compute a better number-of-iterations value for outer loops.
3033 That is, if we have
3035 for (i = 0; i < n; ++i)
3036 for (j = 0; j < m; ++j)
3039 we should compute nit = n * m, not nit = n.
3040 Also may_be_zero handling would need to be adjusted. */
3042 type = TREE_TYPE (niter->niter);
3043 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3044 NULL_TREE);
3045 if (stmts)
3046 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3048 if (!oacc_kernels_p)
3050 if (loop->inner)
3051 m_p_thread=2;
3052 else
3053 m_p_thread=MIN_PER_THREAD;
3055 gcc_checking_assert (n_threads != 0);
3056 many_iterations_cond =
3057 fold_build2 (GE_EXPR, boolean_type_node,
3058 nit, build_int_cst (type, m_p_thread * n_threads - 1));
3060 many_iterations_cond
3061 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3062 invert_truthvalue (unshare_expr (niter->may_be_zero)),
3063 many_iterations_cond);
3064 many_iterations_cond
3065 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
3066 if (stmts)
3067 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3068 if (!is_gimple_condexpr_for_cond (many_iterations_cond))
3070 many_iterations_cond
3071 = force_gimple_operand (many_iterations_cond, &stmts,
3072 true, NULL_TREE);
3073 if (stmts)
3074 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3075 stmts);
3078 initialize_original_copy_tables ();
3080 /* We assume that the loop usually iterates a lot. */
3081 loop_version (loop, many_iterations_cond, NULL,
3082 profile_probability::likely (),
3083 profile_probability::unlikely (),
3084 profile_probability::likely (),
3085 profile_probability::unlikely (), true);
3086 update_ssa (TODO_update_ssa_no_phi);
3087 free_original_copy_tables ();
3090 /* Base all the induction variables in LOOP on a single control one. */
3091 canonicalize_loop_ivs (loop, &nit, true);
3092 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3094 /* The call to canonicalize_loop_ivs above failed to "base all the
3095 induction variables in LOOP on a single control one". Do damage
3096 control. */
3097 basic_block preheader = loop_preheader_edge (loop)->src;
3098 basic_block cond_bb = single_pred (preheader);
3099 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3100 gimple_cond_make_true (cond);
3101 update_stmt (cond);
3102 /* We've gotten rid of the duplicate loop created by loop_version, but
3103 we can't undo whatever canonicalize_loop_ivs has done.
3104 TODO: Fix this properly by ensuring that the call to
3105 canonicalize_loop_ivs succeeds. */
3106 if (dump_file
3107 && (dump_flags & TDF_DETAILS))
3108 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3109 " aborting transformation\n", loop->num);
3110 return;
3113 /* Ensure that the exit condition is the first statement in the loop.
3114 The common case is that latch of the loop is empty (apart from the
3115 increment) and immediately follows the loop exit test. Attempt to move the
3116 entry of the loop directly before the exit check and increase the number of
3117 iterations of the loop by one. */
3118 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3120 if (dump_file
3121 && (dump_flags & TDF_DETAILS))
3122 fprintf (dump_file,
3123 "alternative exit-first loop transform succeeded"
3124 " for loop %d\n", loop->num);
3126 else
3128 if (oacc_kernels_p)
3129 n_threads = 1;
3131 /* Fall back on the method that handles more cases, but duplicates the
3132 loop body: move the exit condition of LOOP to the beginning of its
3133 header, and duplicate the part of the last iteration that gets disabled
3134 to the exit of the loop. */
3135 transform_to_exit_first_loop (loop, reduction_list, nit);
3137 update_ssa (TODO_update_ssa_no_phi);
3139 /* Generate initializations for reductions. */
3140 if (!reduction_list->is_empty ())
3141 reduction_list->traverse <class loop *, initialize_reductions> (loop);
3143 /* Eliminate the references to local variables from the loop. */
3144 gcc_assert (single_exit (loop));
3145 entry = loop_preheader_edge (loop);
3146 exit = single_dom_exit (loop);
3148 /* This rewrites the body in terms of new variables. This has already
3149 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
3150 if (!oacc_kernels_p)
3152 eliminate_local_variables (entry, exit);
3153 /* In the old loop, move all variables non-local to the loop to a
3154 structure and back, and create separate decls for the variables used in
3155 loop. */
3156 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3157 &new_arg_struct, &clsn_data);
3159 else
3161 arg_struct = NULL_TREE;
3162 new_arg_struct = NULL_TREE;
3163 clsn_data.load = NULL_TREE;
3164 clsn_data.load_bb = exit->dest;
3165 clsn_data.store = NULL_TREE;
3166 clsn_data.store_bb = NULL;
3169 /* Create the parallel constructs. */
3170 loc = UNKNOWN_LOCATION;
3171 cond_stmt = last_stmt (loop->header);
3172 if (cond_stmt)
3173 loc = gimple_location (cond_stmt);
3174 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3175 n_threads, loc, oacc_kernels_p);
3176 if (!reduction_list->is_empty ())
3177 create_call_for_reduction (loop, reduction_list, &clsn_data);
3179 scev_reset ();
3181 /* Free loop bound estimations that could contain references to
3182 removed statements. */
3183 free_numbers_of_iterations_estimates (cfun);
3186 /* Returns true when LOOP contains vector phi nodes. */
3188 static bool
3189 loop_has_vector_phi_nodes (class loop *loop ATTRIBUTE_UNUSED)
3191 unsigned i;
3192 basic_block *bbs = get_loop_body_in_dom_order (loop);
3193 gphi_iterator gsi;
3194 bool res = true;
3196 for (i = 0; i < loop->num_nodes; i++)
3197 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3198 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
3199 goto end;
3201 res = false;
3202 end:
3203 free (bbs);
3204 return res;
3207 /* Create a reduction_info struct, initialize it with REDUC_STMT
3208 and PHI, insert it to the REDUCTION_LIST. */
3210 static void
3211 build_new_reduction (reduction_info_table_type *reduction_list,
3212 gimple *reduc_stmt, gphi *phi)
3214 reduction_info **slot;
3215 struct reduction_info *new_reduction;
3216 enum tree_code reduction_code;
3218 gcc_assert (reduc_stmt);
3220 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3222 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3223 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3224 reduction_code = gimple_assign_rhs_code (def1);
3226 else
3227 reduction_code = gimple_assign_rhs_code (reduc_stmt);
3228 /* Check for OpenMP supported reduction. */
3229 switch (reduction_code)
3231 case MINUS_EXPR:
3232 reduction_code = PLUS_EXPR;
3233 /* Fallthru. */
3234 case PLUS_EXPR:
3235 case MULT_EXPR:
3236 case MAX_EXPR:
3237 case MIN_EXPR:
3238 case BIT_IOR_EXPR:
3239 case BIT_XOR_EXPR:
3240 case BIT_AND_EXPR:
3241 case TRUTH_OR_EXPR:
3242 case TRUTH_XOR_EXPR:
3243 case TRUTH_AND_EXPR:
3244 break;
3245 default:
3246 return;
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file,
3252 "Detected reduction. reduction stmt is:\n");
3253 print_gimple_stmt (dump_file, reduc_stmt, 0);
3254 fprintf (dump_file, "\n");
3257 new_reduction = XCNEW (struct reduction_info);
3259 new_reduction->reduc_stmt = reduc_stmt;
3260 new_reduction->reduc_phi = phi;
3261 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
3262 new_reduction->reduction_code = reduction_code;
3263 slot = reduction_list->find_slot (new_reduction, INSERT);
3264 *slot = new_reduction;
3267 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3270 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
3272 struct reduction_info *const red = *slot;
3273 gimple_set_uid (red->reduc_phi, red->reduc_version);
3274 return 1;
3277 /* Return true if the type of reduction performed by STMT_INFO is suitable
3278 for this pass. */
3280 static bool
3281 valid_reduction_p (stmt_vec_info stmt_info)
3283 /* Parallelization would reassociate the operation, which isn't
3284 allowed for in-order reductions. */
3285 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3286 return reduc_type != FOLD_LEFT_REDUCTION;
3289 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
3291 static void
3292 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
3294 gphi_iterator gsi;
3295 loop_vec_info simple_loop_info;
3296 auto_vec<gphi *, 4> double_reduc_phis;
3297 auto_vec<gimple *, 4> double_reduc_stmts;
3299 vec_info_shared shared;
3300 vect_loop_form_info info;
3301 if (!vect_analyze_loop_form (loop, &info))
3302 goto gather_done;
3304 simple_loop_info = vect_create_loop_vinfo (loop, &shared, &info);
3305 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3307 gphi *phi = gsi.phi ();
3308 affine_iv iv;
3309 tree res = PHI_RESULT (phi);
3310 bool double_reduc;
3312 if (virtual_operand_p (res))
3313 continue;
3315 if (simple_iv (loop, loop, res, &iv, true))
3316 continue;
3318 stmt_vec_info reduc_stmt_info
3319 = parloops_force_simple_reduction (simple_loop_info,
3320 simple_loop_info->lookup_stmt (phi),
3321 &double_reduc, true);
3322 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
3323 continue;
3325 if (double_reduc)
3327 if (loop->inner->inner != NULL)
3328 continue;
3330 double_reduc_phis.safe_push (phi);
3331 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
3332 continue;
3335 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
3337 delete simple_loop_info;
3339 if (!double_reduc_phis.is_empty ())
3341 vec_info_shared shared;
3342 vect_loop_form_info info;
3343 if (vect_analyze_loop_form (loop->inner, &info))
3345 simple_loop_info
3346 = vect_create_loop_vinfo (loop->inner, &shared, &info);
3347 gphi *phi;
3348 unsigned int i;
3350 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3352 affine_iv iv;
3353 tree res = PHI_RESULT (phi);
3354 bool double_reduc;
3356 use_operand_p use_p;
3357 gimple *inner_stmt;
3358 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3359 gcc_assert (single_use_p);
3360 if (gimple_code (inner_stmt) != GIMPLE_PHI)
3361 continue;
3362 gphi *inner_phi = as_a <gphi *> (inner_stmt);
3363 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3364 &iv, true))
3365 continue;
3367 stmt_vec_info inner_phi_info
3368 = simple_loop_info->lookup_stmt (inner_phi);
3369 stmt_vec_info inner_reduc_stmt_info
3370 = parloops_force_simple_reduction (simple_loop_info,
3371 inner_phi_info,
3372 &double_reduc, true);
3373 gcc_assert (!double_reduc);
3374 if (!inner_reduc_stmt_info
3375 || !valid_reduction_p (inner_reduc_stmt_info))
3376 continue;
3378 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3380 delete simple_loop_info;
3384 gather_done:
3385 if (reduction_list->is_empty ())
3386 return;
3388 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
3389 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
3390 now. */
3391 basic_block bb;
3392 FOR_EACH_BB_FN (bb, cfun)
3393 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3394 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3395 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
3398 /* Try to initialize NITER for code generation part. */
3400 static bool
3401 try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
3403 edge exit = single_dom_exit (loop);
3405 gcc_assert (exit);
3407 /* We need to know # of iterations, and there should be no uses of values
3408 defined inside loop outside of it, unless the values are invariants of
3409 the loop. */
3410 if (!number_of_iterations_exit (loop, exit, niter, false))
3412 if (dump_file && (dump_flags & TDF_DETAILS))
3413 fprintf (dump_file, " FAILED: number of iterations not known\n");
3414 return false;
3417 return true;
3420 /* Return the default def of the first function argument. */
3422 static tree
3423 get_omp_data_i_param (void)
3425 tree decl = DECL_ARGUMENTS (cfun->decl);
3426 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3427 return ssa_default_def (cfun, decl);
3430 /* For PHI in loop header of LOOP, look for pattern:
3432 <bb preheader>
3433 .omp_data_i = &.omp_data_arr;
3434 addr = .omp_data_i->sum;
3435 sum_a = *addr;
3437 <bb header>:
3438 sum_b = PHI <sum_a (preheader), sum_c (latch)>
3440 and return addr. Otherwise, return NULL_TREE. */
3442 static tree
3443 find_reduc_addr (class loop *loop, gphi *phi)
3445 edge e = loop_preheader_edge (loop);
3446 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3447 gimple *stmt = SSA_NAME_DEF_STMT (arg);
3448 if (!gimple_assign_single_p (stmt))
3449 return NULL_TREE;
3450 tree memref = gimple_assign_rhs1 (stmt);
3451 if (TREE_CODE (memref) != MEM_REF)
3452 return NULL_TREE;
3453 tree addr = TREE_OPERAND (memref, 0);
3455 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3456 if (!gimple_assign_single_p (stmt2))
3457 return NULL_TREE;
3458 tree compref = gimple_assign_rhs1 (stmt2);
3459 if (TREE_CODE (compref) != COMPONENT_REF)
3460 return NULL_TREE;
3461 tree addr2 = TREE_OPERAND (compref, 0);
3462 if (TREE_CODE (addr2) != MEM_REF)
3463 return NULL_TREE;
3464 addr2 = TREE_OPERAND (addr2, 0);
3465 if (TREE_CODE (addr2) != SSA_NAME
3466 || addr2 != get_omp_data_i_param ())
3467 return NULL_TREE;
3469 return addr;
3472 /* Try to initialize REDUCTION_LIST for code generation part.
3473 REDUCTION_LIST describes the reductions. */
3475 static bool
3476 try_create_reduction_list (loop_p loop,
3477 reduction_info_table_type *reduction_list,
3478 bool oacc_kernels_p)
3480 edge exit = single_dom_exit (loop);
3481 gphi_iterator gsi;
3483 gcc_assert (exit);
3485 /* Try to get rid of exit phis. */
3486 final_value_replacement_loop (loop);
3488 gather_scalar_reductions (loop, reduction_list);
3491 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3493 gphi *phi = gsi.phi ();
3494 struct reduction_info *red;
3495 imm_use_iterator imm_iter;
3496 use_operand_p use_p;
3497 gimple *reduc_phi;
3498 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3500 if (!virtual_operand_p (val))
3502 if (TREE_CODE (val) != SSA_NAME)
3504 if (dump_file && (dump_flags & TDF_DETAILS))
3505 fprintf (dump_file,
3506 " FAILED: exit PHI argument invariant.\n");
3507 return false;
3510 if (dump_file && (dump_flags & TDF_DETAILS))
3512 fprintf (dump_file, "phi is ");
3513 print_gimple_stmt (dump_file, phi, 0);
3514 fprintf (dump_file, "arg of phi to exit: value ");
3515 print_generic_expr (dump_file, val);
3516 fprintf (dump_file, " used outside loop\n");
3517 fprintf (dump_file,
3518 " checking if it is part of reduction pattern:\n");
3520 if (reduction_list->is_empty ())
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file,
3524 " FAILED: it is not a part of reduction.\n");
3525 return false;
3527 reduc_phi = NULL;
3528 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3530 if (!gimple_debug_bind_p (USE_STMT (use_p))
3531 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3533 reduc_phi = USE_STMT (use_p);
3534 break;
3537 red = reduction_phi (reduction_list, reduc_phi);
3538 if (red == NULL)
3540 if (dump_file && (dump_flags & TDF_DETAILS))
3541 fprintf (dump_file,
3542 " FAILED: it is not a part of reduction.\n");
3543 return false;
3545 if (red->keep_res != NULL)
3547 if (dump_file && (dump_flags & TDF_DETAILS))
3548 fprintf (dump_file,
3549 " FAILED: reduction has multiple exit phis.\n");
3550 return false;
3552 red->keep_res = phi;
3553 if (dump_file && (dump_flags & TDF_DETAILS))
3555 fprintf (dump_file, "reduction phi is ");
3556 print_gimple_stmt (dump_file, red->reduc_phi, 0);
3557 fprintf (dump_file, "reduction stmt is ");
3558 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
3563 /* The iterations of the loop may communicate only through bivs whose
3564 iteration space can be distributed efficiently. */
3565 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3567 gphi *phi = gsi.phi ();
3568 tree def = PHI_RESULT (phi);
3569 affine_iv iv;
3571 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
3573 struct reduction_info *red;
3575 red = reduction_phi (reduction_list, phi);
3576 if (red == NULL)
3578 if (dump_file && (dump_flags & TDF_DETAILS))
3579 fprintf (dump_file,
3580 " FAILED: scalar dependency between iterations\n");
3581 return false;
3586 if (oacc_kernels_p)
3588 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3589 gsi_next (&gsi))
3591 gphi *phi = gsi.phi ();
3592 tree def = PHI_RESULT (phi);
3593 affine_iv iv;
3595 if (!virtual_operand_p (def)
3596 && !simple_iv (loop, loop, def, &iv, true))
3598 tree addr = find_reduc_addr (loop, phi);
3599 if (addr == NULL_TREE)
3600 return false;
3601 struct reduction_info *red = reduction_phi (reduction_list, phi);
3602 red->reduc_addr = addr;
3607 return true;
3610 /* Return true if LOOP contains phis with ADDR_EXPR in args. */
3612 static bool
3613 loop_has_phi_with_address_arg (class loop *loop)
3615 basic_block *bbs = get_loop_body (loop);
3616 bool res = false;
3618 unsigned i, j;
3619 gphi_iterator gsi;
3620 for (i = 0; i < loop->num_nodes; i++)
3621 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3623 gphi *phi = gsi.phi ();
3624 for (j = 0; j < gimple_phi_num_args (phi); j++)
3626 tree arg = gimple_phi_arg_def (phi, j);
3627 if (TREE_CODE (arg) == ADDR_EXPR)
3629 /* This should be handled by eliminate_local_variables, but that
3630 function currently ignores phis. */
3631 res = true;
3632 goto end;
3636 end:
3637 free (bbs);
3639 return res;
3642 /* Return true if memory ref REF (corresponding to the stmt at GSI in
3643 REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
3644 or the statements in REGIONS_BB[I + n]. REF_IS_STORE indicates if REF is a
3645 store. Ignore conflicts with SKIP_STMT. */
3647 static bool
3648 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3649 bool ref_is_store, vec<basic_block> region_bbs,
3650 unsigned int i, gimple *skip_stmt)
3652 basic_block bb = region_bbs[i];
3653 gsi_next (&gsi);
3655 while (true)
3657 for (; !gsi_end_p (gsi);
3658 gsi_next (&gsi))
3660 gimple *stmt = gsi_stmt (gsi);
3661 if (stmt == skip_stmt)
3663 if (dump_file)
3665 fprintf (dump_file, "skipping reduction store: ");
3666 print_gimple_stmt (dump_file, stmt, 0);
3668 continue;
3671 if (!gimple_vdef (stmt)
3672 && !gimple_vuse (stmt))
3673 continue;
3675 if (gimple_code (stmt) == GIMPLE_RETURN)
3676 continue;
3678 if (ref_is_store)
3680 if (ref_maybe_used_by_stmt_p (stmt, ref))
3682 if (dump_file)
3684 fprintf (dump_file, "Stmt ");
3685 print_gimple_stmt (dump_file, stmt, 0);
3687 return true;
3690 else
3692 if (stmt_may_clobber_ref_p_1 (stmt, ref))
3694 if (dump_file)
3696 fprintf (dump_file, "Stmt ");
3697 print_gimple_stmt (dump_file, stmt, 0);
3699 return true;
3703 i++;
3704 if (i == region_bbs.length ())
3705 break;
3706 bb = region_bbs[i];
3707 gsi = gsi_start_bb (bb);
3710 return false;
3713 /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3714 in parallel with REGION_BBS containing the loop. Return the stores of
3715 reduction results in REDUCTION_STORES. */
3717 static bool
3718 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, const vec<basic_block> &region_bbs,
3719 reduction_info_table_type *reduction_list,
3720 bitmap reduction_stores)
3722 tree omp_data_i = get_omp_data_i_param ();
3724 unsigned i;
3725 basic_block bb;
3726 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3728 if (bitmap_bit_p (in_loop_bbs, bb->index))
3729 continue;
3731 gimple_stmt_iterator gsi;
3732 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3733 gsi_next (&gsi))
3735 gimple *stmt = gsi_stmt (gsi);
3736 gimple *skip_stmt = NULL;
3738 if (is_gimple_debug (stmt)
3739 || gimple_code (stmt) == GIMPLE_COND)
3740 continue;
3742 ao_ref ref;
3743 bool ref_is_store = false;
3744 if (gimple_assign_load_p (stmt))
3746 tree rhs = gimple_assign_rhs1 (stmt);
3747 tree base = get_base_address (rhs);
3748 if (TREE_CODE (base) == MEM_REF
3749 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3750 continue;
3752 tree lhs = gimple_assign_lhs (stmt);
3753 if (TREE_CODE (lhs) == SSA_NAME
3754 && has_single_use (lhs))
3756 use_operand_p use_p;
3757 gimple *use_stmt;
3758 struct reduction_info *red;
3759 single_imm_use (lhs, &use_p, &use_stmt);
3760 if (gimple_code (use_stmt) == GIMPLE_PHI
3761 && (red = reduction_phi (reduction_list, use_stmt)))
3763 tree val = PHI_RESULT (red->keep_res);
3764 if (has_single_use (val))
3766 single_imm_use (val, &use_p, &use_stmt);
3767 if (gimple_store_p (use_stmt))
3769 unsigned int id
3770 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
3771 bitmap_set_bit (reduction_stores, id);
3772 skip_stmt = use_stmt;
3773 if (dump_file)
3775 fprintf (dump_file, "found reduction load: ");
3776 print_gimple_stmt (dump_file, stmt, 0);
3783 ao_ref_init (&ref, rhs);
3785 else if (gimple_store_p (stmt))
3787 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3788 ref_is_store = true;
3790 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3791 continue;
3792 else if (!gimple_has_side_effects (stmt)
3793 && !gimple_could_trap_p (stmt)
3794 && !stmt_could_throw_p (cfun, stmt)
3795 && !gimple_vdef (stmt)
3796 && !gimple_vuse (stmt))
3797 continue;
3798 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3799 continue;
3800 else if (gimple_code (stmt) == GIMPLE_RETURN)
3801 continue;
3802 else
3804 if (dump_file)
3806 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3807 print_gimple_stmt (dump_file, stmt, 0);
3809 return false;
3812 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3813 i, skip_stmt))
3815 if (dump_file)
3817 fprintf (dump_file, "conflicts with entry/exit stmt: ");
3818 print_gimple_stmt (dump_file, stmt, 0);
3820 return false;
3825 return true;
3828 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3829 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3830 if any changes were made. */
3832 static bool
3833 oacc_entry_exit_single_gang (bitmap in_loop_bbs,
3834 const vec<basic_block> &region_bbs,
3835 bitmap reduction_stores)
3837 tree gang_pos = NULL_TREE;
3838 bool changed = false;
3840 unsigned i;
3841 basic_block bb;
3842 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3844 if (bitmap_bit_p (in_loop_bbs, bb->index))
3845 continue;
3847 gimple_stmt_iterator gsi;
3848 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3850 gimple *stmt = gsi_stmt (gsi);
3852 if (!gimple_store_p (stmt))
3854 /* Update gsi to point to next stmt. */
3855 gsi_next (&gsi);
3856 continue;
3859 if (bitmap_bit_p (reduction_stores,
3860 SSA_NAME_VERSION (gimple_vdef (stmt))))
3862 if (dump_file)
3864 fprintf (dump_file,
3865 "skipped reduction store for single-gang"
3866 " neutering: ");
3867 print_gimple_stmt (dump_file, stmt, 0);
3870 /* Update gsi to point to next stmt. */
3871 gsi_next (&gsi);
3872 continue;
3875 changed = true;
3877 if (gang_pos == NULL_TREE)
3879 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3880 gcall *gang_single
3881 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3882 gang_pos = make_ssa_name (integer_type_node);
3883 gimple_call_set_lhs (gang_single, gang_pos);
3884 gimple_stmt_iterator start
3885 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3886 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3887 gimple_set_vuse (gang_single, vuse);
3888 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3891 if (dump_file)
3893 fprintf (dump_file,
3894 "found store that needs single-gang neutering: ");
3895 print_gimple_stmt (dump_file, stmt, 0);
3899 /* Split block before store. */
3900 gimple_stmt_iterator gsi2 = gsi;
3901 gsi_prev (&gsi2);
3902 edge e;
3903 if (gsi_end_p (gsi2))
3905 e = split_block_after_labels (bb);
3906 gsi2 = gsi_last_bb (bb);
3908 else
3909 e = split_block (bb, gsi_stmt (gsi2));
3910 basic_block bb2 = e->dest;
3912 /* Split block after store. */
3913 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3914 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3915 basic_block bb3 = e2->dest;
3917 gimple *cond
3918 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3919 NULL_TREE, NULL_TREE);
3920 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3922 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3923 /* FIXME: What is the probability? */
3924 e3->probability = profile_probability::guessed_never ();
3925 e->flags = EDGE_TRUE_VALUE;
3927 tree vdef = gimple_vdef (stmt);
3928 tree vuse = gimple_vuse (stmt);
3930 tree phi_res = copy_ssa_name (vdef);
3931 gphi *new_phi = create_phi_node (phi_res, bb3);
3932 replace_uses_by (vdef, phi_res);
3933 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3934 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3936 /* Update gsi to point to next stmt. */
3937 bb = bb3;
3938 gsi = gsi_start_bb (bb);
3943 return changed;
3946 /* Return true if the statements before and after the LOOP can be executed in
3947 parallel with the function containing the loop. Resolve conflicting stores
3948 outside LOOP by guarding them such that only a single gang executes them. */
3950 static bool
3951 oacc_entry_exit_ok (class loop *loop,
3952 reduction_info_table_type *reduction_list)
3954 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3955 auto_vec<basic_block> region_bbs
3956 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3958 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3959 bitmap_clear (in_loop_bbs);
3960 for (unsigned int i = 0; i < loop->num_nodes; i++)
3961 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3963 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3964 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3965 reduction_stores);
3967 if (res)
3969 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3970 reduction_stores);
3971 if (changed)
3973 free_dominance_info (CDI_DOMINATORS);
3974 calculate_dominance_info (CDI_DOMINATORS);
3978 free (loop_bbs);
3980 BITMAP_FREE (in_loop_bbs);
3981 BITMAP_FREE (reduction_stores);
3983 return res;
3986 /* Detect parallel loops and generate parallel code using libgomp
3987 primitives. Returns true if some loop was parallelized, false
3988 otherwise. */
3990 static bool
3991 parallelize_loops (bool oacc_kernels_p)
3993 unsigned n_threads;
3994 bool changed = false;
3995 class loop *skip_loop = NULL;
3996 class tree_niter_desc niter_desc;
3997 struct obstack parloop_obstack;
3998 HOST_WIDE_INT estimated;
4000 /* Do not parallelize loops in the functions created by parallelization. */
4001 if (!oacc_kernels_p
4002 && parallelized_function_p (cfun->decl))
4003 return false;
4005 /* Do not parallelize loops in offloaded functions. */
4006 if (!oacc_kernels_p
4007 && oacc_get_fn_attrib (cfun->decl) != NULL)
4008 return false;
4010 if (cfun->has_nonlocal_label)
4011 return false;
4013 /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
4014 the argument to -ftree-parallelize-loops. */
4015 if (oacc_kernels_p)
4016 n_threads = 0;
4017 else
4018 n_threads = flag_tree_parallelize_loops;
4020 gcc_obstack_init (&parloop_obstack);
4021 reduction_info_table_type reduction_list (10);
4023 calculate_dominance_info (CDI_DOMINATORS);
4025 for (auto loop : loops_list (cfun, 0))
4027 if (loop == skip_loop)
4029 if (!loop->in_oacc_kernels_region
4030 && dump_file && (dump_flags & TDF_DETAILS))
4031 fprintf (dump_file,
4032 "Skipping loop %d as inner loop of parallelized loop\n",
4033 loop->num);
4035 skip_loop = loop->inner;
4036 continue;
4038 else
4039 skip_loop = NULL;
4041 reduction_list.empty ();
4043 if (oacc_kernels_p)
4045 if (!loop->in_oacc_kernels_region)
4046 continue;
4048 /* Don't try to parallelize inner loops in an oacc kernels region. */
4049 if (loop->inner)
4050 skip_loop = loop->inner;
4052 if (dump_file && (dump_flags & TDF_DETAILS))
4053 fprintf (dump_file,
4054 "Trying loop %d with header bb %d in oacc kernels"
4055 " region\n", loop->num, loop->header->index);
4058 if (dump_file && (dump_flags & TDF_DETAILS))
4060 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
4061 if (loop->inner)
4062 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
4063 else
4064 fprintf (dump_file, "loop %d is innermost\n",loop->num);
4067 if (!single_dom_exit (loop))
4070 if (dump_file && (dump_flags & TDF_DETAILS))
4071 fprintf (dump_file, "loop is !single_dom_exit\n");
4073 continue;
4076 if (/* And of course, the loop must be parallelizable. */
4077 !can_duplicate_loop_p (loop)
4078 || loop_has_blocks_with_irreducible_flag (loop)
4079 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
4080 /* FIXME: the check for vector phi nodes could be removed. */
4081 || loop_has_vector_phi_nodes (loop))
4082 continue;
4084 estimated = estimated_loop_iterations_int (loop);
4085 if (estimated == -1)
4086 estimated = get_likely_max_loop_iterations_int (loop);
4087 /* FIXME: Bypass this check as graphite doesn't update the
4088 count and frequency correctly now. */
4089 if (!flag_loop_parallelize_all
4090 && !oacc_kernels_p
4091 && ((estimated != -1
4092 && (estimated
4093 < ((HOST_WIDE_INT) n_threads
4094 * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
4095 /* Do not bother with loops in cold areas. */
4096 || optimize_loop_nest_for_size_p (loop)))
4097 continue;
4099 if (!try_get_loop_niter (loop, &niter_desc))
4100 continue;
4102 if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
4103 continue;
4105 if (loop_has_phi_with_address_arg (loop))
4106 continue;
4108 if (!loop->can_be_parallel
4109 && !loop_parallel_p (loop, &parloop_obstack))
4110 continue;
4112 if (oacc_kernels_p
4113 && !oacc_entry_exit_ok (loop, &reduction_list))
4115 if (dump_file)
4116 fprintf (dump_file, "entry/exit not ok: FAILED\n");
4117 continue;
4120 changed = true;
4121 skip_loop = loop->inner;
4123 if (dump_enabled_p ())
4125 dump_user_location_t loop_loc = find_loop_location (loop);
4126 if (loop->inner)
4127 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4128 "parallelizing outer loop %d\n", loop->num);
4129 else
4130 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4131 "parallelizing inner loop %d\n", loop->num);
4134 gen_parallel_loop (loop, &reduction_list,
4135 n_threads, &niter_desc, oacc_kernels_p);
4138 obstack_free (&parloop_obstack, NULL);
4140 /* Parallelization will cause new function calls to be inserted through
4141 which local variables will escape. Reset the points-to solution
4142 for ESCAPED. */
4143 if (changed)
4144 pt_solution_reset (&cfun->gimple_df->escaped);
4146 return changed;
4149 /* Parallelization. */
4151 namespace {
4153 const pass_data pass_data_parallelize_loops =
4155 GIMPLE_PASS, /* type */
4156 "parloops", /* name */
4157 OPTGROUP_LOOP, /* optinfo_flags */
4158 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
4159 ( PROP_cfg | PROP_ssa ), /* properties_required */
4160 0, /* properties_provided */
4161 0, /* properties_destroyed */
4162 0, /* todo_flags_start */
4163 0, /* todo_flags_finish */
4166 class pass_parallelize_loops : public gimple_opt_pass
4168 public:
4169 pass_parallelize_loops (gcc::context *ctxt)
4170 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
4171 oacc_kernels_p (false)
4174 /* opt_pass methods: */
4175 bool gate (function *) final override
4177 if (oacc_kernels_p)
4178 return flag_openacc;
4179 else
4180 return flag_tree_parallelize_loops > 1;
4182 unsigned int execute (function *) final override;
4183 opt_pass * clone () final override
4185 return new pass_parallelize_loops (m_ctxt);
4187 void set_pass_param (unsigned int n, bool param) final override
4189 gcc_assert (n == 0);
4190 oacc_kernels_p = param;
4193 private:
4194 bool oacc_kernels_p;
4195 }; // class pass_parallelize_loops
4197 unsigned
4198 pass_parallelize_loops::execute (function *fun)
4200 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4201 if (nthreads == NULL_TREE)
4202 return 0;
4204 bool in_loop_pipeline = scev_initialized_p ();
4205 if (!in_loop_pipeline)
4206 loop_optimizer_init (LOOPS_NORMAL
4207 | LOOPS_HAVE_RECORDED_EXITS);
4209 if (number_of_loops (fun) <= 1)
4210 return 0;
4212 if (!in_loop_pipeline)
4214 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4215 scev_initialize ();
4218 unsigned int todo = 0;
4219 if (parallelize_loops (oacc_kernels_p))
4221 fun->curr_properties &= ~(PROP_gimple_eomp);
4223 checking_verify_loop_structure ();
4225 /* ??? Intermediate SSA updates with no PHIs might have lost
4226 the virtual operand renaming needed by separate_decls_in_region,
4227 make sure to rename them again. */
4228 mark_virtual_operands_for_renaming (fun);
4229 update_ssa (TODO_update_ssa);
4230 if (in_loop_pipeline)
4231 rewrite_into_loop_closed_ssa (NULL, 0);
4234 if (!in_loop_pipeline)
4236 scev_finalize ();
4237 loop_optimizer_finalize ();
4240 return todo;
4243 } // anon namespace
4245 gimple_opt_pass *
4246 make_pass_parallelize_loops (gcc::context *ctxt)
4248 return new pass_parallelize_loops (ctxt);