gcc/tree-parloops.c

/* Loop autoparallelization.
   Copyright (C) 2006-2019 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-general.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "params.h"
#include "params-enum.h"
#include "tree-ssa-alias.h"
#include "tree-eh.h"
#include "gomp-constants.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "attribs.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */

/*
  Reduction handling:
  currently we use code inspired by vect_force_simple_reduction to detect
  reduction patterns.
  The code transformation will be introduced by an example.

  parloop
  {
    int sum = 1;

    for (i = 0; i < N; i++)
      {
        x[i] = i + 3;
        sum += x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;

  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);

  After the reduction transformation (only relevant parts):

  parloop
  {
    ....

    # Storing the initial value given by the user.  #
    .paral_data_store.32.sum.27 = 1;

    #pragma omp parallel num_threads(4)

    #pragma omp for schedule(static)

    # The neutral element corresponding to the particular
      reduction's operation, e.g. 0 for PLUS_EXPR,
      1 for MULT_EXPR, etc. replaces the user's initial value.  #
    # sum.27_29 = PHI <sum.27_11, 0>

    sum.27_11 = D.1827_8 + sum.27_29;

    GIMPLE_OMP_CONTINUE

    # Adding this reduction phi is done at create_phi_for_local_result().  #
    # sum.27_56 = PHI <sum.27_11, 0>
    GIMPLE_OMP_RETURN

    # Creating the atomic operation is done at
      create_call_for_reduction_1().  #
    #pragma omp atomic_load
    D.1839_59 = *&.paral_data_load.33_51->reduction.23;
    D.1840_60 = sum.27_56 + D.1839_59;
    #pragma omp atomic_store (D.1840_60);

    GIMPLE_OMP_RETURN

    # Collecting the result after the join of the threads is done at
      create_loads_for_reductions().
      The value computed by the threads is loaded from the
      shared struct.  #

    .paral_data_load.33_52 = &.paral_data_store.32;
    sum_37 = .paral_data_load.33_52->sum.27;
    sum_43 = D.1795_41 + sum_37;

    exit bb:
    # sum_21 = PHI <sum_43, sum_26>
    printf (&"%d"[0], sum_21);
  }
*/

/* Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

static void
report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
}

/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
   operation.  Return true if the results of DEF_STMT_INFO are something
   that can be accumulated by such a reduction.  */

static bool
parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
{
  return (is_gimple_assign (def_stmt_info->stmt)
          || is_gimple_call (def_stmt_info->stmt)
          || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
          || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
              && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
              && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
}
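
/* For illustration (this example is not part of the GCC sources): in a
   loop computing

     for (i = 0; i < n; i++)
       sum += a[i] * b[i];

   the multiplication feeding the accumulation is a plain GIMPLE
   assignment, so the check above accepts it as reduction input; an
   induction variable or a non-header PHI merging internal defs would
   be accepted as well.  */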

/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */

static bool
parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
                           gimple *first_stmt)
{
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple *loop_use_stmt = NULL;
  stmt_vec_info use_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  auto_vec<stmt_vec_info, 8> reduc_chain;
  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          /* Check if we got back to the reduction phi.  */
          if (use_stmt == phi)
            {
              loop_use_stmt = use_stmt;
              found = true;
              break;
            }

          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
            {
              loop_use_stmt = use_stmt;
              nloop_uses++;
            }
          else
            n_out_of_loop_uses++;

          /* There can be either a single use in the loop or two uses in
             phi nodes.  */
          if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
            return false;
        }

      if (found)
        break;

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
        return false;

      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
        return false;

      if (!is_gimple_assign (loop_use_stmt)
          || code != gimple_assign_rhs_code (loop_use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
        return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
      reduc_chain.safe_push (use_stmt_info);

      lhs = gimple_assign_lhs (loop_use_stmt);
      size++;
    }

  if (!found || loop_use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  for (unsigned i = 0; i < reduc_chain.length (); ++i)
    {
      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
      if (gimple_assign_rhs2 (next_stmt) == lhs)
        {
          tree op = gimple_assign_rhs1 (next_stmt);
          stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt_info
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
              && parloops_valid_reduction_input_p (def_stmt_info))
            {
              lhs = gimple_assign_lhs (next_stmt);
              continue;
            }

          return false;
        }
      else
        {
          tree op = gimple_assign_rhs2 (next_stmt);
          stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt_info
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
              && parloops_valid_reduction_input_p (def_stmt_info))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "swapping oprnds: %G", next_stmt);

              swap_ssa_operands (next_stmt,
                                 gimple_assign_rhs1_ptr (next_stmt),
                                 gimple_assign_rhs2_ptr (next_stmt));
              update_stmt (next_stmt);
            }
          else
            return false;
        }

      lhs = gimple_assign_lhs (next_stmt);
    }

  /* Build up the actual chain.  */
  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
    {
      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
    }
  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;

  /* Save the chain for further analysis in SLP detection.  */
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
  REDUC_GROUP_SIZE (reduc_chain[0]) = size;

  return true;
}

/* Return true if we need an in-order reduction for operation CODE
   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
   overflow must wrap.  */

static bool
parloops_needs_fold_left_reduction_p (tree type, tree_code code,
                                      bool need_wrapping_integral_overflow)
{
  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type))
    switch (code)
      {
      case MIN_EXPR:
      case MAX_EXPR:
        return false;

      default:
        return !flag_associative_math;
      }

  if (INTEGRAL_TYPE_P (type))
    {
      if (!operation_no_trapping_overflow (type, code))
        return true;
      if (need_wrapping_integral_overflow
          && !TYPE_OVERFLOW_WRAPS (type)
          && operation_can_overflow (code))
        return true;
      return false;
    }

  if (SAT_FIXED_POINT_TYPE_P (type))
    return true;

  return false;
}
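
/* For illustration (not part of the GCC sources): without
   -fassociative-math a float accumulation such as

     float s = 0.f;
     for (int i = 0; i < n; i++)
       s += a[i];

   takes the default case above and is classified as needing an in-order
   (fold-left) reduction, because reassociating the additions can change
   the rounding; MIN_EXPR/MAX_EXPR are insensitive to evaluation order
   and pass.  Similarly, a signed integer sum compiled with -ftrapv must
   not be reassociated, since a different association may trap on
   overflow.  */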

/* Function parloops_is_simple_reduction

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   or

   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

   a1 = phi < a0, a2 >
   inner loop (def of a3)
   a2 = phi < a3 >

   (4) Detect condition expressions, i.e.:
     for (int i = 0; i < N; i++)
       if (a[i] < val)
         ret_val = a[i];

*/

static stmt_vec_info
parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
                              bool *double_reduc,
                              bool need_wrapping_integral_overflow,
                              enum vect_reduction_type *v_reduc_type)
{
  gphi *phi = as_a <gphi *> (phi_info->stmt);
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
  gimple *phi_use_stmt = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  tree type;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool phi_def;

  *double_reduc = false;
  *v_reduc_type = TREE_CODE_REDUCTION;

  tree phi_name = PHI_RESULT (phi);
  /* ???  If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (phi_name))
    return NULL;
  unsigned nphi_def_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "intermediate value used outside loop.\n");

          return NULL;
        }

      nphi_def_loop_uses++;
      phi_use_stmt = use_stmt;
    }

  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction: not ssa_name: %T\n", loop_arg);
      return NULL;
    }

  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
  if (!def_stmt_info
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
    return NULL;

  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction: unhandled reduction operation: %G",
                         def_stmt_info->stmt);
      return NULL;
    }

  unsigned nlatch_def_loop_uses = 0;
  auto_vec<gphi *, 3> lcphis;
  bool inner_loop_of_double_reduc = false;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        nlatch_def_loop_uses++;
      else
        {
          /* We can have more than one loop-closed PHI.  */
          lcphis.safe_push (as_a <gphi *> (use_stmt));
          if (nested_in_vect_loop
              && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
                  == vect_double_reduction_def))
            inner_loop_of_double_reduc = true;
        }
    }

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop we cannot handle uses of the reduction
     value.  */
  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction used in loop.\n");
      return NULL;
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
          || TREE_CODE (op1) != SSA_NAME)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported phi node definition.\n");

          return NULL;
        }

      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (def1)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && loop->inner
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
          && is_gimple_assign (def1)
          && is_a <gphi *> (phi_use_stmt)
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt,
                             "detected double reduction: ");

          *double_reduc = true;
          return def_stmt_info;
        }

      return NULL;
    }

  /* If we are vectorizing an inner reduction we are executing that
     in the original order only in case we are not dealing with a
     double reduction.  */
  bool check_reduction = true;
  if (flow_loop_nested_p (vect_loop, loop))
    {
      gphi *lcphi;
      unsigned i;
      check_reduction = false;
      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
        FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
          {
            gimple *use_stmt = USE_STMT (use_p);
            if (is_gimple_debug (use_stmt))
              continue;
            if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
              check_reduction = true;
          }
    }

  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
  code = orig_code = gimple_assign_rhs_code (def_stmt);

  if (nested_in_vect_loop && !check_reduction)
    {
      /* FIXME: Even for non-reductions code generation is funneled
         through vectorizable_reduction for the stmt defining the
         PHI latch value.  So we have to artificially restrict ourselves
         for the supported operations.  */
      switch (get_gimple_rhs_class (code))
        {
        case GIMPLE_BINARY_RHS:
        case GIMPLE_TERNARY_RHS:
          break;
        default:
          /* Not supported by vectorizable_reduction.  */
          if (dump_enabled_p ())
            report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                             "nested cycle: not handled operation: ");
          return NULL;
        }
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
      return def_stmt_info;
    }

  /* We can handle "res -= x[i]", which is non-associative by
     simply rewriting this into "res += -x[i]".  Avoid changing
     gimple instruction for the first simple tests and only do this
     if we're allowed to change code at all.  */
  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
    code = PLUS_EXPR;

  if (code == COND_EXPR)
    {
      if (! nested_in_vect_loop)
        *v_reduc_type = COND_REDUCTION;

      op3 = gimple_assign_rhs1 (def_stmt);
      if (COMPARISON_CLASS_P (op3))
        {
          op4 = TREE_OPERAND (op3, 1);
          op3 = TREE_OPERAND (op3, 0);
        }
      if (op3 == phi_name || op4 == phi_name)
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                             "reduction: condition depends on previous"
                             " iteration: ");
          return NULL;
        }

      op1 = gimple_assign_rhs2 (def_stmt);
      op2 = gimple_assign_rhs3 (def_stmt);
    }
  else if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: not commutative/associative: ");
      return NULL;
    }
  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
    }
  else
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: not handled operation: ");
      return NULL;
    }

  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: both uses not ssa_names: ");

      return NULL;
    }

  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type, TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "reduction: multiple types: operation type: "
                           "%T, operands types: %T,%T",
                           type, TREE_TYPE (op1), TREE_TYPE (op2));
          if (op3)
            dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));

          if (op4)
            dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
          dump_printf (MSG_NOTE, "\n");
        }

      return NULL;
    }

  /* Check whether it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
  if (check_reduction
      && *v_reduc_type == TREE_CODE_REDUCTION
      && parloops_needs_fold_left_reduction_p (type, code,
                                               need_wrapping_integral_overflow))
    *v_reduc_type = FOLD_LEFT_REDUCTION;

  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  stmt_vec_info def1_info = loop_info->lookup_def (op1);
  stmt_vec_info def2_info = loop_info->lookup_def (op2);
  if (code != COND_EXPR && !def1_info && !def2_info)
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt,
                         "reduction: no defs for operands: ");
      return NULL;
    }

  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2_info
      && def2_info->stmt == phi
      && (code == COND_EXPR
          || !def1_info
          || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
          || parloops_valid_reduction_input_p (def1_info)))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
      return def_stmt_info;
    }

  if (def1_info
      && def1_info->stmt == phi
      && (code == COND_EXPR
          || !def2_info
          || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
          || parloops_valid_reduction_input_p (def2_info)))
    {
      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
        {
          /* Check if we can swap operands (just for simplicity - so that
             the rest of the code can assume that the reduction variable
             is always the last (second) argument).  */
          if (code == COND_EXPR)
            {
              /* Swap cond_expr by inverting the condition.  */
              tree cond_expr = gimple_assign_rhs1 (def_stmt);
              enum tree_code invert_code = ERROR_MARK;
              enum tree_code cond_code = TREE_CODE (cond_expr);

              if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }
              if (invert_code != ERROR_MARK)
                {
                  TREE_SET_CODE (cond_expr, invert_code);
                  swap_ssa_operands (def_stmt,
                                     gimple_assign_rhs2_ptr (def_stmt),
                                     gimple_assign_rhs3_ptr (def_stmt));
                }
              else
                {
                  if (dump_enabled_p ())
                    report_ploop_op (MSG_NOTE, def_stmt,
                                     "detected reduction: cannot swap operands "
                                     "for cond_expr");
                  return NULL;
                }
            }
          else
            swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
                               gimple_assign_rhs2_ptr (def_stmt));

          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt,
                             "detected reduction: need to swap operands: ");
        }
      else
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
        }

      return def_stmt_info;
    }

  /* Try to find SLP reduction chain.  */
  if (! nested_in_vect_loop
      && code != COND_EXPR
      && orig_code != MINUS_EXPR
      && parloops_is_slp_reduction (loop_info, phi, def_stmt))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt,
                         "reduction: detected reduction chain: ");

      return def_stmt_info;
    }

  /* Look for the expression computing loop_arg from loop PHI result.  */
  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
    return def_stmt_info;

  if (dump_enabled_p ())
    {
      report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                       "reduction: unknown pattern: ");
    }

  return NULL;
}
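
/* For illustration (not part of the GCC sources): given a latch
   definition

     sum_11 = sum_29 + a_8;    (sum_29 is the PHI result)

   the reduction variable appears as the first operand, so the operands
   are swapped into the canonical "sum_11 = a_8 + sum_29" form that the
   rest of the pass expects (reduction operand last); when the PHI
   result already feeds the second operand, the reduction is accepted
   as-is by the def2_info case above.  */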

/* Wrapper around parloops_is_simple_reduction, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */

stmt_vec_info
parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
                                 bool *double_reduc,
                                 bool need_wrapping_integral_overflow)
{
  enum vect_reduction_type v_reduc_type;
  stmt_vec_info def_info
    = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
                                    need_wrapping_integral_overflow,
                                    &v_reduc_type);
  if (def_info)
    {
      STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (phi_info) = def_info;
      STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (def_info) = phi_info;
    }
  return def_info;
}

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;           /* reduction statement.  */
  gimple *reduc_phi;            /* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;       /* SSA_NAME_VERSION of original reduc_phi
                                   result.  */
  gphi *keep_res;               /* The PHI_RESULT of this phi is the resulting value
                                   of the reduction variable when exiting the loop.  */
  tree initial_value;           /* The initial value of the reduction var before entering the loop.  */
  tree field;                   /* The name of the field in the parloop data structure intended for reduction.  */
  tree reduc_addr;              /* The address of the reduction variable for
                                   openacc reductions.  */
  tree init;                    /* reduction initialization value.  */
  gphi *new_phi;                /* (helper field) Newly created phi node whose result
                                   will be passed to the atomic operation.  Represents
                                   the local result each thread computed for the reduction
                                   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->is_empty () || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;     /* The version of the name to copy.  */
  tree new_name;        /* The new name used in the copy.  */
  tree field;           /* The field of the structure used to pass the
                           value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
                         struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
                           lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
                          int nb_loops,
                          vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
              && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
         dependence, nor about read-read (aka. input-dependences):
         these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
        continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        return false;

      /* If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
        {
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
                                     DDR_DIST_VECT (ddr, j), distres);

          if (!lambda_vector_lexico_pos (distres, nb_loops))
            return false;
        }
    }
  return true;
}
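
/* For illustration (not part of the GCC sources): parloops only ever
   asks whether the loop can be reversed, i.e. TRANS is the 1x1 matrix
   [-1].  For a cross-iteration dependence such as

     a[i + 1] = a[i];    =>  distance vector (1)

   the product T.d = (-1) is not lexicographically positive, so the
   reversal (and hence parallelization) is rejected; an independent
   loop like a[i] = b[i] contributes no distance vectors and is
   accepted.  */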

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
        fprintf (dump_file, "loop is innermost\n");
      else
        fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (class loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
                 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
        return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
        = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
        name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
        name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
                               &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
                                   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
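
/* For illustration (not part of the GCC sources): given a local
   declaration accessed in the region, say an array "a", the base is
   canonicalized to *(&a), the invariant address &a is assigned once to
   an SSA name on the ENTRY edge (e.g. a.0_1 = &a;), and every later
   call for the same decl reuses that name via DECL_ADDRESS, so the
   region refers to the local only through this canonical pointer.  */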

/* Return the result of the reduction statement STMT, which is either a
   PHI node or an assignment.  */

static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
          ? gimple_phi_result (stmt)
          : gimple_assign_lhs (stmt));
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (reduction_info **slot, class loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
                                reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
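
/* For illustration (not part of the GCC sources): with the user code

     sum = 1;
     for (i = 0; i < n; i++)
       sum += x[i];

   the preheader PHI argument 1 is replaced by 0 (the PLUS_EXPR neutral
   element), reduc->initial_value records the original 1, and the
   stores created later for the shared struct make sure that 1 is
   combined back into the final result after the threads join.  */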

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
        return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
         -- as a gimple operand, when the address taken is a function invariant
         -- as gimple rhs, when the resulting address is not a function
            invariant
         We do not need to do anything special in the latter case (the base of
         the memory reference whose address is taken may be replaced in the
         DECL_P case).  The former case is more complicated, as we need to
         ensure that the new address is still a gimple operand.  Thus, it
         is not sufficient to replace just the base of the memory reference --
         we need to move the whole computation of the address out of the
         loop.  */
      if (!is_gimple_val (t))
        return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
        return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
                                int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
                 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
        {
          gimple_debug_bind_reset_value (stmt);
          dta.changed = true;
        }
    }
  else if (gimple_clobber_p (stmt))
    {
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (is_gimple_debug (gsi_stmt (gsi)))
            {
              if (gimple_debug_bind_p (gsi_stmt (gsi)))
                has_debug_stmt = true;
            }
          else
            eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (gimple_debug_bind_p (gsi_stmt (gsi)))
            eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
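
/* For illustration (not part of the GCC sources): inside the region a
   use of a local array such as

     x = a[i];

   becomes

     A.1_2 = &a;          <- hoisted onto the ENTRY edge
     x = (*A.1_2)[i];     <- indirect reference inside the region

   so the outlined thread function never refers to the original frame's
   locals directly.  */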

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
        return false;

      return true;
    }

  return false;
}
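
/* For illustration (not part of the GCC sources): a constant like 42 is
   trivially invariant; an SSA name defined in a block that precedes
   ENTRY is invariant in the region; a name defined strictly inside the
   region (its block is dominated by ENTRY->src but not by EXIT->dest)
   is not; and any other tree shape is conservatively rejected.  */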

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies,
                               bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
                                           copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
                               name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            copy_name_p);
      SET_USE (use, copy);
    }
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  Returns true if the
   debug statement should be removed by the caller.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
                                name_to_copy_table_type *name_copies,
                                int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
        {
          gimple_debug_bind_reset_value (stmt);
          update_stmt (stmt);
          break;
        }

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
                           SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

static int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
                           FIELD_DECL, SSA_NAME_IDENTIFIER (name),
                           TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (reduction_info **slot, class loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  location_t locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations are executed (the initial value
     should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
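
/* For illustration (not part of the GCC sources): in the example from
   the header comment the phi created here is

     # sum.27_56 = PHI <sum.27_11 (loop exit), 0 (no iterations)>

   merging the thread's computed partial sum with the neutral initial
   value, so the atomic update generated below stays correct even for a
   thread that executed zero iterations.  */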

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  if (reduc->reduc_addr == NULL_TREE)
    {
      load_struct = build_simple_mem_ref (clsn_data->load);
      t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

      addr = build_addr (t);
    }
  else
    {
      /* Set the address for the atomic store.  */
      addr = reduc->reduc_addr;

      /* Remove the non-atomic store '*addr = sum'.  */
      tree res = PHI_RESULT (reduc->keep_res);
      use_operand_p use_p;
      gimple *stmt;
      bool single_use_p = single_imm_use (res, &use_p, &stmt);
      gcc_assert (single_use_p);
      replace_uses_by (gimple_vdef (stmt),
                       gimple_vuse (stmt));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
    }

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr,
                                       OMP_MEMORY_ORDER_RELAXED);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
                   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
                                   GSI_CONTINUE_LINKING);

  gimple *store = gimple_build_omp_atomic_store (name,
                                                 OMP_MEMORY_ORDER_RELAXED);
  gsi_insert_after (&gsi, store, GSI_NEW_STMT);
  return 1;
}
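
/* For illustration (not part of the GCC sources): for a summation
   reduction the blocks created above correspond to the sequence shown
   in the header comment,

     #pragma omp atomic_load
     D.1839_59 = *addr;
     D.1840_60 = D.1839_59 + sum.27_56;
     #pragma omp atomic_store (D.1840_60);

   where sum.27_56 is the PHI result from create_phi_for_local_result,
   i.e. each thread merges its local partial result into the shared
   accumulator with a relaxed atomic read-modify-write.  */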

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (class loop *loop,
                           reduction_info_table_type *reduction_list,
                           struct clsn_data *ld_st_data)
{
  reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
                        NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
        remove_phi_node (&gsi, false);
        return 1;
      }
  gcc_unreachable ();
}
1918 /* Load the reduction result that was stored in LD_ST_DATA.
1919 REDUCTION_LIST describes the list of reductions for which the
1920 loads should be generated. */
1921 static void
1922 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1923 struct clsn_data *ld_st_data)
1925 gimple_stmt_iterator gsi;
1926 tree t;
1927 gimple *stmt;
1929 gsi = gsi_after_labels (ld_st_data->load_bb);
1930 t = build_fold_addr_expr (ld_st_data->store);
1931 stmt = gimple_build_assign (ld_st_data->load, t);
1933 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1935 reduction_list
1936 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1940 /* Callback for htab_traverse. Store the neutral value for the
1941 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1942 1 for MULT_EXPR, etc., into the reduction field.
1943 The reduction is specified in SLOT. The store information is
1944 passed in DATA. */
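/* For illustration, the usual neutral elements (following the standard
   OpenMP reduction initializers) are:

     PLUS_EXPR, BIT_IOR_EXPR, BIT_XOR_EXPR  ->  0
     MULT_EXPR                              ->  1
     BIT_AND_EXPR                           ->  ~0
     MAX_EXPR                               ->  minimum of the type
     MIN_EXPR                               ->  maximum of the type

   so combining a thread's partial result with the stored initial value
   leaves the partial result unchanged.  */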
1947 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1949 struct reduction_info *const red = *slot;
1950 tree t;
1951 gimple *stmt;
1952 gimple_stmt_iterator gsi;
1953 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1955 gsi = gsi_last_bb (clsn_data->store_bb);
1956 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1957 stmt = gimple_build_assign (t, red->initial_value);
1958 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1960 return 1;
1963 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1964 stores to a field of STORE in STORE_BB for the ssa name and its duplicate
1965 specified in SLOT. */
1968 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1969 struct clsn_data *clsn_data)
1971 struct name_to_copy_elt *const elt = *slot;
1972 tree t;
1973 gimple *stmt;
1974 gimple_stmt_iterator gsi;
1975 tree type = TREE_TYPE (elt->new_name);
1976 tree load_struct;
1978 gsi = gsi_last_bb (clsn_data->store_bb);
1979 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1980 stmt = gimple_build_assign (t, ssa_name (elt->version));
1981 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1983 gsi = gsi_last_bb (clsn_data->load_bb);
1984 load_struct = build_simple_mem_ref (clsn_data->load);
1985 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1986 stmt = gimple_build_assign (elt->new_name, t);
1987 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1989 return 1;
1992 /* Moves all the variables used in LOOP and defined outside of it (including
1993 the initial values of loop phi nodes, and *PER_THREAD if it is an ssa
1994 name) to a structure created for this purpose. The code
1996 while (1)
1998 use (a);
1999 use (b);
2002 is transformed this way:
2004 bb0:
2005 old.a = a;
2006 old.b = b;
2008 bb1:
2009 a' = new->a;
2010 b' = new->b;
2011 while (1)
2013 use (a');
2014 use (b');
2017 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
2018 pointer `new' is intentionally not initialized (the loop will be split off
2019 into a separate function later, and `new' will be initialized from its arguments).
2020 LD_ST_DATA holds information about the shared data structure used to pass
2021 information among the threads. It is initialized here, and
2022 gen_parallel_loop will pass it to create_call_for_reduction that
2023 needs this information. REDUCTION_LIST describes the reductions
2024 in LOOP. */
2026 static void
2027 separate_decls_in_region (edge entry, edge exit,
2028 reduction_info_table_type *reduction_list,
2029 tree *arg_struct, tree *new_arg_struct,
2030 struct clsn_data *ld_st_data)
2033 basic_block bb1 = split_edge (entry);
2034 basic_block bb0 = single_pred (bb1);
2035 name_to_copy_table_type name_copies (10);
2036 int_tree_htab_type decl_copies (10);
2037 unsigned i;
2038 tree type, type_name, nvar;
2039 gimple_stmt_iterator gsi;
2040 struct clsn_data clsn_data;
2041 auto_vec<basic_block, 3> body;
2042 basic_block bb;
2043 basic_block entry_bb = bb1;
2044 basic_block exit_bb = exit->dest;
2045 bool has_debug_stmt = false;
2047 entry = single_succ_edge (entry_bb);
2048 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2050 FOR_EACH_VEC_ELT (body, i, bb)
2052 if (bb != entry_bb && bb != exit_bb)
2054 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2055 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
2056 &name_copies, &decl_copies);
2058 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2060 gimple *stmt = gsi_stmt (gsi);
2062 if (is_gimple_debug (stmt))
2063 has_debug_stmt = true;
2064 else
2065 separate_decls_in_region_stmt (entry, exit, stmt,
2066 &name_copies, &decl_copies);
2071 /* Now process debug bind stmts. We must not create decls while
2072 processing debug stmts, so we defer their processing in order to
2073 make sure we will have debug info for as many variables as
2074 possible (all of those that were dealt with in the loop above),
2075 and discard those for which we know there's nothing we can
2076 do. */
2077 if (has_debug_stmt)
2078 FOR_EACH_VEC_ELT (body, i, bb)
2079 if (bb != entry_bb && bb != exit_bb)
2081 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2083 gimple *stmt = gsi_stmt (gsi);
2085 if (is_gimple_debug (stmt))
2087 if (separate_decls_in_region_debug (stmt, &name_copies,
2088 &decl_copies))
2090 gsi_remove (&gsi, true);
2091 continue;
2095 gsi_next (&gsi);
2099 if (name_copies.is_empty () && reduction_list->is_empty ())
2101 /* It may happen that there is nothing to copy (if there are only
2102 loop carried and external variables in the loop). */
2103 *arg_struct = NULL;
2104 *new_arg_struct = NULL;
2106 else
2108 /* Create the type for the structure to store the ssa names to. */
2109 type = lang_hooks.types.make_type (RECORD_TYPE);
2110 type_name = build_decl (UNKNOWN_LOCATION,
2111 TYPE_DECL, create_tmp_var_name (".paral_data"),
2112 type);
2113 TYPE_NAME (type) = type_name;
2115 name_copies.traverse <tree, add_field_for_name> (type);
2116 if (reduction_list && !reduction_list->is_empty ())
2118 /* Create the fields for reductions. */
2119 reduction_list->traverse <tree, add_field_for_reduction> (type);
2121 layout_type (type);
2123 /* Create the loads and stores. */
2124 *arg_struct = create_tmp_var (type, ".paral_data_store");
2125 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
2126 *new_arg_struct = make_ssa_name (nvar);
2128 ld_st_data->store = *arg_struct;
2129 ld_st_data->load = *new_arg_struct;
2130 ld_st_data->store_bb = bb0;
2131 ld_st_data->load_bb = bb1;
2133 name_copies
2134 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
2135 (ld_st_data);
2137 /* Load the calculation from memory (after the join of the threads). */
2139 if (reduction_list && !reduction_list->is_empty ())
2141 reduction_list
2142 ->traverse <struct clsn_data *, create_stores_for_reduction>
2143 (ld_st_data);
2144 clsn_data.load = make_ssa_name (nvar);
2145 clsn_data.load_bb = exit->dest;
2146 clsn_data.store = ld_st_data->store;
2147 create_final_loads_for_reduction (reduction_list, &clsn_data);
2152 /* Returns true if FNDECL was created to run in parallel. */
2154 bool
2155 parallelized_function_p (tree fndecl)
2157 cgraph_node *node = cgraph_node::get (fndecl);
2158 gcc_assert (node != NULL);
2159 return node->parallelized_function;
2162 /* Creates and returns an empty function that will receive the body of
2163 a parallelized loop. */
2165 static tree
2166 create_loop_fn (location_t loc)
2168 char buf[100];
2169 char *tname;
2170 tree decl, type, name, t;
2171 struct function *act_cfun = cfun;
2172 static unsigned loopfn_num;
2174 loc = LOCATION_LOCUS (loc);
2175 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2176 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2177 clean_symbol_name (tname);
2178 name = get_identifier (tname);
2179 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2181 decl = build_decl (loc, FUNCTION_DECL, name, type);
2182 TREE_STATIC (decl) = 1;
2183 TREE_USED (decl) = 1;
2184 DECL_ARTIFICIAL (decl) = 1;
2185 DECL_IGNORED_P (decl) = 0;
2186 TREE_PUBLIC (decl) = 0;
2187 DECL_UNINLINABLE (decl) = 1;
2188 DECL_EXTERNAL (decl) = 0;
2189 DECL_CONTEXT (decl) = NULL_TREE;
2190 DECL_INITIAL (decl) = make_node (BLOCK);
2191 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2193 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
2194 DECL_ARTIFICIAL (t) = 1;
2195 DECL_IGNORED_P (t) = 1;
2196 DECL_RESULT (decl) = t;
2198 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
2199 ptr_type_node);
2200 DECL_ARTIFICIAL (t) = 1;
2201 DECL_ARG_TYPE (t) = ptr_type_node;
2202 DECL_CONTEXT (t) = decl;
2203 TREE_USED (t) = 1;
2204 DECL_ARGUMENTS (decl) = t;
2206 allocate_struct_function (decl, false);
2207 DECL_STRUCT_FUNCTION (decl)->last_clique = act_cfun->last_clique;
2209 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2210 it. */
2211 set_cfun (act_cfun);
2213 return decl;
2216 /* Replace uses of NAME by VAL in block BB. */
2218 static void
2219 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2221 gimple *use_stmt;
2222 imm_use_iterator imm_iter;
2224 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2226 if (gimple_bb (use_stmt) != bb)
2227 continue;
2229 use_operand_p use_p;
2230 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2231 SET_USE (use_p, val);
2235 /* Do transformation from:
2237 <bb preheader>:
2239 goto <bb header>
2241 <bb header>:
2242 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2243 sum_a = PHI <sum_init (preheader), sum_b (latch)>
2245 use (ivtmp_a)
2247 sum_b = sum_a + sum_update
2249 if (ivtmp_a < n)
2250 goto <bb latch>;
2251 else
2252 goto <bb exit>;
2254 <bb latch>:
2255 ivtmp_b = ivtmp_a + 1;
2256 goto <bb header>
2258 <bb exit>:
2259 sum_z = PHI <sum_b (cond[1]), ...>
2261 [1] Where <bb cond> is single_pred (bb latch); in the simplest case,
2262 that's <bb header>.
2266 <bb preheader>:
2268 goto <bb newheader>
2270 <bb header>:
2271 ivtmp_a = PHI <ivtmp_c (latch)>
2272 sum_a = PHI <sum_c (latch)>
2274 use (ivtmp_a)
2276 sum_b = sum_a + sum_update
2278 goto <bb latch>;
2280 <bb newheader>:
2281 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2282 sum_c = PHI <sum_init (preheader), sum_b (latch)>
2283 if (ivtmp_c < n + 1)
2284 goto <bb header>;
2285 else
2286 goto <bb newexit>;
2288 <bb latch>:
2289 ivtmp_b = ivtmp_a + 1;
2290 goto <bb newheader>
2292 <bb newexit>:
2293 sum_y = PHI <sum_c (newheader)>
2295 <bb exit>:
2296 sum_z = PHI <sum_y (newexit), ...>
2299 In unified diff format:
2301 <bb preheader>:
2303 - goto <bb header>
2304 + goto <bb newheader>
2306 <bb header>:
2307 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2308 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
2309 + ivtmp_a = PHI <ivtmp_c (latch)>
2310 + sum_a = PHI <sum_c (latch)>
2312 use (ivtmp_a)
2314 sum_b = sum_a + sum_update
2316 - if (ivtmp_a < n)
2317 - goto <bb latch>;
2318 + goto <bb latch>;
2320 + <bb newheader>:
2321 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2322 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
2323 + if (ivtmp_c < n + 1)
2324 + goto <bb header>;
2325 else
2326 goto <bb exit>;
2328 <bb latch>:
2329 ivtmp_b = ivtmp_a + 1;
2330 - goto <bb header>
2331 + goto <bb newheader>
2333 + <bb newexit>:
2334 + sum_y = PHI <sum_c (newheader)>
2336 <bb exit>:
2337 - sum_z = PHI <sum_b (cond[1]), ...>
2338 + sum_z = PHI <sum_y (newexit), ...>
2340 Note: the example does not show any virtual phis, but these are handled more
2341 or less as reductions.
2344 Moves the exit condition of LOOP to the beginning of its header.
2345 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
2346 bound. */
2348 static void
2349 transform_to_exit_first_loop_alt (class loop *loop,
2350 reduction_info_table_type *reduction_list,
2351 tree bound)
2353 basic_block header = loop->header;
2354 basic_block latch = loop->latch;
2355 edge exit = single_dom_exit (loop);
2356 basic_block exit_block = exit->dest;
2357 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2358 tree control = gimple_cond_lhs (cond_stmt);
2359 edge e;
2361 /* Rewriting virtuals into loop-closed ssa normal form makes this
2362 transformation simpler. It also ensures that the virtuals are in
2363 loop-closed ssa normal form after the transformation, which is required by
2364 create_parallel_loop. */
2365 rewrite_virtuals_into_loop_closed_ssa (loop);
2367 /* Create the new_header block. */
2368 basic_block new_header = split_block_before_cond_jump (exit->src);
2369 edge edge_at_split = single_pred_edge (new_header);
2371 /* Redirect entry edge to new_header. */
2372 edge entry = loop_preheader_edge (loop);
2373 e = redirect_edge_and_branch (entry, new_header);
2374 gcc_assert (e == entry);
2376 /* Redirect post_inc_edge to new_header. */
2377 edge post_inc_edge = single_succ_edge (latch);
2378 e = redirect_edge_and_branch (post_inc_edge, new_header);
2379 gcc_assert (e == post_inc_edge);
2381 /* Redirect post_cond_edge to header. */
2382 edge post_cond_edge = single_pred_edge (latch);
2383 e = redirect_edge_and_branch (post_cond_edge, header);
2384 gcc_assert (e == post_cond_edge);
2386 /* Redirect edge_at_split to latch. */
2387 e = redirect_edge_and_branch (edge_at_split, latch);
2388 gcc_assert (e == edge_at_split);
2390 /* Set the new loop bound. */
2391 gimple_cond_set_rhs (cond_stmt, bound);
2392 update_stmt (cond_stmt);
2394 /* Repair the ssa. */
2395 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2396 edge_var_map *vm;
2397 gphi_iterator gsi;
2398 int i;
2399 for (gsi = gsi_start_phis (header), i = 0;
2400 !gsi_end_p (gsi) && v->iterate (i, &vm);
2401 gsi_next (&gsi), i++)
2403 gphi *phi = gsi.phi ();
2404 tree res_a = PHI_RESULT (phi);
2406 /* Create new phi. */
2407 tree res_c = copy_ssa_name (res_a, phi);
2408 gphi *nphi = create_phi_node (res_c, new_header);
2410 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
2411 replace_uses_in_bb_by (res_a, res_c, new_header);
2413 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
2414 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2416 /* Replace sum_b with sum_c in exit phi. */
2417 tree res_b = redirect_edge_var_map_def (vm);
2418 replace_uses_in_bb_by (res_b, res_c, exit_block);
2420 struct reduction_info *red = reduction_phi (reduction_list, phi);
2421 gcc_assert (virtual_operand_p (res_a)
2422 || res_a == control
2423 || red != NULL);
2425 if (red)
2427 /* Register the new reduction phi. */
2428 red->reduc_phi = nphi;
2429 gimple_set_uid (red->reduc_phi, red->reduc_version);
2432 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
2434 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
2435 flush_pending_stmts (entry);
2437 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
2438 flush_pending_stmts (post_inc_edge);
2441 basic_block new_exit_block = NULL;
2442 if (!single_pred_p (exit->dest))
2444 /* Create a new empty exit block, in between the new loop header and the
2445 old exit block. The function separate_decls_in_region needs this block
2446 to insert code that is active on loop exit, but not any other path. */
2447 new_exit_block = split_edge (exit);
2450 /* Insert and register the reduction exit phis. */
2451 for (gphi_iterator gsi = gsi_start_phis (exit_block);
2452 !gsi_end_p (gsi);
2453 gsi_next (&gsi))
2455 gphi *phi = gsi.phi ();
2456 gphi *nphi = NULL;
2457 tree res_z = PHI_RESULT (phi);
2458 tree res_c;
2460 if (new_exit_block != NULL)
2462 /* Now that we have a new exit block, duplicate the phi of the old
2463 exit block in the new exit block to preserve loop-closed ssa. */
2464 edge succ_new_exit_block = single_succ_edge (new_exit_block);
2465 edge pred_new_exit_block = single_pred_edge (new_exit_block);
2466 tree res_y = copy_ssa_name (res_z, phi);
2467 nphi = create_phi_node (res_y, new_exit_block);
2468 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2469 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2470 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2472 else
2473 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2475 if (virtual_operand_p (res_z))
2476 continue;
2478 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
2479 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2480 if (red != NULL)
2481 red->keep_res = (nphi != NULL
2482 ? nphi
2483 : phi);
2486 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2487 then we're still using some fields, so only bother about fields that are
2488 still used: header and latch.
2489 The loop has a new header bb, so we update it. The latch bb stays the
2490 same. */
2491 loop->header = new_header;
2493 /* Recalculate dominance info. */
2494 free_dominance_info (CDI_DOMINATORS);
2495 calculate_dominance_info (CDI_DOMINATORS);
2497 checking_verify_ssa (true, true);
2500 /* Tries to move the exit condition of LOOP to the beginning of its header
2501 without duplicating the loop body. NIT is the number of iterations of the
2502 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
2503 the transformation is successful. */
2505 static bool
2506 try_transform_to_exit_first_loop_alt (class loop *loop,
2507 reduction_info_table_type *reduction_list,
2508 tree nit)
2510 /* Check whether the latch contains a single statement. */
2511 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2512 return false;
2514 /* Check whether the latch contains no phis. */
2515 if (phi_nodes (loop->latch) != NULL)
2516 return false;
2518 /* Check whether the latch contains the loop iv increment. */
2519 edge back = single_succ_edge (loop->latch);
2520 edge exit = single_dom_exit (loop);
2521 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2522 tree control = gimple_cond_lhs (cond_stmt);
2523 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2524 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2525 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2526 return false;
2528 /* Check that there's no code between the loop condition and the latch. */
2529 if (!single_pred_p (loop->latch)
2530 || single_pred (loop->latch) != exit->src)
2531 return false;
2533 tree alt_bound = NULL_TREE;
2534 tree nit_type = TREE_TYPE (nit);
2536 /* Figure out whether nit + 1 overflows. */
2537 if (TREE_CODE (nit) == INTEGER_CST)
2539 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
2541 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2542 nit, build_one_cst (nit_type));
2544 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
2545 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2546 return true;
2548 else
2550 /* Todo: Figure out whether we can trigger this, whether it's worth
2551 handling optimally, and whether we can handle it optimally. */
2552 return false;
2556 gcc_assert (TREE_CODE (nit) == SSA_NAME);
2558 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2559 iv with base 0 and step 1 that is incremented in the latch, like this:
2561 <bb header>:
2562 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2564 if (iv_1 < nit)
2565 goto <bb latch>;
2566 else
2567 goto <bb exit>;
2569 <bb latch>:
2570 iv_2 = iv_1 + 1;
2571 goto <bb header>;
2573 The range of iv_1 is [0, nit]. The latch edge is taken for
2574 iv_1 in [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
2575 number of latch executions is equal to nit.
2577 The function max_loop_iterations gives us the maximum number of latch
2578 executions, so it gives us the maximum value of nit. */
2579 widest_int nit_max;
2580 if (!max_loop_iterations (loop, &nit_max))
2581 return false;
2583 /* Check if nit + 1 overflows. */
2584 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
2585 if (nit_max >= type_max)
2586 return false;
2588 gimple *def = SSA_NAME_DEF_STMT (nit);
2590 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
2591 if (def
2592 && is_gimple_assign (def)
2593 && gimple_assign_rhs_code (def) == PLUS_EXPR)
2595 tree op1 = gimple_assign_rhs1 (def);
2596 tree op2 = gimple_assign_rhs2 (def);
2597 if (integer_minus_onep (op1))
2598 alt_bound = op2;
2599 else if (integer_minus_onep (op2))
2600 alt_bound = op1;
2603 /* If not found, insert nit + 1. */
2604 if (alt_bound == NULL_TREE)
2606 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2607 build_int_cst_type (nit_type, 1));
2609 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2611 alt_bound
2612 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2613 GSI_CONTINUE_LINKING);
2616 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2617 return true;
2620 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
2621 number of iterations of the loop. REDUCTION_LIST describes the reductions in
2622 LOOP. */
2624 static void
2625 transform_to_exit_first_loop (class loop *loop,
2626 reduction_info_table_type *reduction_list,
2627 tree nit)
2629 basic_block *bbs, *nbbs, ex_bb, orig_header;
2630 unsigned n;
2631 bool ok;
2632 edge exit = single_dom_exit (loop), hpred;
2633 tree control, control_name, res, t;
2634 gphi *phi, *nphi;
2635 gassign *stmt;
2636 gcond *cond_stmt, *cond_nit;
2637 tree nit_1;
2639 split_block_after_labels (loop->header);
2640 orig_header = single_succ (loop->header);
2641 hpred = single_succ_edge (loop->header);
2643 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2644 control = gimple_cond_lhs (cond_stmt);
2645 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
2647 /* Make sure that we have phi nodes on exit for all loop header phis
2648 (create_parallel_loop requires that). */
2649 for (gphi_iterator gsi = gsi_start_phis (loop->header);
2650 !gsi_end_p (gsi);
2651 gsi_next (&gsi))
2653 phi = gsi.phi ();
2654 res = PHI_RESULT (phi);
2655 t = copy_ssa_name (res, phi);
2656 SET_PHI_RESULT (phi, t);
2657 nphi = create_phi_node (res, orig_header);
2658 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
2660 if (res == control)
2662 gimple_cond_set_lhs (cond_stmt, t);
2663 update_stmt (cond_stmt);
2664 control = t;
2668 bbs = get_loop_body_in_dom_order (loop);
2670 for (n = 0; bbs[n] != exit->src; n++)
2671 continue;
2672 nbbs = XNEWVEC (basic_block, n);
2673 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2674 bbs + 1, n, nbbs);
2675 gcc_assert (ok);
2676 free (bbs);
2677 ex_bb = nbbs[0];
2678 free (nbbs);
2680 /* Other than reductions, the only gimple reg that should be copied
2681 out of the loop is the control variable. */
2682 exit = single_dom_exit (loop);
2683 control_name = NULL_TREE;
2684 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2685 !gsi_end_p (gsi); )
2687 phi = gsi.phi ();
2688 res = PHI_RESULT (phi);
2689 if (virtual_operand_p (res))
2691 gsi_next (&gsi);
2692 continue;
2695 /* Check if it is a part of a reduction. If it is,
2696 record the phi in the reduction's keep_res field. The
2697 PHI_RESULT of this phi is the resulting value of the reduction
2698 variable when exiting the loop. */
2700 if (!reduction_list->is_empty ())
2702 struct reduction_info *red;
2704 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2705 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2706 if (red)
2708 red->keep_res = phi;
2709 gsi_next (&gsi);
2710 continue;
2713 gcc_assert (control_name == NULL_TREE
2714 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2715 control_name = res;
2716 remove_phi_node (&gsi, false);
2718 gcc_assert (control_name != NULL_TREE);
2720 /* Initialize the control variable to the number of iterations
2721 according to the rhs of the exit condition. */
2722 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2723 cond_nit = as_a <gcond *> (last_stmt (exit->src));
2724 nit_1 = gimple_cond_rhs (cond_nit);
2725 nit_1 = force_gimple_operand_gsi (&gsi,
2726 fold_convert (TREE_TYPE (control_name), nit_1),
2727 false, NULL_TREE, false, GSI_SAME_STMT);
2728 stmt = gimple_build_assign (control_name, nit_1);
2729 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2732 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2733 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2734 NEW_DATA is the variable that should be initialized from the argument
2735 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2736 that number is to be determined later. */
2738 static void
2739 create_parallel_loop (class loop *loop, tree loop_fn, tree data,
2740 tree new_data, unsigned n_threads, location_t loc,
2741 bool oacc_kernels_p)
2743 gimple_stmt_iterator gsi;
2744 basic_block for_bb, ex_bb, continue_bb;
2745 tree t, param;
2746 gomp_parallel *omp_par_stmt;
2747 gimple *omp_return_stmt1, *omp_return_stmt2;
2748 gimple *phi;
2749 gcond *cond_stmt;
2750 gomp_for *for_stmt;
2751 gomp_continue *omp_cont_stmt;
2752 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2753 edge exit, nexit, guard, end, e;
2755 if (oacc_kernels_p)
2757 gcc_checking_assert (lookup_attribute ("oacc kernels",
2758 DECL_ATTRIBUTES (cfun->decl)));
2759 /* Indicate to later processing that this is a parallelized OpenACC
2760 kernels construct. */
2761 DECL_ATTRIBUTES (cfun->decl)
2762 = tree_cons (get_identifier ("oacc kernels parallelized"),
2763 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2765 else
2767 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2769 basic_block bb = loop_preheader_edge (loop)->src;
2770 basic_block paral_bb = single_pred (bb);
2771 gsi = gsi_last_bb (paral_bb);
2773 gcc_checking_assert (n_threads != 0);
2774 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2775 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2776 = build_int_cst (integer_type_node, n_threads);
2777 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2778 gimple_set_location (omp_par_stmt, loc);
2780 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2782 /* Initialize NEW_DATA. */
2783 if (data)
2785 gassign *assign_stmt;
2787 gsi = gsi_after_labels (bb);
2789 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2790 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2791 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2793 assign_stmt = gimple_build_assign (new_data,
2794 fold_convert (TREE_TYPE (new_data), param));
2795 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2798 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2799 bb = split_loop_exit_edge (single_dom_exit (loop));
2800 gsi = gsi_last_bb (bb);
2801 omp_return_stmt1 = gimple_build_omp_return (false);
2802 gimple_set_location (omp_return_stmt1, loc);
2803 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2806 /* Extract data for GIMPLE_OMP_FOR. */
2807 gcc_assert (loop->header == single_dom_exit (loop)->src);
2808 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2810 cvar = gimple_cond_lhs (cond_stmt);
2811 cvar_base = SSA_NAME_VAR (cvar);
2812 phi = SSA_NAME_DEF_STMT (cvar);
2813 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2814 initvar = copy_ssa_name (cvar);
2815 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2816 initvar);
2817 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2819 gsi = gsi_last_nondebug_bb (loop->latch);
2820 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2821 gsi_remove (&gsi, true);
2823 /* Prepare cfg. */
2824 for_bb = split_edge (loop_preheader_edge (loop));
2825 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2826 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2827 gcc_assert (exit == single_dom_exit (loop));
2829 guard = make_edge (for_bb, ex_bb, 0);
2830 /* FIXME: What is the probability? */
2831 guard->probability = profile_probability::guessed_never ();
2832 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2833 loop->latch = split_edge (single_succ_edge (loop->latch));
2834 single_pred_edge (loop->latch)->flags = 0;
2835 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2836 rescan_loop_exit (end, true, false);
2838 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2839 !gsi_end_p (gpi); gsi_next (&gpi))
2841 location_t locus;
2842 gphi *phi = gpi.phi ();
2843 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2844 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2846 /* If the exit phi is not connected to a header phi in the same loop, this
2847 value is not modified in the loop, and we're done with this phi. */
2848 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2849 && gimple_bb (def_stmt) == loop->header))
2851 locus = gimple_phi_arg_location_from_edge (phi, exit);
2852 add_phi_arg (phi, def, guard, locus);
2853 add_phi_arg (phi, def, end, locus);
2854 continue;
2857 gphi *stmt = as_a <gphi *> (def_stmt);
2858 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2859 locus = gimple_phi_arg_location_from_edge (stmt,
2860 loop_preheader_edge (loop));
2861 add_phi_arg (phi, def, guard, locus);
2863 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2864 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2865 add_phi_arg (phi, def, end, locus);
2867 e = redirect_edge_and_branch (exit, nexit->dest);
2868 PENDING_STMT (e) = NULL;
2870 /* Emit GIMPLE_OMP_FOR. */
2871 if (oacc_kernels_p)
2872 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2873 omp-offload.c:execute_oacc_device_lower. */
2874 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2875 else
2877 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2878 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2879 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type
2880 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2881 switch (schedule_type)
2883 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2884 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2885 break;
2886 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2887 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2888 break;
2889 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2890 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2891 break;
2892 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2893 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2894 chunk_size = 0;
2895 break;
2896 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2897 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2898 chunk_size = 0;
2899 break;
2900 default:
2901 gcc_unreachable ();
2903 if (chunk_size != 0)
2904 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2905 = build_int_cst (integer_type_node, chunk_size);
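/* For reference: the schedule kind and chunk size used above are driven by
   the '--param parloops-schedule=' and '--param parloops-chunk-size='
   knobs, e.g. (an illustrative invocation, file name assumed):

     gcc -O2 -ftree-parallelize-loops=4 \
         --param parloops-schedule=dynamic \
         --param parloops-chunk-size=100 test.c  */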
2908 for_stmt = gimple_build_omp_for (NULL,
2909 (oacc_kernels_p
2910 ? GF_OMP_FOR_KIND_OACC_LOOP
2911 : GF_OMP_FOR_KIND_FOR),
2912 t, 1, NULL);
2914 gimple_cond_set_lhs (cond_stmt, cvar_base);
2915 type = TREE_TYPE (cvar);
2916 gimple_set_location (for_stmt, loc);
2917 gimple_omp_for_set_index (for_stmt, 0, initvar);
2918 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2919 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2920 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2921 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2922 cvar_base,
2923 build_int_cst (type, 1)));
2925 gsi = gsi_last_bb (for_bb);
2926 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2927 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2929 /* Emit GIMPLE_OMP_CONTINUE. */
2930 continue_bb = single_pred (loop->latch);
2931 gsi = gsi_last_bb (continue_bb);
2932 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2933 gimple_set_location (omp_cont_stmt, loc);
2934 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2935 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2937 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2938 gsi = gsi_last_bb (ex_bb);
2939 omp_return_stmt2 = gimple_build_omp_return (true);
2940 gimple_set_location (omp_return_stmt2, loc);
2941 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2943 /* After the above, the dominance info is invalid. Recompute it. */
2944 free_dominance_info (CDI_DOMINATORS);
2945 calculate_dominance_info (CDI_DOMINATORS);
2948 /* Return the number of phis in BB. If COUNT_VIRTUAL_P is false, don't count
2949 the virtual phi. */
2951 static unsigned int
2952 num_phis (basic_block bb, bool count_virtual_p)
2954 unsigned int nr_phis = 0;
2955 gphi_iterator gsi;
2956 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2958 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2959 continue;
2961 nr_phis++;
2964 return nr_phis;
2967 /* Generates code to execute the iterations of LOOP in N_THREADS
2968 threads in parallel; N_THREADS can be 0 if the number of threads is to
2969 be determined later.
2971 NITER describes the number of iterations of LOOP.
2972 REDUCTION_LIST describes the reductions present in the LOOP. */
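/* For illustration (an assumed example, not from this file): with
   -ftree-parallelize-loops=N, a loop such as

     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += a[i];

   is transformed roughly as if it had been written as

     #pragma omp parallel for reduction (+:sum) schedule (static)
     for (int i = 0; i < n; i++)
       sum += a[i];

   except that the GIMPLE_OMP_* constructs are emitted directly, without
   going through the OpenMP front end.  */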
2974 static void
2975 gen_parallel_loop (class loop *loop,
2976 reduction_info_table_type *reduction_list,
2977 unsigned n_threads, class tree_niter_desc *niter,
2978 bool oacc_kernels_p)
2980 tree many_iterations_cond, type, nit;
2981 tree arg_struct, new_arg_struct;
2982 gimple_seq stmts;
2983 edge entry, exit;
2984 struct clsn_data clsn_data;
2985 location_t loc;
2986 gimple *cond_stmt;
2987 unsigned int m_p_thread = 2;
2989 /* From
2991 ---------------------------------------------------------------------
2992 loop
2994 IV = phi (INIT, IV + STEP)
2995 BODY1;
2996 if (COND)
2997 break;
2998 BODY2;
3000 ---------------------------------------------------------------------
3002 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
3003 we generate the following code:
3005 ---------------------------------------------------------------------
3007 if (MAY_BE_ZERO
3008 || NITER < MIN_PER_THREAD * N_THREADS)
3009 goto original;
3011 BODY1;
3012 store all local loop-invariant variables used in body of the loop to DATA.
3013 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
3014 load the variables from DATA.
3015 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
3016 BODY2;
3017 BODY1;
3018 GIMPLE_OMP_CONTINUE;
3019 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
3020 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
3021 goto end;
3023 original:
3024 loop
3026 IV = phi (INIT, IV + STEP)
3027 BODY1;
3028 if (COND)
3029 break;
3030 BODY2;
3033 end:
3037 /* Create two versions of the loop -- in the old one, we know that the
3038 number of iterations is large enough, and we will transform it into the
3039 loop that will be split to loop_fn; the new one will be used for the
3040 remaining iterations. */
3042 /* We should compute a better number-of-iterations value for outer loops.
3043 That is, if we have
3045 for (i = 0; i < n; ++i)
3046 for (j = 0; j < m; ++j)
3049 we should compute nit = n * m, not nit = n.
3050 Also, the may_be_zero handling would need to be adjusted. */
3052 type = TREE_TYPE (niter->niter);
3053 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3054 NULL_TREE);
3055 if (stmts)
3056 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3058 if (!oacc_kernels_p)
3060 if (loop->inner)
3061 m_p_thread = 2;
3062 else
3063 m_p_thread = MIN_PER_THREAD;
3065 gcc_checking_assert (n_threads != 0);
3066 many_iterations_cond =
3067 fold_build2 (GE_EXPR, boolean_type_node,
3068 nit, build_int_cst (type, m_p_thread * n_threads - 1));
3070 many_iterations_cond
3071 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3072 invert_truthvalue (unshare_expr (niter->may_be_zero)),
3073 many_iterations_cond);
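/* For illustration: assuming, for the sake of the example, that
   MIN_PER_THREAD is 100 and n_threads is 4, the condition built above is
   roughly

     !may_be_zero && nit >= 100 * 4 - 1

   so loops known to run fewer than about 400 iterations keep using the
   original sequential version.  */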
3074 many_iterations_cond
3075 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
3076 if (stmts)
3077 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3078 if (!is_gimple_condexpr (many_iterations_cond))
3080 many_iterations_cond
3081 = force_gimple_operand (many_iterations_cond, &stmts,
3082 true, NULL_TREE);
3083 if (stmts)
3084 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3085 stmts);
3088 initialize_original_copy_tables ();
3090 /* We assume that the loop usually iterates a lot. */
3091 loop_version (loop, many_iterations_cond, NULL,
3092 profile_probability::likely (),
3093 profile_probability::unlikely (),
3094 profile_probability::likely (),
3095 profile_probability::unlikely (), true);
3096 update_ssa (TODO_update_ssa);
3097 free_original_copy_tables ();
3100 /* Base all the induction variables in LOOP on a single control one. */
3101 canonicalize_loop_ivs (loop, &nit, true);
3102 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3104 /* The call to canonicalize_loop_ivs above failed to "base all the
3105 induction variables in LOOP on a single control one". Do damage
3106 control. */
3107 basic_block preheader = loop_preheader_edge (loop)->src;
3108 basic_block cond_bb = single_pred (preheader);
3109 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3110 gimple_cond_make_true (cond);
3111 update_stmt (cond);
3112 /* We've gotten rid of the duplicate loop created by loop_version, but
3113 we can't undo whatever canonicalize_loop_ivs has done.
3114 TODO: Fix this properly by ensuring that the call to
3115 canonicalize_loop_ivs succeeds. */
3116 if (dump_file
3117 && (dump_flags & TDF_DETAILS))
3118 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3119 " aborting transformation\n", loop->num);
3120 return;
3123 /* Ensure that the exit condition is the first statement in the loop.
3124 The common case is that the latch of the loop is empty (apart from the
3125 increment) and immediately follows the loop exit test. Attempt to move the
3126 entry of the loop directly before the exit check and increase the number of
3127 iterations of the loop by one. */
3128 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3130 if (dump_file
3131 && (dump_flags & TDF_DETAILS))
3132 fprintf (dump_file,
3133 "alternative exit-first loop transform succeeded"
3134 " for loop %d\n", loop->num);
3136 else
3138 if (oacc_kernels_p)
3139 n_threads = 1;
3141 /* Fall back on the method that handles more cases, but duplicates the
3142 loop body: move the exit condition of LOOP to the beginning of its
3143 header, and duplicate the part of the last iteration that gets disabled
3144 to the exit of the loop. */
3145 transform_to_exit_first_loop (loop, reduction_list, nit);
3148 /* Generate initializations for reductions. */
3149 if (!reduction_list->is_empty ())
3150 reduction_list->traverse <class loop *, initialize_reductions> (loop);
3152 /* Eliminate the references to local variables from the loop. */
3153 gcc_assert (single_exit (loop));
3154 entry = loop_preheader_edge (loop);
3155 exit = single_dom_exit (loop);
3157 /* This rewrites the body in terms of new variables. This has already
3158 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
3159 if (!oacc_kernels_p)
3161 eliminate_local_variables (entry, exit);
3162 /* In the old loop, move all variables non-local to the loop to a
3163 structure and back, and create separate decls for the variables used in the
3164 loop. */
3165 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3166 &new_arg_struct, &clsn_data);
3168 else
3170 arg_struct = NULL_TREE;
3171 new_arg_struct = NULL_TREE;
3172 clsn_data.load = NULL_TREE;
3173 clsn_data.load_bb = exit->dest;
3174 clsn_data.store = NULL_TREE;
3175 clsn_data.store_bb = NULL;
3178 /* Create the parallel constructs. */
3179 loc = UNKNOWN_LOCATION;
3180 cond_stmt = last_stmt (loop->header);
3181 if (cond_stmt)
3182 loc = gimple_location (cond_stmt);
3183 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3184 n_threads, loc, oacc_kernels_p);
3185 if (!reduction_list->is_empty ())
3186 create_call_for_reduction (loop, reduction_list, &clsn_data);
3188 scev_reset ();
3190 /* Free loop bound estimations that could contain references to
3191 removed statements. */
3192 free_numbers_of_iterations_estimates (cfun);
3195 /* Returns true when LOOP contains vector phi nodes. */
3197 static bool
3198 loop_has_vector_phi_nodes (class loop *loop)
3200 unsigned i;
3201 basic_block *bbs = get_loop_body_in_dom_order (loop);
3202 gphi_iterator gsi;
3203 bool res = true;
3205 for (i = 0; i < loop->num_nodes; i++)
3206 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3207 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
3208 goto end;
3210 res = false;
3211 end:
3212 free (bbs);
3213 return res;
3216 /* Create a reduction_info struct, initialize it with REDUC_STMT
3217 and PHI, and insert it into the REDUCTION_LIST. */
3219 static void
3220 build_new_reduction (reduction_info_table_type *reduction_list,
3221 gimple *reduc_stmt, gphi *phi)
3223 reduction_info **slot;
3224 struct reduction_info *new_reduction;
3225 enum tree_code reduction_code;
3227 gcc_assert (reduc_stmt);
3229 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3231 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3232 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3233 reduction_code = gimple_assign_rhs_code (def1);
3235 else
3236 reduction_code = gimple_assign_rhs_code (reduc_stmt);
3237 /* Check for an OpenMP-supported reduction. */
3238 switch (reduction_code)
3240 case PLUS_EXPR:
3241 case MULT_EXPR:
3242 case MAX_EXPR:
3243 case MIN_EXPR:
3244 case BIT_IOR_EXPR:
3245 case BIT_XOR_EXPR:
3246 case BIT_AND_EXPR:
3247 case TRUTH_OR_EXPR:
3248 case TRUTH_XOR_EXPR:
3249 case TRUTH_AND_EXPR:
3250 break;
3251 default:
3252 return;
3255 if (dump_file && (dump_flags & TDF_DETAILS))
3257 fprintf (dump_file,
3258 "Detected reduction. reduction stmt is:\n");
3259 print_gimple_stmt (dump_file, reduc_stmt, 0);
3260 fprintf (dump_file, "\n");
3263 new_reduction = XCNEW (struct reduction_info);
3265 new_reduction->reduc_stmt = reduc_stmt;
3266 new_reduction->reduc_phi = phi;
3267 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
3268 new_reduction->reduction_code = reduction_code;
3269 slot = reduction_list->find_slot (new_reduction, INSERT);
3270 *slot = new_reduction;
3273 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3276 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
3278 struct reduction_info *const red = *slot;
3279 gimple_set_uid (red->reduc_phi, red->reduc_version);
3280 return 1;
3283 /* Return true if the type of reduction performed by STMT_INFO is suitable
3284 for this pass. */
3286 static bool
3287 valid_reduction_p (stmt_vec_info stmt_info)
3289 /* Parallelization would reassociate the operation, which isn't
3290 allowed for in-order reductions. */
3291 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3292 return reduc_type != FOLD_LEFT_REDUCTION;
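/* For illustration (an assumed example): a FOLD_LEFT_REDUCTION is an
   in-order reduction such as a floating-point sum compiled without
   -ffast-math:

     float s = 0.f;
     for (int i = 0; i < n; i++)
       s += a[i];       (FP addition may not be reassociated)

   Distributing the iterations over threads would reassociate the
   additions, so such reductions are rejected here.  */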
3295 /* Detect all reductions in the LOOP, and insert them into REDUCTION_LIST. */
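/* For illustration (an assumed example): the double-reduction case handled
   below covers an outer-loop reduction fed through an inner loop, as in

     for (i = 0; i < n; i++)      (LOOP)
       for (j = 0; j < m; j++)    (loop->inner)
         sum += a[i][j];

   where the outer-loop phi for 'sum' is recognized only via the inner-loop
   reduction, hence the separate double_reduc_phis handling.  */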
3297 static void
3298 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
3300 gphi_iterator gsi;
3301 loop_vec_info simple_loop_info;
3302 auto_vec<gphi *, 4> double_reduc_phis;
3303 auto_vec<gimple *, 4> double_reduc_stmts;
3305 vec_info_shared shared;
3306 simple_loop_info = vect_analyze_loop_form (loop, &shared);
3307 if (simple_loop_info == NULL)
3308 goto gather_done;
3310 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3312 gphi *phi = gsi.phi ();
3313 affine_iv iv;
3314 tree res = PHI_RESULT (phi);
3315 bool double_reduc;
3317 if (virtual_operand_p (res))
3318 continue;
3320 if (simple_iv (loop, loop, res, &iv, true))
3321 continue;
3323 stmt_vec_info reduc_stmt_info
3324 = parloops_force_simple_reduction (simple_loop_info,
3325 simple_loop_info->lookup_stmt (phi),
3326 &double_reduc, true);
3327 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
3328 continue;
3330 if (double_reduc)
3332 if (loop->inner->inner != NULL)
3333 continue;
3335 double_reduc_phis.safe_push (phi);
3336 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
3337 continue;
3340 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
3342 delete simple_loop_info;
3344 if (!double_reduc_phis.is_empty ())
3346 vec_info_shared shared;
3347 simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
3348 if (simple_loop_info)
3350 gphi *phi;
3351 unsigned int i;
3353 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3355 affine_iv iv;
3356 tree res = PHI_RESULT (phi);
3357 bool double_reduc;
3359 use_operand_p use_p;
3360 gimple *inner_stmt;
3361 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3362 gcc_assert (single_use_p);
3363 if (gimple_code (inner_stmt) != GIMPLE_PHI)
3364 continue;
3365 gphi *inner_phi = as_a <gphi *> (inner_stmt);
3366 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3367 &iv, true))
3368 continue;
3370 stmt_vec_info inner_phi_info
3371 = simple_loop_info->lookup_stmt (inner_phi);
3372 stmt_vec_info inner_reduc_stmt_info
3373 = parloops_force_simple_reduction (simple_loop_info,
3374 inner_phi_info,
3375 &double_reduc, true);
3376 gcc_assert (!double_reduc);
3377 if (!inner_reduc_stmt_info
3378 || !valid_reduction_p (inner_reduc_stmt_info))
3379 continue;
3381 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3383 delete simple_loop_info;
3387 gather_done:
3388 if (reduction_list->is_empty ())
3389 return;
3391 /* Because gimple_uid is used by the vectorizer between the call to
3392 vect_analyze_loop_form and the deletion of simple_loop_info, we can
3393 set the gimple_uid of reduc_phi stmts only now. */
3394 basic_block bb;
3395 FOR_EACH_BB_FN (bb, cfun)
3396 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3397 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3398 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
3401 /* Try to initialize NITER for the code generation part. */
3403 static bool
3404 try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
3406 edge exit = single_dom_exit (loop);
3408 gcc_assert (exit);
3410 /* We need to know the number of iterations, and there should be no uses
3411 outside of the loop of values defined inside it, unless the values are
3412 invariants of the loop. */
3413 if (!number_of_iterations_exit (loop, exit, niter, false))
3415 if (dump_file && (dump_flags & TDF_DETAILS))
3416 fprintf (dump_file, " FAILED: number of iterations not known\n");
3417 return false;
3420 return true;
3423 /* Return the default def of the first function argument. */
3425 static tree
3426 get_omp_data_i_param (void)
3428 tree decl = DECL_ARGUMENTS (cfun->decl);
3429 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3430 return ssa_default_def (cfun, decl);
3433 /* For PHI in loop header of LOOP, look for pattern:
3435 <bb preheader>
3436 .omp_data_i = &.omp_data_arr;
3437 addr = .omp_data_i->sum;
3438 sum_a = *addr;
3440 <bb header>:
3441 sum_b = PHI <sum_a (preheader), sum_c (latch)>
3443 and return addr. Otherwise, return NULL_TREE. */
3445 static tree
3446 find_reduc_addr (class loop *loop, gphi *phi)
3448 edge e = loop_preheader_edge (loop);
3449 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3450 gimple *stmt = SSA_NAME_DEF_STMT (arg);
3451 if (!gimple_assign_single_p (stmt))
3452 return NULL_TREE;
3453 tree memref = gimple_assign_rhs1 (stmt);
3454 if (TREE_CODE (memref) != MEM_REF)
3455 return NULL_TREE;
3456 tree addr = TREE_OPERAND (memref, 0);
3458 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3459 if (!gimple_assign_single_p (stmt2))
3460 return NULL_TREE;
3461 tree compref = gimple_assign_rhs1 (stmt2);
3462 if (TREE_CODE (compref) != COMPONENT_REF)
3463 return NULL_TREE;
3464 tree addr2 = TREE_OPERAND (compref, 0);
3465 if (TREE_CODE (addr2) != MEM_REF)
3466 return NULL_TREE;
3467 addr2 = TREE_OPERAND (addr2, 0);
3468 if (TREE_CODE (addr2) != SSA_NAME
3469 || addr2 != get_omp_data_i_param ())
3470 return NULL_TREE;
3472 return addr;
3475 /* Try to initialize REDUCTION_LIST for the code generation part.
3476 REDUCTION_LIST describes the reductions. */
3478 static bool
3479 try_create_reduction_list (loop_p loop,
3480 reduction_info_table_type *reduction_list,
3481 bool oacc_kernels_p)
3483 edge exit = single_dom_exit (loop);
3484 gphi_iterator gsi;
3486 gcc_assert (exit);
3488 /* Try to get rid of exit phis. */
3489 final_value_replacement_loop (loop);
3491 gather_scalar_reductions (loop, reduction_list);
3494 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3496 gphi *phi = gsi.phi ();
3497 struct reduction_info *red;
3498 imm_use_iterator imm_iter;
3499 use_operand_p use_p;
3500 gimple *reduc_phi;
3501 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3503 if (!virtual_operand_p (val))
3505 if (TREE_CODE (val) != SSA_NAME)
3507 if (dump_file && (dump_flags & TDF_DETAILS))
3508 fprintf (dump_file,
3509 " FAILED: exit PHI argument invariant.\n");
3510 return false;
3513 if (dump_file && (dump_flags & TDF_DETAILS))
3515 fprintf (dump_file, "phi is ");
3516 print_gimple_stmt (dump_file, phi, 0);
3517 fprintf (dump_file, "arg of phi to exit: value ");
3518 print_generic_expr (dump_file, val);
3519 fprintf (dump_file, " used outside loop\n");
3520 fprintf (dump_file,
3521 " checking if it is part of reduction pattern:\n");
3523 if (reduction_list->is_empty ())
3525 if (dump_file && (dump_flags & TDF_DETAILS))
3526 fprintf (dump_file,
3527 " FAILED: it is not a part of reduction.\n");
3528 return false;
3530 reduc_phi = NULL;
3531 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3533 if (!gimple_debug_bind_p (USE_STMT (use_p))
3534 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3536 reduc_phi = USE_STMT (use_p);
3537 break;
3540 red = reduction_phi (reduction_list, reduc_phi);
3541 if (red == NULL)
3543 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file,
3545 " FAILED: it is not a part of reduction.\n");
3546 return false;
3548 if (red->keep_res != NULL)
3550 if (dump_file && (dump_flags & TDF_DETAILS))
3551 fprintf (dump_file,
3552 " FAILED: reduction has multiple exit phis.\n");
3553 return false;
3555 red->keep_res = phi;
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3558 fprintf (dump_file, "reduction phi is ");
3559 print_gimple_stmt (dump_file, red->reduc_phi, 0);
3560 fprintf (dump_file, "reduction stmt is ");
3561 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
3566 /* The iterations of the loop may communicate only through bivs whose
3567 iteration space can be distributed efficiently. */
3568 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3570 gphi *phi = gsi.phi ();
3571 tree def = PHI_RESULT (phi);
3572 affine_iv iv;
3574 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
3576 struct reduction_info *red;
3578 red = reduction_phi (reduction_list, phi);
3579 if (red == NULL)
3581 if (dump_file && (dump_flags & TDF_DETAILS))
3582 fprintf (dump_file,
3583 " FAILED: scalar dependency between iterations\n");
3584 return false;
3589 if (oacc_kernels_p)
3591 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3592 gsi_next (&gsi))
3594 gphi *phi = gsi.phi ();
3595 tree def = PHI_RESULT (phi);
3596 affine_iv iv;
3598 if (!virtual_operand_p (def)
3599 && !simple_iv (loop, loop, def, &iv, true))
3601 tree addr = find_reduc_addr (loop, phi);
3602 if (addr == NULL_TREE)
3603 return false;
3604 struct reduction_info *red = reduction_phi (reduction_list, phi);
3605 red->reduc_addr = addr;
3610 return true;
3613 /* Return true if LOOP contains phis with an ADDR_EXPR among their arguments. */
3615 static bool
3616 loop_has_phi_with_address_arg (class loop *loop)
3618 basic_block *bbs = get_loop_body (loop);
3619 bool res = false;
3621 unsigned i, j;
3622 gphi_iterator gsi;
3623 for (i = 0; i < loop->num_nodes; i++)
3624 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3626 gphi *phi = gsi.phi ();
3627 for (j = 0; j < gimple_phi_num_args (phi); j++)
3629 tree arg = gimple_phi_arg_def (phi, j);
3630 if (TREE_CODE (arg) == ADDR_EXPR)
3632 /* This should be handled by eliminate_local_variables, but that
3633 function currently ignores phis. */
3634 res = true;
3635 goto end;
3639 end:
3640 free (bbs);
3642 return res;
3645 /* Return true if memory ref REF (corresponding to the stmt at GSI in
3646 REGION_BBS[I]) conflicts with the statements in REGION_BBS[I] after GSI,
3647 or with the statements in REGION_BBS[I + n]. REF_IS_STORE indicates
3648 whether REF is a store. Ignore conflicts with SKIP_STMT. */
3650 static bool
3651 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3652 bool ref_is_store, vec<basic_block> region_bbs,
3653 unsigned int i, gimple *skip_stmt)
3655 basic_block bb = region_bbs[i];
3656 gsi_next (&gsi);
3658 while (true)
3660 for (; !gsi_end_p (gsi);
3661 gsi_next (&gsi))
3663 gimple *stmt = gsi_stmt (gsi);
3664 if (stmt == skip_stmt)
3666 if (dump_file)
3668 fprintf (dump_file, "skipping reduction store: ");
3669 print_gimple_stmt (dump_file, stmt, 0);
3671 continue;
3674 if (!gimple_vdef (stmt)
3675 && !gimple_vuse (stmt))
3676 continue;
3678 if (gimple_code (stmt) == GIMPLE_RETURN)
3679 continue;
3681 if (ref_is_store)
3683 if (ref_maybe_used_by_stmt_p (stmt, ref))
3685 if (dump_file)
3687 fprintf (dump_file, "Stmt ");
3688 print_gimple_stmt (dump_file, stmt, 0);
3690 return true;
3693 else
3695 if (stmt_may_clobber_ref_p_1 (stmt, ref))
3697 if (dump_file)
3699 fprintf (dump_file, "Stmt ");
3700 print_gimple_stmt (dump_file, stmt, 0);
3702 return true;
3706 i++;
3707 if (i == region_bbs.length ())
3708 break;
3709 bb = region_bbs[i];
3710 gsi = gsi_start_bb (bb);
3713 return false;
3716 /* Return true if the bbs in REGION_BBS that are not in IN_LOOP_BBS can
3717 be executed in parallel with the bbs in REGION_BBS that contain the
3718 loop. Return the stores of reduction results in REDUCTION_STORES. */
3720 static bool
3721 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3722 reduction_info_table_type *reduction_list,
3723 bitmap reduction_stores)
3725 tree omp_data_i = get_omp_data_i_param ();
3727 unsigned i;
3728 basic_block bb;
3729 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3731 if (bitmap_bit_p (in_loop_bbs, bb->index))
3732 continue;
3734 gimple_stmt_iterator gsi;
3735 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3736 gsi_next (&gsi))
3738 gimple *stmt = gsi_stmt (gsi);
3739 gimple *skip_stmt = NULL;
3741 if (is_gimple_debug (stmt)
3742 || gimple_code (stmt) == GIMPLE_COND)
3743 continue;
3745 ao_ref ref;
3746 bool ref_is_store = false;
3747 if (gimple_assign_load_p (stmt))
3749 tree rhs = gimple_assign_rhs1 (stmt);
3750 tree base = get_base_address (rhs);
3751 if (TREE_CODE (base) == MEM_REF
3752 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3753 continue;
3755 tree lhs = gimple_assign_lhs (stmt);
3756 if (TREE_CODE (lhs) == SSA_NAME
3757 && has_single_use (lhs))
3759 use_operand_p use_p;
3760 gimple *use_stmt;
3761 struct reduction_info *red;
3762 single_imm_use (lhs, &use_p, &use_stmt);
3763 if (gimple_code (use_stmt) == GIMPLE_PHI
3764 && (red = reduction_phi (reduction_list, use_stmt)))
3766 tree val = PHI_RESULT (red->keep_res);
3767 if (has_single_use (val))
3769 single_imm_use (val, &use_p, &use_stmt);
3770 if (gimple_store_p (use_stmt))
3772 unsigned int id
3773 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
3774 bitmap_set_bit (reduction_stores, id);
3775 skip_stmt = use_stmt;
3776 if (dump_file)
3778 fprintf (dump_file, "found reduction load: ");
3779 print_gimple_stmt (dump_file, stmt, 0);
3786 ao_ref_init (&ref, rhs);
3788 else if (gimple_store_p (stmt))
3790 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3791 ref_is_store = true;
3793 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3794 continue;
3795 else if (!gimple_has_side_effects (stmt)
3796 && !gimple_could_trap_p (stmt)
3797 && !stmt_could_throw_p (cfun, stmt)
3798 && !gimple_vdef (stmt)
3799 && !gimple_vuse (stmt))
3800 continue;
3801 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3802 continue;
3803 else if (gimple_code (stmt) == GIMPLE_RETURN)
3804 continue;
3805 else
3807 if (dump_file)
3809 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3810 print_gimple_stmt (dump_file, stmt, 0);
3812 return false;
3815 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3816 i, skip_stmt))
3818 if (dump_file)
3820 fprintf (dump_file, "conflicts with entry/exit stmt: ");
3821 print_gimple_stmt (dump_file, stmt, 0);
3823 return false;
3828 return true;
3831 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them
3832 with a gang_pos == 0 check, except when the stores are REDUCTION_STORES.
3833 Return true if any changes were made. */
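/* For illustration (an assumed example, not from this file): a store in the
   entry/exit part of the region

     *p = x_1;

   is rewritten below so that only gang 0 executes it:

     gang_pos_2 = GOACC_DIM_POS (GOMP_DIM_GANG);
     if (gang_pos_2 == 0)
       *p = x_1;

   and a virtual phi merges the memory states of the two paths after the
   store.  */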
static bool
oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
                             bitmap reduction_stores)
{
  tree gang_pos = NULL_TREE;
  bool changed = false;

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      if (bitmap_bit_p (in_loop_bbs, bb->index))
        continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
        {
          gimple *stmt = gsi_stmt (gsi);

          if (!gimple_store_p (stmt))
            {
              /* Update gsi to point to next stmt.  */
              gsi_next (&gsi);
              continue;
            }

          if (bitmap_bit_p (reduction_stores,
                            SSA_NAME_VERSION (gimple_vdef (stmt))))
            {
              if (dump_file)
                {
                  fprintf (dump_file,
                           "skipped reduction store for single-gang"
                           " neutering: ");
                  print_gimple_stmt (dump_file, stmt, 0);
                }

              /* Update gsi to point to next stmt.  */
              gsi_next (&gsi);
              continue;
            }

          changed = true;
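
          /* Materialize the gang position once, at the start of the
             function: gang_pos = IFN_GOACC_DIM_POS (GOMP_DIM_GANG).  */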
          if (gang_pos == NULL_TREE)
            {
              tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
              gcall *gang_single
                = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
              gang_pos = make_ssa_name (integer_type_node);
              gimple_call_set_lhs (gang_single, gang_pos);
              gimple_stmt_iterator start
                = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
              tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
              gimple_set_vuse (gang_single, vuse);
              gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
            }

          if (dump_file)
            {
              fprintf (dump_file,
                       "found store that needs single-gang neutering: ");
              print_gimple_stmt (dump_file, stmt, 0);
            }

          {
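            /* Isolate the store in a block of its own, guarded by
               gang_pos == 0, so that only gang 0 executes it; all other
               gangs branch around it.  */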
            /* Split block before store.  */
            gimple_stmt_iterator gsi2 = gsi;
            gsi_prev (&gsi2);
            edge e;
            if (gsi_end_p (gsi2))
              {
                e = split_block_after_labels (bb);
                gsi2 = gsi_last_bb (bb);
              }
            else
              e = split_block (bb, gsi_stmt (gsi2));
            basic_block bb2 = e->dest;

            /* Split block after store.  */
            gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
            edge e2 = split_block (bb2, gsi_stmt (gsi3));
            basic_block bb3 = e2->dest;

            gimple *cond
              = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
                                   NULL_TREE, NULL_TREE);
            gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);

            edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
            /* FIXME: What is the probability?  */
            e3->probability = profile_probability::guessed_never ();
            e->flags = EDGE_TRUE_VALUE;
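
            /* The store's virtual definition now happens only on the path
               through BB2; add a virtual PHI in BB3 merging it with the
               incoming virtual use, and redirect later uses to the PHI
               result.  */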
            tree vdef = gimple_vdef (stmt);
            tree vuse = gimple_vuse (stmt);

            tree phi_res = copy_ssa_name (vdef);
            gphi *new_phi = create_phi_node (phi_res, bb3);
            replace_uses_by (vdef, phi_res);
            add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
            add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);

            /* Update gsi to point to next stmt.  */
            bb = bb3;
            gsi = gsi_start_bb (bb);
          }
        }
    }

  return changed;
}
/* Return true if the statements before and after the LOOP can be executed in
   parallel with the function containing the loop.  Resolve conflicting stores
   outside LOOP by guarding them such that only a single gang executes them.  */
static bool
oacc_entry_exit_ok (class loop *loop,
                    reduction_info_table_type *reduction_list)
{
  basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
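  /* REGION_BBS is the set of all blocks dominated by the function entry,
     i.e. the entire function body.  */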
  vec<basic_block> region_bbs
    = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
  bitmap_clear (in_loop_bbs);
  for (unsigned int i = 0; i < loop->num_nodes; i++)
    bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);

  bitmap reduction_stores = BITMAP_ALLOC (NULL);
  bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
                                   reduction_stores);

  if (res)
    {
      bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
                                                  reduction_stores);
      if (changed)
        {
          free_dominance_info (CDI_DOMINATORS);
          calculate_dominance_info (CDI_DOMINATORS);
        }
    }

  region_bbs.release ();
  free (loop_bbs);

  BITMAP_FREE (in_loop_bbs);
  BITMAP_FREE (reduction_stores);

  return res;
}
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */
static bool
parallelize_loops (bool oacc_kernels_p)
{
  unsigned n_threads;
  bool changed = false;
  class loop *loop;
  class loop *skip_loop = NULL;
  class tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (!oacc_kernels_p
      && parallelized_function_p (cfun->decl))
    return false;

  /* Do not parallelize loops in offloaded functions.  */
  if (!oacc_kernels_p
      && oacc_get_fn_attrib (cfun->decl) != NULL)
    return false;

  if (cfun->has_nonlocal_label)
    return false;

  /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
     the argument to -ftree-parallelize-loops.  */
  if (oacc_kernels_p)
    n_threads = 0;
  else
    n_threads = flag_tree_parallelize_loops;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  calculate_dominance_info (CDI_DOMINATORS);
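
  /* SKIP_LOOP is used to avoid re-parallelizing the inner loops of a loop
     that has just been parallelized (or of a loop in an oacc kernels
     region).  */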
  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
        {
          if (!loop->in_oacc_kernels_region
              && dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Skipping loop %d as inner loop of parallelized loop\n",
                     loop->num);

          skip_loop = loop->inner;
          continue;
        }
      else
        skip_loop = NULL;

      reduction_list.empty ();

      if (oacc_kernels_p)
        {
          if (!loop->in_oacc_kernels_region)
            continue;

          /* Don't try to parallelize inner loops in an oacc kernels region.  */
          if (loop->inner)
            skip_loop = loop->inner;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Trying loop %d with header bb %d in oacc kernels"
                     " region\n", loop->num, loop->header->index);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
          if (loop->inner)
            fprintf (dump_file, "loop %d is not innermost\n", loop->num);
          else
            fprintf (dump_file, "loop %d is innermost\n", loop->num);
        }

      if (!single_dom_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is !single_dom_exit\n");

          continue;
        }

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;
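
      /* Require at least MIN_PER_THREAD iterations per thread (two per
         thread for loops with an inner loop), unless -floop-parallelize-all
         is given; also skip loops in cold code.  */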
      estimated = estimated_loop_iterations_int (loop);
      if (estimated == -1)
        estimated = get_likely_max_loop_iterations_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
          && !oacc_kernels_p
          && ((estimated != -1
               && (estimated
                   < ((HOST_WIDE_INT) n_threads
                      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
              /* Do not bother with loops in cold areas.  */
              || optimize_loop_nest_for_size_p (loop)))
        continue;
      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
        continue;

      if (loop_has_phi_with_address_arg (loop))
        continue;

      if (!loop->can_be_parallel
          && !loop_parallel_p (loop, &parloop_obstack))
        continue;

      if (oacc_kernels_p
          && !oacc_entry_exit_ok (loop, &reduction_list))
        {
          if (dump_file)
            fprintf (dump_file, "entry/exit not ok: FAILED\n");
          continue;
        }

      changed = true;
      skip_loop = loop->inner;

      if (dump_enabled_p ())
        {
          dump_user_location_t loop_loc = find_loop_location (loop);
          if (loop->inner)
            dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
                             "parallelizing outer loop %d\n", loop->num);
          else
            dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
                             "parallelizing inner loop %d\n", loop->num);
        }

      gen_parallel_loop (loop, &reduction_list,
                         n_threads, &niter_desc, oacc_kernels_p);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
/* Parallelization.  */

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
      oacc_kernels_p (false)
  {}
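
  /* This pass is instantiated twice: as "parloops" proper, gated on
     -ftree-parallelize-loops=N with N > 1, and (with oacc_kernels_p set
     via set_pass_param) for loops in OpenACC kernels regions, gated on
     -fopenacc.  */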
  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      if (oacc_kernels_p)
        return flag_openacc;
      else
        return flag_tree_parallelize_loops > 1;
    }
  virtual unsigned int execute (function *);
  opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      oacc_kernels_p = param;
    }

 private:
  bool oacc_kernels_p;
}; // class pass_parallelize_loops
unsigned
pass_parallelize_loops::execute (function *fun)
{
  tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  if (nthreads == NULL_TREE)
    return 0;
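
  /* If scev is already initialized, we are running inside the loop
     pipeline, which has already set up the loop infrastructure; otherwise
     initialize (and at the end finalize) it ourselves.  */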
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL
                         | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;

  if (!in_loop_pipeline)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      scev_initialize ();
    }

  unsigned int todo = 0;
  if (parallelize_loops (oacc_kernels_p))
    {
      fun->curr_properties &= ~(PROP_gimple_eomp);

      checking_verify_loop_structure ();

      todo |= TODO_update_ssa;
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return todo;
}
} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}