c++: ttp TEMPLATE_DECL equivalence [PR112737]
[official-gcc.git] / gcc / gimple-loop-interchange.cc
blobb4228044a3873f5dad872e1a0449372277acdb97
1 /* Loop interchange.
2 Copyright (C) 2017-2024 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "is-a.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "gimple-pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "cfgloop.h"
36 #include "tree-ssa.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-ssa-loop-manip.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop-ivopts.h"
41 #include "tree-ssa-dce.h"
42 #include "tree-data-ref.h"
43 #include "tree-vectorizer.h"
45 /* This pass performs loop interchange: for example, the loop nest
47 for (int j = 0; j < N; j++)
48 for (int k = 0; k < N; k++)
49 for (int i = 0; i < N; i++)
50 c[i][j] = c[i][j] + a[i][k]*b[k][j];
52 is transformed to
54 for (int i = 0; i < N; i++)
55 for (int j = 0; j < N; j++)
56 for (int k = 0; k < N; k++)
57 c[i][j] = c[i][j] + a[i][k]*b[k][j];
59 This pass implements loop interchange in the following steps:
61 1) Find perfect loop nest for each innermost loop and compute data
62 dependence relations for it. For above example, loop nest is
63 <loop_j, loop_k, loop_i>.
64 2) From innermost to outermost loop, this pass tries to interchange
65 each loop pair. For above case, it firstly tries to interchange
66 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
67 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
68 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
69 loop to the outermost position. For loop pair <loop_i, loop_j>
70 to be interchanged, we:
71 3) Check if data dependence relations are valid for loop interchange.
72 4) Check if both loops can be interchanged in terms of transformation.
73 5) Check if interchanging the two loops is profitable.
74 6) Interchange the two loops by mapping induction variables.
76 This pass also handles reductions in loop nest. So far we only support
77 simple reduction of inner loop and double reduction of the loop nest. */
79 /* Maximum number of stmts in each loop that should be interchanged. */
80 #define MAX_NUM_STMT (param_loop_interchange_max_num_stmts)
81 /* Maximum number of data references in loop nest. */
82 #define MAX_DATAREFS (param_loop_max_datarefs_for_datadeps)
84 /* Comparison ratio of access stride between inner/outer loops to be
85 interchanged. This is the minimum stride ratio for loop interchange
86 to be profitable. */
87 #define OUTER_STRIDE_RATIO (param_loop_interchange_stride_ratio)
88 /* The same as above, but we require higher ratio for interchanging the
89 innermost two loops. */
90 #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
92 /* Comparison ratio of stmt cost between inner/outer loops. Loops won't
93 be interchanged if outer loop has too many stmts. */
94 #define STMT_COST_RATIO (3)
96 /* Vector of strides that DR accesses in each level loop of a loop nest.
   The vector lives in DR's opaque AUX field; it is allocated by this pass
   and released in free_data_refs_with_aux. */
97 #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
99 /* Structure recording loop induction variable.  Instances are
   XCNEW-allocated in analyze_induction_var and freed in loop_cand's
   destructor. */
100 typedef struct induction
102 /* IV itself. */
103 tree var;
104 /* IV's initializing value, which is the init arg of the IV PHI node. */
105 tree init_val;
106 /* IV's initializing expr, which is (the expanded result of) init_val. */
107 tree init_expr;
108 /* IV's step.  A zero step marks a loop-invariant "IV" (see
   analyze_induction_var). */
109 tree step;
110 } *induction_p;
112 /* Enum type for loop reduction variable. */
113 enum reduction_type
115 UNKNOWN_RTYPE = 0,  /* Not (or not yet) classified; such reductions are
			 rejected by can_interchange_p. */
116 SIMPLE_RTYPE,  /* Reduction contained in one loop, set by
		    classify_simple_reduction. */
117 DOUBLE_RTYPE  /* Outer-loop reduction updated through an inner-loop
		   simple reduction, set by analyze_oloop_reduction_var. */
120 /* Structure recording loop reduction variable. */
121 typedef struct reduction
123 /* Reduction itself. */
124 tree var;
125 /* PHI node defining reduction variable. */
126 gphi *phi;
127 /* Init and next variables of the reduction. */
128 tree init;
129 tree next;
130 /* Lcssa PHI node if reduction is used outside of its definition loop. */
131 gphi *lcssa_phi;
132 /* Stmts defining init and next. */
133 gimple *producer;
134 gimple *consumer;
135 /* If init is loaded from memory, this is the loading memory reference. */
136 tree init_ref;
137 /* If reduction is finally stored to memory, this is the stored memory
138 reference. */
139 tree fini_ref;
140 /* Classification; stays UNKNOWN_RTYPE (zero from XCNEW) until
   analysis classifies the reduction. */
140 enum reduction_type type;
141 } *reduction_p;
144 /* Dump reduction RE. */
146 static void
147 dump_reduction (reduction_p re)
149 if (re->type == SIMPLE_RTYPE)
150 fprintf (dump_file, " Simple reduction: ");
151 else if (re->type == DOUBLE_RTYPE)
152 fprintf (dump_file, " Double reduction: ");
153 else
154 fprintf (dump_file, " Unknown reduction: ");
156 print_gimple_stmt (dump_file, re->phi, 0);
159 /* Dump LOOP's induction IV. */
160 static void
161 dump_induction (class loop *loop, induction_p iv)
163 fprintf (dump_file, " Induction: ");
164 print_generic_expr (dump_file, iv->var, TDF_SLIM);
165 fprintf (dump_file, " = {");
166 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
167 fprintf (dump_file, ", ");
168 print_generic_expr (dump_file, iv->step, TDF_SLIM);
169 fprintf (dump_file, "}_%d\n", loop->num);
172 /* Loop candidate for interchange.  Wraps one loop of the nest together
   with the analysis results (inductions, reductions, lcssa PHIs) needed
   to decide and perform interchange. */
174 class loop_cand
176 public:
177 loop_cand (class loop *, class loop *);
178 ~loop_cand ();
/* Map STMT to the reduction it belongs to (lcssa PHI/producer/consumer). */
180 reduction_p find_reduction_by_stmt (gimple *);
/* Try to classify RE as a store-motion style simple reduction. */
181 void classify_simple_reduction (reduction_p);
/* Analyze a loop-carried var as inner-/outer-loop reduction. */
182 bool analyze_iloop_reduction_var (tree);
183 bool analyze_oloop_reduction_var (loop_cand *, tree);
/* Analyze a loop-carried var as induction given its scev. */
184 bool analyze_induction_var (tree, tree);
185 bool analyze_carried_vars (loop_cand *);
186 bool analyze_lcssa_phis (void);
/* Final check whether this candidate can be interchanged. */
187 bool can_interchange_p (loop_cand *);
188 void undo_simple_reduction (reduction_p, bitmap);
190 /* The loop itself. */
191 class loop *m_loop;
192 /* The outer loop for interchange.  Equals m_loop if this candidate
193 itself represents the outer loop. */
194 class loop *m_outer;
195 /* Vector of induction variables in loop. */
196 vec<induction_p> m_inductions;
197 /* Vector of reduction variables in loop. */
198 vec<reduction_p> m_reductions;
199 /* Lcssa PHI nodes of this loop. */
200 vec<gphi *> m_lcssa_nodes;
201 /* Single exit edge of this loop. */
202 edge m_exit;
203 /* Basic blocks of this loop. */
204 basic_block *m_bbs;
205 /* Number of stmts of this loop. Inner loops' stmts are not included. */
206 int m_num_stmts;
207 /* Number of constant initialized simple reduction. */
208 int m_const_init_reduc;
211 /* Constructor. */
213 loop_cand::loop_cand (class loop *loop, class loop *outer)
214 : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
215 m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
217 m_inductions.create (3);
218 m_reductions.create (3);
219 m_lcssa_nodes.create (3);
222 /* Destructor. */
224 loop_cand::~loop_cand ()
226 induction_p iv;
227 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
228 free (iv);
230 reduction_p re;
231 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
232 free (re);
234 m_inductions.release ();
235 m_reductions.release ();
236 m_lcssa_nodes.release ();
237 free (m_bbs);
240 /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
242 static gimple *
243 single_use_in_loop (tree var, class loop *loop)
245 gimple *stmt, *res = NULL;
246 use_operand_p use_p;
247 imm_use_iterator iterator;
249 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
251 stmt = USE_STMT (use_p);
252 if (is_gimple_debug (stmt))
253 continue;
255 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
256 continue;
258 if (res)
259 return NULL;
261 res = stmt;
263 return res;
266 /* Return true if E is unsupported in loop interchange, i.e, E is a complex
267 edge or part of irreducible loop. */
269 static inline bool
270 unsupported_edge (edge e)
272 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
275 /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
276 stmt. */
278 reduction_p
279 loop_cand::find_reduction_by_stmt (gimple *stmt)
281 gphi *phi = dyn_cast <gphi *> (stmt);
282 reduction_p re;
284 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
285 if ((phi != NULL && phi == re->lcssa_phi)
286 || (stmt == re->producer || stmt == re->consumer))
287 return re;
289 return NULL;
292 /* Return true if current loop_cand can be interchanged.  ILOOP is not
293 NULL if current loop_cand is the outer loop in the loop nest.  As a
   side effect this walk accumulates the loop's own stmt count into
   m_num_stmts. */
295 bool
296 loop_cand::can_interchange_p (loop_cand *iloop)
298 /* For now we only support at most one reduction. */
299 unsigned allowed_reduction_num = 1;
301 /* Only support reduction if the loop nest to be interchanged is the
302 innermost two loops. */
303 if ((iloop == NULL && m_loop->inner != NULL)
304 || (iloop != NULL && iloop->m_loop->inner != NULL))
305 allowed_reduction_num = 0;
307 if (m_reductions.length () > allowed_reduction_num
308 || (m_reductions.length () == 1
309 && m_reductions[0]->type == UNKNOWN_RTYPE))
310 return false;
312 /* Only support lcssa PHI node which is for reduction. */
313 if (m_lcssa_nodes.length () > allowed_reduction_num)
314 return false;
316 /* Check if basic block has any unsupported operation. Note basic blocks
317 of inner loops are not checked here. */
318 for (unsigned i = 0; i < m_loop->num_nodes; i++)
320 basic_block bb = m_bbs[i];
321 gphi_iterator psi;
322 gimple_stmt_iterator gsi;
324 /* Skip basic blocks of inner loops. */
325 if (bb->loop_father != m_loop)
326 continue;
328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
330 gimple *stmt = gsi_stmt (gsi);
331 if (is_gimple_debug (stmt))
332 continue;
334 if (gimple_has_side_effects (stmt))
335 return false;
337 m_num_stmts++;
338 if (gcall *call = dyn_cast <gcall *> (stmt))
340 /* In basic block of outer loop, the call should be cheap since
341 it will be moved to inner loop. */
342 if (iloop != NULL
343 && !gimple_inexpensive_call_p (call))
344 return false;
345 continue;
348 if (!iloop || !gimple_vuse (stmt))
349 continue;
351 /* Support stmt accessing memory in outer loop only if it is for
352 inner loop's reduction. */
353 if (iloop->find_reduction_by_stmt (stmt))
354 continue;
356 tree lhs;
357 /* Support loop invariant memory reference if it's only used once by
358 inner loop. */
359 /* ??? How's this checking for invariantness? */
360 if (gimple_assign_single_p (stmt)
361 && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
362 && TREE_CODE (lhs) == SSA_NAME
363 && single_use_in_loop (lhs, iloop->m_loop))
364 continue;
366 return false;
368 /* Check if loop has too many stmts. */
369 if (m_num_stmts > MAX_NUM_STMT)
370 return false;
372 /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
373 loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
374 if (!iloop || bb == m_loop->header
375 || bb == iloop->m_exit->dest)
376 continue;
378 /* Don't allow any other PHI nodes. */
379 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
380 if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
381 return false;
384 return true;
387 /* Programmers and optimizers (like loop store motion) may optimize code:
389 for (int i = 0; i < N; i++)
390 for (int j = 0; j < N; j++)
391 a[i] += b[j][i] * c[j][i];
393 into reduction:
395 for (int i = 0; i < N; i++)
397 // producer. Note sum can be initialized to a constant.
398 int sum = a[i];
399 for (int j = 0; j < N; j++)
401 sum += b[j][i] * c[j][i];
403 // consumer.
404 a[i] = sum;
407 The result code can't be interchanged without undoing the optimization.
408 This function classifies this kind of reduction and records information
409 so that we can undo the store motion during interchange.  Any early
   return below leaves RE->type as UNKNOWN_RTYPE, which later makes
   can_interchange_p reject the candidate. */
411 void
412 loop_cand::classify_simple_reduction (reduction_p re)
414 gimple *producer, *consumer;
416 /* Check init variable of reduction and how it is initialized. */
417 if (TREE_CODE (re->init) == SSA_NAME)
419 producer = SSA_NAME_DEF_STMT (re->init);
420 re->producer = producer;
421 basic_block bb = gimple_bb (producer);
422 if (!bb || bb->loop_father != m_outer)
423 return;
425 if (!gimple_assign_load_p (producer))
426 return;
428 re->init_ref = gimple_assign_rhs1 (producer);
430 else if (CONSTANT_CLASS_P (re->init))
431 m_const_init_reduc++;
432 else
433 return;
435 /* Check how reduction variable is used. */
436 consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
437 if (!consumer
438 || !gimple_store_p (consumer))
439 return;
441 re->fini_ref = gimple_get_lhs (consumer);
442 re->consumer = consumer;
444 /* Simple reduction with constant initializer. */
445 if (!re->init_ref)
447 gcc_assert (CONSTANT_CLASS_P (re->init));
448 re->init_ref = unshare_expr (re->fini_ref);
451 /* Require memory references in producer and consumer are the same so
452 that we can undo reduction during interchange. */
453 if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
454 return;
456 re->type = SIMPLE_RTYPE;
459 /* Analyze reduction variable VAR for inner loop of the loop nest to be
460 interchanged. Return true if analysis succeeds. */
462 bool
463 loop_cand::analyze_iloop_reduction_var (tree var)
465 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
466 gphi *lcssa_phi = NULL, *use_phi;
467 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
468 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
469 reduction_p re;
470 gimple *stmt, *next_def, *single_use = NULL;
471 use_operand_p use_p;
472 imm_use_iterator iterator;
474 if (TREE_CODE (next) != SSA_NAME)
475 return false;
477 next_def = SSA_NAME_DEF_STMT (next);
478 basic_block bb = gimple_bb (next_def);
479 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
480 return false;
482 /* In restricted reduction, the var is (and must be) used in defining
483 the updated var. The process can be depicted as below:
485 var ;; = PHI<init, next>
489 +---------------------+
490 | reduction operators | <-- other operands
491 +---------------------+
495 next
497 In terms of loop interchange, we don't change how NEXT is computed based
498 on VAR and OTHER OPERANDS. In case of double reduction in the loop nest
499 to be interchanged, we don't change it at all. In the case of simple
500 reduction in inner loop, we only change how VAR/NEXT is loaded or
501 stored. With these conditions, we can relax restrictions on reduction
502 in a way that reduction operation is seen as black box. In general,
503 we can ignore reassociation of reduction operator; we can handle fake
504 reductions in which VAR is not even used to compute NEXT. */
505 if (! single_imm_use (var, &use_p, &single_use)
506 || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
507 return false;
509 /* Check the reduction operation. We require a left-associative operation.
510 For FP math we also need to be allowed to associate operations. */
511 if (gassign *ass = dyn_cast <gassign *> (single_use))
513 enum tree_code code = gimple_assign_rhs_code (ass);
514 if (! (associative_tree_code (code)
515 || (code == MINUS_EXPR
516 && use_p->use == gimple_assign_rhs1_ptr (ass)))
517 || (FLOAT_TYPE_P (TREE_TYPE (var))
518 && ! flag_associative_math))
519 return false;
521 else
522 return false;
524 /* Handle and verify a series of stmts feeding the reduction op. */
525 if (single_use != next_def
526 && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
527 gimple_assign_rhs_code (single_use)))
528 return false;
530 /* Only support cases in which INIT is used in inner loop. */
531 if (TREE_CODE (init) == SSA_NAME)
532 FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
534 stmt = USE_STMT (use_p);
535 if (is_gimple_debug (stmt))
536 continue;
538 if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
539 return false;
/* NEXT may only be used by the reduction PHI itself and by a single
   lcssa PHI in the exit block; anything else defeats the analysis. */
542 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
544 stmt = USE_STMT (use_p);
545 if (is_gimple_debug (stmt))
546 continue;
548 /* Or else it's used in PHI itself. */
549 use_phi = dyn_cast <gphi *> (stmt);
550 if (use_phi == phi)
551 continue;
553 if (use_phi != NULL
554 && lcssa_phi == NULL
555 && gimple_bb (stmt) == m_exit->dest
556 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
557 lcssa_phi = use_phi;
558 else
559 return false;
561 if (!lcssa_phi)
562 return false;
/* XCNEW zero-fills, so RE->type starts as UNKNOWN_RTYPE until
   classify_simple_reduction possibly upgrades it. */
564 re = XCNEW (struct reduction);
565 re->var = var;
566 re->init = init;
567 re->next = next;
568 re->phi = phi;
569 re->lcssa_phi = lcssa_phi;
571 classify_simple_reduction (re);
573 if (dump_file && (dump_flags & TDF_DETAILS))
574 dump_reduction (re);
576 m_reductions.safe_push (re);
577 return true;
580 /* Analyze reduction variable VAR for outer loop of the loop nest to be
581 interchanged. ILOOP is not NULL and points to inner loop. For the
582 moment, we only support double reduction for outer loop, like:
584 for (int i = 0; i < n; i++)
586 int sum = 0;
588 for (int j = 0; j < n; j++) // outer loop
589 for (int k = 0; k < n; k++) // inner loop
590 sum += a[i][k]*b[k][j];
592 s[i] = sum;
595 Note the innermost two loops are the loop nest to be interchanged.
596 Return true if analysis succeeds. */
598 bool
599 loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
601 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
602 gphi *lcssa_phi = NULL, *use_phi;
603 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
604 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
605 reduction_p re;
606 gimple *stmt, *next_def;
607 use_operand_p use_p;
608 imm_use_iterator iterator;
610 if (TREE_CODE (next) != SSA_NAME)
611 return false;
613 next_def = SSA_NAME_DEF_STMT (next);
614 basic_block bb = gimple_bb (next_def);
615 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
616 return false;
618 /* Find inner loop's simple reduction that uses var as initializer. */
619 reduction_p inner_re = NULL;
620 for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
621 if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
622 break;
624 if (inner_re == NULL
625 || inner_re->type != UNKNOWN_RTYPE
626 || inner_re->producer != phi)
627 return false;
629 /* In case of double reduction, outer loop's reduction should be updated
630 by inner loop's simple reduction. */
631 if (next_def != inner_re->lcssa_phi)
632 return false;
634 /* Outer loop's reduction should only be used to initialize inner loop's
635 simple reduction. */
636 if (! single_imm_use (var, &use_p, &stmt)
637 || stmt != inner_re->phi)
638 return false;
640 /* Check this reduction is correctly used outside of loop via lcssa phi. */
641 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
643 stmt = USE_STMT (use_p);
644 if (is_gimple_debug (stmt))
645 continue;
647 /* Or else it's used in PHI itself. */
648 use_phi = dyn_cast <gphi *> (stmt);
649 if (use_phi == phi)
650 continue;
652 if (lcssa_phi == NULL
653 && use_phi != NULL
654 && gimple_bb (stmt) == m_exit->dest
655 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
656 lcssa_phi = use_phi;
657 else
658 return false;
660 if (!lcssa_phi)
661 return false;
663 re = XCNEW (struct reduction);
664 re->var = var;
665 re->init = init;
666 re->next = next;
667 re->phi = phi;
668 re->lcssa_phi = lcssa_phi;
669 re->type = DOUBLE_RTYPE;
/* The inner reduction is promoted to double as well so it is not undone
   as a simple reduction by interchange_loops. */
670 inner_re->type = DOUBLE_RTYPE;
672 if (dump_file && (dump_flags & TDF_DETAILS))
673 dump_reduction (re);
675 m_reductions.safe_push (re);
676 return true;
679 /* Return true if VAR is induction variable of current loop whose scev is
680 specified by CHREC. */
682 bool
683 loop_cand::analyze_induction_var (tree var, tree chrec)
685 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
686 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
688 /* Var is loop invariant, though it's unlikely to happen. */
689 if (tree_does_not_contain_chrecs (chrec))
691 /* Punt on floating point invariants if honoring signed zeros,
692 representing that as + 0.0 would change the result if init
693 is -0.0. Similarly for SNaNs it can raise exception. */
694 if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
695 return false;
/* Record the invariant as a degenerate IV with a zero step. */
696 struct induction *iv = XCNEW (struct induction);
697 iv->var = var;
698 iv->init_val = init;
699 iv->init_expr = chrec;
700 iv->step = build_zero_cst (TREE_TYPE (chrec));
701 m_inductions.safe_push (iv);
702 return true;
/* Otherwise require an affine evolution in exactly this loop with
   invariant base and step. */
705 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
706 || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
707 || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
708 || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
709 return false;
711 struct induction *iv = XCNEW (struct induction);
712 iv->var = var;
713 iv->init_val = init;
714 iv->init_expr = CHREC_LEFT (chrec);
715 iv->step = CHREC_RIGHT (chrec);
717 if (dump_file && (dump_flags & TDF_DETAILS))
718 dump_induction (m_loop, iv);
720 m_inductions.safe_push (iv);
721 return true;
724 /* Return true if all loop carried variables defined in loop header can
725 be successfully analyzed.  Each header PHI result must be recognized
   either as a reduction (via analyze_[io]loop_reduction_var) or as an
   induction variable; a record is pushed for every success. */
727 bool
728 loop_cand::analyze_carried_vars (loop_cand *iloop)
730 edge e = loop_preheader_edge (m_outer);
731 gphi_iterator gsi;
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
736 for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
738 gphi *phi = gsi.phi ();
740 tree var = PHI_RESULT (phi);
741 if (virtual_operand_p (var))
742 continue;
744 tree chrec = analyze_scalar_evolution (m_loop, var);
745 chrec = instantiate_scev (e, m_loop, chrec);
747 /* Analyze var as reduction variable. */
748 if (chrec_contains_undetermined (chrec)
749 || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
751 if (iloop && !analyze_oloop_reduction_var (iloop, var))
752 return false;
753 if (!iloop && !analyze_iloop_reduction_var (var))
754 return false;
756 /* Analyze var as induction variable. */
757 else if (!analyze_induction_var (var, chrec))
758 return false;
761 return true;
764 /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
766 bool
767 loop_cand::analyze_lcssa_phis (void)
769 gphi_iterator gsi;
770 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
772 gphi *phi = gsi.phi ();
774 if (virtual_operand_p (PHI_RESULT (phi)))
775 continue;
777 /* TODO: We only support lcssa phi for reduction for now. */
778 if (!find_reduction_by_stmt (phi))
779 return false;
782 return true;
785 /* CONSUMER is a stmt in BB storing reduction result into memory object.
786 When the reduction is initialized from a constant value, we need to add
787 a stmt loading from the memory object to target basic block in inner
788 loop during undoing the reduction. Problem is that memory reference
789 may use ssa variables not dominating the target basic block. This
790 function finds all stmts on which CONSUMER depends in basic block BB,
791 records and returns them via STMTS.  The found stmts are removed from
   BB and appended to STMTS in their original order. */
793 static void
794 find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
796 auto_vec<gimple *, 4> worklist;
797 use_operand_p use_p;
798 ssa_op_iter iter;
799 gimple *stmt, *def_stmt;
800 gimple_stmt_iterator gsi;
802 /* First clear flag for stmts in bb.  GF_PLF_1 is a pass-local flag we
   use here as the "visited/depended" mark. */
803 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
804 gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
806 /* DFS search all depended stmts in bb and mark flag for these stmts. */
807 worklist.safe_push (consumer);
808 while (!worklist.is_empty ())
810 stmt = worklist.pop ();
811 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
813 def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
815 if (is_a <gphi *> (def_stmt)
816 || gimple_bb (def_stmt) != bb
817 || gimple_plf (def_stmt, GF_PLF_1))
818 continue;
820 worklist.safe_push (def_stmt);
822 gimple_set_plf (stmt, GF_PLF_1, true);
824 for (gsi = gsi_start_nondebug_bb (bb);
825 !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
827 /* Move dep stmts to sequence STMTS. */
828 if (gimple_plf (stmt, GF_PLF_1))
830 gsi_remove (&gsi, false);
831 gimple_seq_add_stmt_without_update (stmts, stmt);
833 else
834 gsi_next_nondebug (&gsi);
838 /* User can write, optimizers can generate simple reduction RE for inner
839 loop. In order to make interchange valid, we have to undo reduction by
840 moving producer and consumer stmts into the inner loop. For example,
841 below code:
843 init = MEM_REF[idx]; //producer
844 loop:
845 var = phi<init, next>
846 next = var op ...
847 reduc_sum = phi<next>
848 MEM_REF[idx] = reduc_sum //consumer
850 is transformed into:
852 loop:
853 new_var = MEM_REF[idx]; //producer after moving
854 next = new_var op ...
855 MEM_REF[idx] = next; //consumer after moving
857 Note if the reduction variable is initialized to constant, like:
859 var = phi<0.0, next>
861 we compute new_var as below:
863 loop:
864 tmp = MEM_REF[idx];
865 new_var = !first_iteration ? tmp : 0.0;
867 so that the initial const is used in the first iteration of loop. Also
868 record ssa variables for dead code elimination in DCE_SEEDS. */
870 void
871 loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
873 gimple *stmt;
874 gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
875 gimple_seq stmts = NULL;
876 tree new_var;
878 /* Prepare the initialization stmts and insert it to inner loop. */
879 if (re->producer != NULL)
/* The producer load is moved verbatim; drop its vuse so the virtual
   operands get recomputed at the new location. */
881 gimple_set_vuse (re->producer, NULL_TREE);
882 update_stmt (re->producer);
883 from = gsi_for_stmt (re->producer);
884 gsi_remove (&from, false);
885 gimple_seq_add_stmt_without_update (&stmts, re->producer);
886 new_var = re->init;
888 else
890 /* Find all stmts on which expression "MEM_REF[idx]" depends. */
891 find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
892 /* Because we generate new stmt loading from the MEM_REF to TMP. */
893 tree cond, tmp = copy_ssa_name (re->var);
894 stmt = gimple_build_assign (tmp, re->init_ref);
895 gimple_seq_add_stmt_without_update (&stmts, stmt);
897 /* Init new_var to MEM_REF or CONST depending on if it is the first
898 iteration. */
899 induction_p iv = m_inductions[0];
900 cond = make_ssa_name (boolean_type_node);
901 stmt = gimple_build_assign (cond, NE_EXPR, iv->var, iv->init_val);
902 gimple_seq_add_stmt_without_update (&stmts, stmt);
903 new_var = copy_ssa_name (re->var);
904 stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
905 gimple_seq_add_stmt_without_update (&stmts, stmt);
907 gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
909 /* Replace all uses of reduction var with new variable. */
910 use_operand_p use_p;
911 imm_use_iterator iterator;
912 FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
914 FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
915 SET_USE (use_p, new_var);
917 update_stmt (stmt);
920 /* Move consumer stmt into inner loop, just after reduction next's def. */
921 unlink_stmt_vdef (re->consumer);
922 release_ssa_name (gimple_vdef (re->consumer));
923 gimple_set_vdef (re->consumer, NULL_TREE);
924 gimple_set_vuse (re->consumer, NULL_TREE);
925 gimple_assign_set_rhs1 (re->consumer, re->next);
926 update_stmt (re->consumer);
927 from = gsi_for_stmt (re->consumer);
928 to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
929 gsi_move_after (&from, &to);
931 /* Mark the reduction variables for DCE. */
932 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
933 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
936 /* Free DATAREFS and its auxiliary memory. */
938 static void
939 free_data_refs_with_aux (vec<data_reference_p> datarefs)
941 data_reference_p dr;
942 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
943 if (dr->aux != NULL)
945 DR_ACCESS_STRIDE (dr)->release ();
946 delete (vec<tree> *) dr->aux;
949 free_data_refs (datarefs);
952 /* Class for loop interchange transformation.  Owns the DCE seed bitmap
   for its lifetime. */
954 class tree_loop_interchange
956 public:
957 tree_loop_interchange (vec<class loop *> loop_nest)
958 : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
959 m_dce_seeds (BITMAP_ALLOC (NULL)) { }
960 ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
/* Driver: try to interchange adjacent loop pairs of the nest. */
961 bool interchange (vec<data_reference_p>, vec<ddr_p>);
963 private:
/* Swap per-level stride/distance entries after an interchange. */
964 void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
/* Check dependence legality of interchanging two levels. */
965 bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
966 void interchange_loops (loop_cand &, loop_cand &);
967 void map_inductions_to_loop (loop_cand &, loop_cand &);
968 void move_code_to_inner_loop (class loop *, class loop *, basic_block *);
970 /* The whole loop nest in which interchange is ongoing. */
971 vec<class loop *> m_loop_nest;
972 /* We create new IV which is only used in loop's exit condition check.
973 In case of 3-level loop nest interchange, when we interchange the
974 innermost two loops, new IV created in the middle level loop does
975 not need to be preserved in interchanging the outermost two loops
976 later. We record the IV so that it can be skipped. */
977 tree m_niters_iv_var;
978 /* Bitmap of seed variables for dead code elimination after interchange. */
979 bitmap m_dce_seeds;
982 /* Update data refs' access stride and dependence information after loop
983 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
984 nest. DATAREFS are data references. DDRS are data dependences. */
986 void
987 tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
988 vec<data_reference_p> datarefs,
989 vec<ddr_p> ddrs)
991 struct data_reference *dr;
992 struct data_dependence_relation *ddr;
994 /* Update strides of data references. */
995 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
997 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
998 gcc_assert (stride->length () > i_idx);
999 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
1001 /* Update data dependences. */
1002 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1003 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1005 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1007 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1008 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1013 /* Check data dependence relations, return TRUE if it's valid to interchange
1014 two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
1015 loops is valid only if dist vector, after interchanging, doesn't have '>'
1016 as the leftmost non-'=' direction. Practically, this function has been
1017 conservative here by not checking some valid cases. */
1019 bool
1020 tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
1021 vec<ddr_p> ddrs)
1023 struct data_dependence_relation *ddr;
1025 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1027 /* Skip no-dependence case. */
1028 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1029 continue;
1031 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1033 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1034 unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
1036 /* If there is no carried dependence. */
1037 if (level == 0)
1038 continue;
/* Convert 1-based dependence level to 0-based loop nest index. */
1040 level --;
1042 /* If dependence is not carried by any loop in between the two
1043 loops [oloop, iloop] to interchange. */
1044 if (level < o_idx || level > i_idx)
1045 continue;
1047 /* Be conservative, skip case if either direction at i_idx/o_idx
1048 levels is not '=' or '<'. */
1049 if ((!DDR_REVERSED_P (ddr) && dist_vect[i_idx] < 0)
1050 || (DDR_REVERSED_P (ddr) && dist_vect[i_idx] > 0)
1051 || (!DDR_REVERSED_P (ddr) && dist_vect[o_idx] < 0)
1052 || (DDR_REVERSED_P (ddr) && dist_vect[o_idx] > 0))
1053 return false;
1057 return true;
1060 /* Interchange two loops specified by ILOOP and OLOOP. */
1062 void
1063 tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
1065 reduction_p re;
1066 gimple_stmt_iterator gsi;
1067 tree i_niters, o_niters, var_after;
1069 /* Undo inner loop's simple reduction. */
1070 for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
1071 if (re->type != DOUBLE_RTYPE)
1073 if (re->producer)
1074 reset_debug_uses (re->producer);
1076 iloop.undo_simple_reduction (re, m_dce_seeds);
1079 /* Only need to reset debug uses for double reduction. */
1080 for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
1082 gcc_assert (re->type == DOUBLE_RTYPE);
1083 reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
1084 reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
1087 /* Prepare niters for both loops.  Both are gimplified into the nest's
     preheader block so they are available before either loop runs. */
1088 class loop *loop_nest = m_loop_nest[0];
1089 edge instantiate_below = loop_preheader_edge (loop_nest);
1090 gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
1091 i_niters = number_of_latch_executions (iloop.m_loop);
1092 i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
1093 i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
1094 i_niters);
1095 i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
1096 NULL_TREE, false, GSI_CONTINUE_LINKING);
1097 o_niters = number_of_latch_executions (oloop.m_loop);
     /* NOTE(review): analysis/instantiation is skipped when OLOOP is the
	nest's outermost loop — its niters is presumably already usable at
	the preheader; confirm against instantiate_scev's contract. */
1098 if (oloop.m_loop != loop_nest)
1100 o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
1101 o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
1102 o_niters);
1104 o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
1105 NULL_TREE, false, GSI_CONTINUE_LINKING);
1107 /* Move src's code to tgt loop. This is necessary when src is the outer
1108 loop and tgt is the inner loop. */
1109 move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
1111 /* Map outer loop's IV to inner loop, and vice versa. */
1112 map_inductions_to_loop (oloop, iloop);
1113 map_inductions_to_loop (iloop, oloop);
1115 /* Create canonical IV for both loops. Note canonical IV for outer/inner
1116 loop is actually from inner/outer loop. Also we record the new IV
1117 created for the outer loop so that it can be skipped in later loop
1118 interchange. */
1119 create_canonical_iv (oloop.m_loop, oloop.m_exit,
1120 i_niters, &m_niters_iv_var, &var_after);
1121 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1122 create_canonical_iv (iloop.m_loop, iloop.m_exit,
1123 o_niters, NULL, &var_after);
1124 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1126 /* Scrap niters estimation of interchanged loops. */
1127 iloop.m_loop->any_upper_bound = false;
1128 iloop.m_loop->any_likely_upper_bound = false;
1129 free_numbers_of_iterations_estimates (iloop.m_loop);
1130 oloop.m_loop->any_upper_bound = false;
1131 oloop.m_loop->any_likely_upper_bound = false;
1132 free_numbers_of_iterations_estimates (oloop.m_loop);
1134 /* Clear all cached scev information. This is expensive but shouldn't be
1135 a problem given we interchange in very limited times. */
1136 scev_reset_htab ();
1138 /* ??? The association between the loop data structure and the
1139 CFG changed, so what was loop N at the source level is now
1140 loop M. We should think of retaining the association or breaking
1141 it fully by creating a new loop instead of re-using the "wrong" one. */
1144 /* Map induction variables of SRC loop to TGT loop. The function firstly
1145 creates the same IV of SRC loop in TGT loop, then deletes the original
1146 IV and re-initializes it using the newly created IV. For example, loop
1147 nest:
1149 for (i = 0; i < N; i++)
1150 for (j = 0; j < M; j++)
1152 //use of i;
1153 //use of j;
1156 will be transformed into:
1158 for (jj = 0; jj < M; jj++)
1159 for (ii = 0; ii < N; ii++)
1161 //use of ii;
1162 //use of jj;
1165 after loop interchange. */
1167 void
1168 tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
1170 induction_p iv;
1171 edge e = tgt.m_exit;
1172 gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
1174 /* Map source loop's IV to target loop. */
1175 for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
1177 gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
1178 gcc_assert (is_a <gphi *> (stmt));
1180 use_operand_p use_p;
1181 /* Only map original IVs to target loop.  The canonical niters IV
     recorded in m_niters_iv_var by an earlier interchange is skipped:
     it only needs to be marked for DCE below. */
1182 if (m_niters_iv_var != iv->var)
1184 /* Map the IV by creating the same one in target loop. */
1185 tree var_before, var_after;
1186 tree base = unshare_expr (iv->init_expr);
1187 tree step = unshare_expr (iv->step);
1188 create_iv (base, PLUS_EXPR, step, SSA_NAME_VAR (iv->var),
1189 tgt.m_loop, &incr_pos, false, &var_before, &var_after);
1190 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
1191 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1193 /* Replace uses of the original IV var with newly created IV var. */
1194 imm_use_iterator imm_iter;
1195 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
1197 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1198 SET_USE (use_p, var_before);
1200 update_stmt (use_stmt);
1204 /* Mark all uses for DCE. */
1205 ssa_op_iter op_iter;
1206 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
1208 tree use = USE_FROM_PTR (use_p);
1209 if (TREE_CODE (use) == SSA_NAME
1210 && ! SSA_NAME_IS_DEFAULT_DEF (use))
1211 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
1214 /* Delete definition of the original IV in the source loop. */
1215 gsi = gsi_for_stmt (stmt);
1216 remove_phi_node (&gsi, true);
1220 /* Move stmts of outer loop to inner loop. */
1222 void
1223 tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
1224 class loop *inner,
1225 basic_block *outer_bbs)
1227 basic_block oloop_exit_bb = single_exit (outer)->src;
1228 gimple_stmt_iterator gsi, to;
1230 for (unsigned i = 0; i < outer->num_nodes; i++)
1232 basic_block bb = outer_bbs[i];
1234 /* Skip basic blocks of inner loop. */
1235 if (flow_bb_inside_loop_p (inner, bb))
1236 continue;
1238 /* Move code from header/latch to header/latch. */
1239 if (bb == outer->header)
1240 to = gsi_after_labels (inner->header);
1241 else if (bb == outer->latch)
1242 to = gsi_after_labels (inner->latch);
1243 else
1244 /* Otherwise, simply move to exit->src. */
1245 to = gsi_last_bb (single_exit (inner)->src);
1247 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
1249 gimple *stmt = gsi_stmt (gsi);
     /* Leave the outer loop's exit-condition stmt where it is. */
1251 if (oloop_exit_bb == bb
1252 && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
1254 gsi_next (&gsi);
1255 continue;
     /* Strip virtual defs/uses from the stmt before moving it; NOTE(review):
	they appear to be recomputed for the new location rather than kept —
	confirm against the pass's SSA update strategy. */
1258 if (gimple_vdef (stmt))
1260 unlink_stmt_vdef (stmt);
1261 release_ssa_name (gimple_vdef (stmt));
1262 gimple_set_vdef (stmt, NULL_TREE);
1264 if (gimple_vuse (stmt))
1266 gimple_set_vuse (stmt, NULL_TREE);
1267 update_stmt (stmt);
1270 reset_debug_uses (stmt);
1271 gsi_move_before (&gsi, &to);
1276 /* Given data reference DR in LOOP_NEST, the function computes DR's access
1277 stride at each level of loop from innermost LOOP to outer. On success,
1278 it saves access stride at each level loop in a vector which is pointed
1279 by DR->aux. For example:
1281 int arr[100][100][100];
1282 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1283 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1284 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1285 arr[i][j - 1][k] = 0;
   Note: LOOP_NEST is passed by reference and may be shrunk to an inner
   loop when the address SCEV cannot be instantiated for the whole nest
   (see the retry loop below).  Early returns leave a possibly-short
   stride vector; callers use its length to bound the analyzable nest. */
1287 static void
1288 compute_access_stride (class loop *&loop_nest, class loop *loop,
1289 data_reference_p dr)
1291 vec<tree> *strides = new vec<tree> ();
1292 dr->aux = strides;
1294 basic_block bb = gimple_bb (DR_STMT (dr));
1295 if (!flow_bb_inside_loop_p (loop_nest, bb))
1296 return;
     /* DR may sit in an outer loop of the nest; record zero strides for the
	levels below its loop. */
1297 while (!flow_bb_inside_loop_p (loop, bb))
1299 strides->safe_push (build_int_cst (sizetype, 0));
1300 loop = loop_outer (loop);
1302 gcc_assert (loop == bb->loop_father);
1304 tree ref = DR_REF (dr);
1305 if (TREE_CODE (ref) == COMPONENT_REF
1306 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1308 /* We can't take address of bitfields. If the bitfield is at constant
1309 offset from the start of the struct, just use address of the
1310 struct, for analysis of the strides that shouldn't matter. */
1311 if (!TREE_OPERAND (ref, 2)
1312 || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
1313 ref = TREE_OPERAND (ref, 0);
1314 /* Otherwise, if we have a bit field representative, use that. */
1315 else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
1316 != NULL_TREE)
1318 tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
1319 ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
1320 repr, TREE_OPERAND (ref, 2));
1322 /* Otherwise punt. */
1323 else
1324 return;
1326 tree scev_base = build_fold_addr_expr (ref);
1327 tree scev = analyze_scalar_evolution (loop, scev_base);
1328 if (chrec_contains_undetermined (scev))
1329 return;
1331 tree orig_scev = scev;
1334 scev = instantiate_scev (loop_preheader_edge (loop_nest),
1335 loop, orig_scev);
1336 if (! chrec_contains_undetermined (scev))
1337 break;
1339 /* If we couldn't instantiate for the desired nest, shrink it. */
1340 if (loop_nest == loop)
1341 return;
1342 loop_nest = loop_nest->inner;
1343 } while (1);
1345 tree sl = scev;
1346 class loop *expected = loop;
     /* Peel the chrec from inner to outer, pushing one stride per loop
	level and zero for levels the address does not vary in. */
1347 while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
1349 class loop *sl_loop = get_chrec_loop (sl);
1350 while (sl_loop != expected)
1352 strides->safe_push (size_int (0));
1353 expected = loop_outer (expected);
1355 strides->safe_push (CHREC_RIGHT (sl));
1356 sl = CHREC_LEFT (sl);
1357 expected = loop_outer (expected);
1359 if (! tree_contains_chrecs (sl, NULL))
1360 while (expected != loop_outer (loop_nest))
1362 strides->safe_push (size_int (0));
1363 expected = loop_outer (expected);
1367 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1368 access strides with respect to each level loop for all data refs in
1369 DATAREFS from inner loop to outer loop. On success, it returns the
1370 outermost loop that access strides can be computed successfully for
1371 all data references. If access strides cannot be computed at least
1372 for two levels of loop for any data reference, it returns NULL. */
1374 static class loop *
1375 compute_access_strides (class loop *loop_nest, class loop *loop,
1376 vec<data_reference_p> datarefs)
1378 unsigned i, j, num_loops = (unsigned) -1;
1379 data_reference_p dr;
1380 vec<tree> *stride;
1382 class loop *interesting_loop_nest = loop_nest;
1383 for (i = 0; datarefs.iterate (i, &dr); ++i)
1385 compute_access_stride (interesting_loop_nest, loop, dr);
1386 stride = DR_ACCESS_STRIDE (dr);
     /* Track the minimum number of analyzable levels over all DRs. */
1387 if (stride->length () < num_loops)
1389 num_loops = stride->length ();
1390 if (num_loops < 2)
1391 return NULL;
1395 for (i = 0; datarefs.iterate (i, &dr); ++i)
1397 stride = DR_ACCESS_STRIDE (dr);
1398 if (stride->length () > num_loops)
1399 stride->truncate (num_loops);
     /* Reverse each stride vector from innermost-first (push order in
	compute_access_stride) to outermost-first order. */
1401 for (j = 0; j < (num_loops >> 1); ++j)
1402 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
1405 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
1406 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
1407 return loop;
1410 /* Prune access strides for data references in DATAREFS by removing strides
1411 of loops that aren't in current LOOP_NEST. */
1413 static void
1414 prune_access_strides_not_in_loop (class loop *loop_nest,
1415 class loop *innermost,
1416 vec<data_reference_p> datarefs)
1418 data_reference_p dr;
1419 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1420 gcc_assert (num_loops > 1);
1422 /* Block remove strides of loops that are not in current loop nest.
     Strides are ordered outermost-first, so the leading entries go. */
1423 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1425 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1426 if (stride->length () > num_loops)
1427 stride->block_remove (0, stride->length () - num_loops);
1431 /* Dump access strides for all DATAREFS.  Callers must ensure dump_file
     is non-NULL (they guard with dump_file && (dump_flags & TDF_DETAILS)). */
1433 static void
1434 dump_access_strides (vec<data_reference_p> datarefs)
1436 data_reference_p dr;
1437 fprintf (dump_file, "Access Strides for DRs:\n");
1438 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1440 fprintf (dump_file, " ");
1441 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1442 fprintf (dump_file, ":\t\t<");
1444 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1445 unsigned num_loops = stride->length ();
1446 for (unsigned j = 0; j < num_loops; ++j)
1448 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1449 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1454 /* Return true if it's profitable to interchange two loops whose index
1455 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1456 computes and compares three types information from all DATAREFS:
1457 1) Access stride for loop I_IDX and O_IDX.
1458 2) Number of invariant memory references with respect to I_IDX before
1459 and after loop interchange.
1460 3) Flags indicating if all memory references access sequential memory
1461 in ILOOP, before and after loop interchange.
1462 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1463 innermost loops in loop nest. This function also dumps information if
1464 DUMP_INFO_P is true. */
1466 static bool
1467 should_interchange_loops (unsigned i_idx, unsigned o_idx,
1468 vec<data_reference_p> datarefs,
1469 unsigned i_stmt_cost, unsigned o_stmt_cost,
1470 bool innermost_loops_p, bool dump_info_p = true)
1472 unsigned HOST_WIDE_INT ratio;
1473 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1474 struct data_reference *dr;
1475 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1476 widest_int iloop_strides = 0, oloop_strides = 0;
1477 unsigned num_unresolved_drs = 0;
1478 unsigned num_resolved_ok_drs = 0;
1479 unsigned num_resolved_not_ok_drs = 0;
1481 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1482 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1484 for (i = 0; datarefs.iterate (i, &dr); ++i)
1486 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1487 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1489 bool subloop_stride_p = false;
1490 /* Data ref can't be invariant or sequential access at current loop if
1491 its address changes with respect to any subloops. */
1492 for (j = i_idx + 1; j < stride->length (); ++j)
1493 if (!integer_zerop ((*stride)[j]))
1495 subloop_stride_p = true;
1496 break;
1499 if (integer_zerop (iloop_stride))
1501 if (!subloop_stride_p)
1502 num_old_inv_drs++;
1504 if (integer_zerop (oloop_stride))
1506 if (!subloop_stride_p)
1507 num_new_inv_drs++;
1510 if (TREE_CODE (iloop_stride) == INTEGER_CST
1511 && TREE_CODE (oloop_stride) == INTEGER_CST)
1513 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1514 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1516 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1517 iloop_stride, oloop_stride))
1518 num_resolved_ok_drs++;
1519 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1520 oloop_stride, iloop_stride))
1521 num_resolved_not_ok_drs++;
1522 else
1523 num_unresolved_drs++;
1525 /* Data ref can't be sequential access if its address changes in sub
1526 loop. */
1527 if (subloop_stride_p)
1529 all_seq_dr_before_p = false;
1530 all_seq_dr_after_p = false;
1531 continue;
1533 /* Track if all data references are sequential accesses before/after loop
1534 interchange. Note invariant is considered sequential here. */
1535 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1536 if (all_seq_dr_before_p
1537 && ! (integer_zerop (iloop_stride)
1538 || operand_equal_p (access_size, iloop_stride, 0)))
1539 all_seq_dr_before_p = false;
1540 if (all_seq_dr_after_p
1541 && ! (integer_zerop (oloop_stride)
1542 || operand_equal_p (access_size, oloop_stride, 0)))
1543 all_seq_dr_after_p = false;
1546 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1548 fprintf (dump_file, "\toverall:\t\t");
1549 print_decu (iloop_strides, dump_file);
1550 fprintf (dump_file, "\t");
1551 print_decu (oloop_strides, dump_file);
1552 fprintf (dump_file, "\n");
1554 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1555 num_old_inv_drs, num_new_inv_drs);
1556 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1557 all_seq_dr_before_p ? "true" : "false",
1558 all_seq_dr_after_p ? "true" : "false");
1559 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1560 num_resolved_ok_drs);
1561 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1562 num_resolved_not_ok_drs);
1563 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1564 num_unresolved_drs);
1565 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
1566 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
1569 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1570 return false;
1572 /* Stmts of outer loop will be moved to inner loop. If there are two many
1573 such stmts, it could make inner loop costly. Here we compare stmt cost
1574 between outer and inner loops. */
1575 if (i_stmt_cost && o_stmt_cost
1576 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
1577 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
1578 return false;
1580 /* We use different stride comparison ratio for interchanging innermost
1581 two loops or not. The idea is to be conservative in interchange for
1582 the innermost loops. */
1583 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1584 /* Do interchange if it gives better data locality behavior. */
1585 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1586 return true;
1587 if (wi::gtu_p (iloop_strides, oloop_strides))
1589 /* Or it creates more invariant memory references. */
1590 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1591 && num_new_inv_drs > num_old_inv_drs)
1592 return true;
1593 /* Or it makes all memory references sequential. */
1594 if (num_new_inv_drs >= num_old_inv_drs
1595 && !all_seq_dr_before_p && all_seq_dr_after_p)
1596 return true;
1599 return false;
1602 /* Try to interchange inner loop of a loop nest to outer level. */
1604 bool
1605 tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
1606 vec<ddr_p> ddrs)
1608 dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
1609 bool changed_p = false;
1610 /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
1611 The overall effect is to push inner loop to outermost level in whole
1612 loop nest. */
1613 for (unsigned i = m_loop_nest.length (); i > 1; --i)
1615 unsigned i_idx = i - 1, o_idx = i - 2;
1617 /* Check validity for loop interchange. */
1618 if (!valid_data_dependences (i_idx, o_idx, ddrs))
1619 break;
1621 loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
1622 loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
1624 /* Check if we can do transformation for loop interchange. */
1625 if (!iloop.analyze_carried_vars (NULL)
1626 || !iloop.analyze_lcssa_phis ()
1627 || !oloop.analyze_carried_vars (&iloop)
1628 || !oloop.analyze_lcssa_phis ()
1629 || !iloop.can_interchange_p (NULL)
1630 || !oloop.can_interchange_p (&iloop))
1631 break;
1633 /* Outer loop's stmts will be moved to inner loop during interchange.
1634 If there are many of them, it may make inner loop very costly. We
1635 need to check number of outer loop's stmts in profit cost model of
1636 interchange. */
1637 int stmt_cost = oloop.m_num_stmts;
1638 /* Count out the exit checking stmt of outer loop. */
1639 stmt_cost --;
1640 /* Count out IV's increasing stmt, IVOPTs takes care of it. */
1641 stmt_cost -= oloop.m_inductions.length ();
1642 /* Count in the additional load and cond_expr stmts caused by inner
1643 loop's constant initialized reduction. */
1644 stmt_cost += iloop.m_const_init_reduc * 2;
1645 if (stmt_cost < 0)
1646 stmt_cost = 0;
1648 /* Check profitability for loop interchange. */
1649 if (should_interchange_loops (i_idx, o_idx, datarefs,
1650 (unsigned) iloop.m_num_stmts,
1651 (unsigned) stmt_cost,
1652 iloop.m_loop->inner == NULL))
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1655 fprintf (dump_file,
1656 "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
1657 oloop.m_loop->num, iloop.m_loop->num);
1659 changed_p = true;
1660 interchange_loops (iloop, oloop);
1661 /* No need to update if there is no further loop interchange. */
1662 if (o_idx > 0)
1663 update_data_info (i_idx, o_idx, datarefs, ddrs);
1665 else
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1668 fprintf (dump_file,
1669 "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
1670 oloop.m_loop->num, iloop.m_loop->num);
     /* Clean up the dead code accumulated in m_dce_seeds by IV mapping
	and reduction undoing. */
1673 simple_dce_from_worklist (m_dce_seeds);
1675 if (changed_p && dump_enabled_p ())
1676 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1677 "loops interchanged in loop nest\n");
1679 return changed_p;
1683 /* Loop interchange pass. */
1685 namespace {
1687 const pass_data pass_data_linterchange =
1689 GIMPLE_PASS, /* type */
1690 "linterchange", /* name */
1691 OPTGROUP_LOOP, /* optinfo_flags */
1692 TV_LINTERCHANGE, /* tv_id */
1693 PROP_cfg, /* properties_required */
1694 0, /* properties_provided */
1695 0, /* properties_destroyed */
1696 0, /* todo_flags_start */
1697 0, /* todo_flags_finish */
1700 class pass_linterchange : public gimple_opt_pass
1702 public:
1703 pass_linterchange (gcc::context *ctxt)
1704 : gimple_opt_pass (pass_data_linterchange, ctxt)
1707 /* opt_pass methods: */
1708 opt_pass * clone () final override { return new pass_linterchange (m_ctxt); }
     /* The pass runs only when -floop-interchange is enabled. */
1709 bool gate (function *) final override { return flag_loop_interchange; }
1710 unsigned int execute (function *) final override;
1712 }; // class pass_linterchange
1715 /* Return true if LOOP has proper form for interchange. We check three
1716 conditions in the function:
1717 1) In general, a loop can be interchanged only if it doesn't have
1718 basic blocks other than header, exit and latch besides possible
1719 inner loop nest. This basically restricts loop interchange to
1720 below form loop nests:
1722 header<---+
1725 INNER_LOOP |
1728 exit--->latch
1730 2) Data reference in basic block that executes in different times
1731 than loop head/exit is not allowed.
1732 3) Record the innermost outer loop that doesn't form rectangle loop
1733 nest with LOOP. */
1735 static bool
1736 proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
1738 edge e0, e1, exit;
1740 /* Don't interchange if loop has unsupported information for the moment. */
1741 if (loop->safelen > 0
1742 || loop->constraints != 0
1743 || loop->can_be_parallel
1744 || loop->dont_vectorize
1745 || loop->force_vectorize
1746 || loop->in_oacc_kernels_region
1747 || loop->orig_loop_num != 0
1748 || loop->simduid != NULL_TREE)
1749 return false;
1751 /* Don't interchange if outer loop has basic block other than header, exit
1752 and latch. */
1753 if (loop->inner != NULL
1754 && loop->num_nodes != loop->inner->num_nodes + 3)
1755 return false;
1757 if ((exit = single_dom_exit (loop)) == NULL)
1758 return false;
1760 /* Check control flow on loop header/exit blocks. */
1761 if (loop->header == exit->src
1762 && (EDGE_COUNT (loop->header->preds) != 2
1763 || EDGE_COUNT (loop->header->succs) != 2))
1764 return false;
1765 else if (loop->header != exit->src
1766 && (EDGE_COUNT (loop->header->preds) != 2
1767 || !single_succ_p (loop->header)
1768 || unsupported_edge (single_succ_edge (loop->header))
1769 || EDGE_COUNT (exit->src->succs) != 2
1770 || !single_pred_p (exit->src)
1771 || unsupported_edge (single_pred_edge (exit->src))))
1772 return false;
1774 e0 = EDGE_PRED (loop->header, 0)
1775 e1 = EDGE_PRED (loop->header, 1);
1776 if (unsupported_edge (e0) || unsupported_edge (e1)
1777 || (e0->src != loop->latch && e1->src != loop->latch)
1778 || (e0->src->loop_father == loop && e1->src->loop_father == loop))
1779 return false;
1781 e0 = EDGE_SUCC (exit->src, 0);
1782 e1 = EDGE_SUCC (exit->src, 1);
1783 if (unsupported_edge (e0) || unsupported_edge (e1)
1784 || (e0->dest != loop->latch && e1->dest != loop->latch)
1785 || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
1786 return false;
1788 /* Don't interchange if any reference is in basic block that doesn't
1789 dominate exit block. */
1790 basic_block *bbs = get_loop_body (loop);
1791 for (unsigned i = 0; i < loop->num_nodes; i++)
1793 basic_block bb = bbs[i];
1795 if (bb->loop_father != loop
1796 || bb == loop->header || bb == exit->src
1797 || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
1798 continue;
1800 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
1801 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
1802 if (gimple_vuse (gsi_stmt (gsi)))
1804 free (bbs);
1805 return false;
1808 free (bbs);
     /* LOOP's niters must be computable and analyzable in the immediately
	enclosing loop. */
1810 tree niters = number_of_latch_executions (loop);
1811 niters = analyze_scalar_evolution (loop_outer (loop), niters);
1812 if (!niters || chrec_contains_undetermined (niters))
1813 return false;
1815 /* Record the innermost outer loop that doesn't form rectangle loop nest. */
1816 for (loop_p loop2 = loop_outer (loop);
1817 loop2 && flow_loop_nested_p (*min_outer, loop2);
1818 loop2 = loop_outer (loop2))
1820 niters = instantiate_scev (loop_preheader_edge (loop2),
1821 loop_outer (loop), niters);
1822 if (!evolution_function_is_invariant_p (niters, loop2->num))
1824 *min_outer = loop2;
1825 break;
1828 return true;
1831 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1832 should be interchanged by looking into all DATAREFS. */
1834 static bool
1835 should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
1836 vec<data_reference_p> datarefs)
1838 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1839 gcc_assert (idx > 0);
1841 /* Check if any two adjacent loops should be interchanged. */
1842 for (class loop *loop = innermost;
1843 loop != loop_nest; loop = loop_outer (loop), idx--)
1844 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
1845 loop == innermost, false))
1846 return true;
1848 return false;
1851 /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1852 dependences for loop interchange and store it in DDRS. Note we compute
1853 dependences directly rather than call generic interface so that we can
1854 return on unknown dependence instantly. */
1856 static bool
1857 tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
1858 vec<data_reference_p> datarefs,
1859 vec<ddr_p> *ddrs)
1861 struct data_reference *a, *b;
1862 class loop *innermost = loop_nest.last ();
1864 for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
1866 bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
     /* Read-read pairs carry no dependence; only consider pairs with at
	least one write. */
1867 for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
1868 if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
1870 bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
1871 /* Don't support multiple write references in outer loop. */
1872 if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
1873 return false;
1875 ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
1876 ddrs->safe_push (ddr);
1877 compute_affine_dependence (ddr, loop_nest[0]);
1879 /* Give up if ddr is unknown dependence or classic direction vector
1880 is not available. */
1881 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1882 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
1883 && DDR_NUM_DIR_VECTS (ddr) == 0))
1884 return false;
1886 /* If either data reference is in outer loop of nest, we require
1887 no dependence here because the data reference needs to be moved
1888 into inner loop during interchange. */
1889 if (a_outer_p && b_outer_p
1890 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
1891 continue;
1892 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1893 && (a_outer_p || b_outer_p))
1894 return false;
1898 return true;
1901 /* Prune DATAREFS by removing any data reference not inside of LOOP. */
1903 static inline void
1904 prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
1906 unsigned i, j;
1907 struct data_reference *dr;
1909 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1911 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1912 datarefs[j++] = dr;
1913 else
1915 if (dr->aux)
1917 DR_ACCESS_STRIDE (dr)->release ();
1918 delete (vec<tree> *) dr->aux;
1920 free_data_ref (dr);
1923 datarefs.truncate (j);
1926 /* Find and store data references in DATAREFS for LOOP nest. If there's
1927 difficult data reference in a basic block, we shrink the loop nest to
1928 inner loop of that basic block's father loop. On success, return the
1929 outer loop of the result loop nest. */
1931 static class loop *
1932 prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
1934 class loop *loop_nest = loop;
1935 vec<data_reference_p> *bb_refs;
1936 basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
     /* Clear aux fields; each will later point at the BB's own data
	reference vector, if any. */
1938 for (unsigned i = 0; i < loop->num_nodes; i++)
1939 bbs[i]->aux = NULL;
1941 /* Find data references for all basic blocks. Shrink loop nest on difficult
1942 data reference. */
1943 for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
1945 bb = bbs[i];
1946 if (!flow_bb_inside_loop_p (loop_nest, bb))
1947 continue;
1949 bb_refs = new vec<data_reference_p> ();
1950 if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
1952 loop_nest = bb->loop_father->inner;
     /* A nest of a single loop is useless for interchange; give up. */
1953 if (loop_nest && !loop_nest->inner)
1954 loop_nest = NULL;
1956 free_data_refs (*bb_refs);
1957 delete bb_refs;
1959 else if (bb_refs->is_empty ())
1961 bb_refs->release ();
1962 delete bb_refs;
1964 else
1965 bb->aux = bb_refs;
1968 /* Collect all data references in loop nest. */
1969 for (unsigned i = 0; i < loop->num_nodes; i++)
1971 bb = bbs[i];
1972 if (!bb->aux)
1973 continue;
1975 bb_refs = (vec<data_reference_p> *) bb->aux;
1976 if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
1978 datarefs->safe_splice (*bb_refs);
1979 bb_refs->release ();
1981 else
1982 free_data_refs (*bb_refs);
1984 delete bb_refs;
1985 bb->aux = NULL;
1987 free (bbs);
1989 return loop_nest;
1992 /* Given innermost LOOP, return true if perfect loop nest can be found and
1993 data dependences can be computed. If succeed, record the perfect loop
1994 nest in LOOP_NEST; record all data references in DATAREFS and record all
1995 data dependence relations in DDRS.
1997 We do support a restricted form of imperfect loop nest, i.e, loop nest
1998 with load/store in outer loop initializing/finalizing simple reduction
1999 of the innermost loop. For such outer loop reference, we require that
2000 it has no dependence with others since it will be moved to inner loop
2001 in interchange. */
2003 static bool
2004 prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
2005 vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
2007 class loop *start_loop = NULL, *innermost = loop;
2008 class loop *outermost = loops_for_fn (cfun)->tree_root;
2010 /* Find loop nest from the innermost loop. OUTERMOST bounds how far out
2011 the nest may grow; proper_loop_form_for_interchange shrinks it when an
     outer loop doesn't form a rectangle loop nest. */
2012 while (loop->num != 0 && loop->inner == start_loop
2013 && flow_loop_nested_p (outermost, loop))
2015 if (!proper_loop_form_for_interchange (loop, &outermost))
2016 break;
2018 start_loop = loop;
2019 /* If this loop has sibling loop, the father loop won't be in perfect
2020 loop nest. */
2021 if (loop->next != NULL)
2022 break;
2024 loop = loop_outer (loop);
2026 if (!start_loop || !start_loop->inner)
2027 return false;
2029 /* Prepare the data reference vector for the loop nest, pruning outer
2030 loops we cannot handle. */
2031 start_loop = prepare_data_references (start_loop, datarefs);
2032 if (!start_loop
2033 /* Check if there is no data reference. */
2034 || datarefs->is_empty ()
2035 /* Check if there are too many of data references. */
2036 || (int) datarefs->length () > MAX_DATAREFS)
2037 return false;
2039 /* Compute access strides for all data references, pruning outer
2040 loops we cannot analyze refs in. */
2041 start_loop = compute_access_strides (start_loop, innermost, *datarefs);
2042 if (!start_loop)
2043 return false;
2045 /* Check if any interchange is profitable in the loop nest. */
2046 if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
2047 return false;
2049 /* Check if data dependences can be computed for loop nest starting from
2050 start_loop. */
2051 loop = start_loop;
2052 do {
2053 loop_nest->truncate (0);
2055 if (loop != start_loop)
2056 prune_datarefs_not_in_loop (start_loop, *datarefs);
2058 if (find_loop_nest (start_loop, loop_nest)
2059 && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
2061 if (dump_file && (dump_flags & TDF_DETAILS))
2062 fprintf (dump_file,
2063 "\nConsider loop interchange for loop_nest<%d - %d>\n",
2064 start_loop->num, innermost->num);
2066 if (loop != start_loop)
2067 prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2070 dump_access_strides (*datarefs);
2072 return true;
2075 free_dependence_relations (*ddrs);
2076 *ddrs = vNULL;
2077 /* Try to compute data dependences with the outermost loop stripped. */
2078 loop = start_loop;
2079 start_loop = start_loop->inner;
2080 } while (start_loop && start_loop->inner);
2082 return false;
2085 /* Main entry for loop interchange pass. */
2087 unsigned int
2088 pass_linterchange::execute (function *fun)
2090 if (number_of_loops (fun) <= 2)
2091 return 0;
2093 bool changed_p = false;
2094 for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
2096 vec<loop_p> loop_nest = vNULL;
2097 vec<data_reference_p> datarefs = vNULL;
2098 vec<ddr_p> ddrs = vNULL;
2099 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2101 tree_loop_interchange loop_interchange (loop_nest);
2102 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2104 free_dependence_relations (ddrs);
2105 free_data_refs_with_aux (datarefs);
2106 loop_nest.release ();
2109 if (changed_p)
2111 unsigned todo = TODO_update_ssa_only_virtuals;
2112 todo |= loop_invariant_motion_in_fun (cfun, false);
2113 scev_reset ();
2114 return todo;
2116 return 0;
2119 } // anon namespace
2121 gimple_opt_pass *
2122 make_pass_linterchange (gcc::context *ctxt)
2124 return new pass_linterchange (ctxt);