* gcc-interface/trans.c (Subprogram_Body_to_gnu): Initialize locus.
[official-gcc.git] / gcc / gimple-loop-interchange.cc
blob1d1cf96c81236943cee5cc04bf3ca515916be1dc
1 /* Loop interchange.
2 Copyright (C) 2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "is-a.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "gimple-pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "cfgloop.h"
36 #include "params.h"
37 #include "tree-ssa.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-ssa-loop-manip.h"
40 #include "tree-ssa-loop-niter.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-dce.h"
43 #include "tree-data-ref.h"
44 #include "tree-vectorizer.h"
46 /* This pass performs loop interchange: for example, the loop nest
48 for (int j = 0; j < N; j++)
49 for (int k = 0; k < N; k++)
50 for (int i = 0; i < N; i++)
51 c[i][j] = c[i][j] + a[i][k]*b[k][j];
53 is transformed to
55 for (int i = 0; i < N; i++)
56 for (int j = 0; j < N; j++)
57 for (int k = 0; k < N; k++)
58 c[i][j] = c[i][j] + a[i][k]*b[k][j];
60 This pass implements loop interchange in the following steps:
62 1) Find perfect loop nest for each innermost loop and compute data
63 dependence relations for it. For above example, loop nest is
64 <loop_j, loop_k, loop_i>.
65 2) From innermost to outermost loop, this pass tries to interchange
66 each loop pair. For above case, it firstly tries to interchange
67 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
68 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
69 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
70 loop to the outermost position. For loop pair <loop_i, loop_j>
71 to be interchanged, we:
72 3) Check if data dependence relations are valid for loop interchange.
73 4) Check if both loops can be interchanged in terms of transformation.
74 5) Check if interchanging the two loops is profitable.
75 6) Interchange the two loops by mapping induction variables.
77 This pass also handles reductions in loop nest. So far we only support
78 simple reduction of inner loop and double reduction of the loop nest. */
80 /* Maximum number of stmts in each loop that should be interchanged. */
81 #define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
82 /* Maximum number of data references in loop nest. */
83 #define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
85 /* Comparison ratio of access stride between inner/outer loops to be
86 interchanged. This is the minimum stride ratio for loop interchange
87 to be profitable. */
88 #define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
89 /* The same as above, but we require higher ratio for interchanging the
90 innermost two loops. */
91 #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
93 /* Vector of strides that DR accesses in each level loop of a loop nest. */
94 #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
/* Structure recording a loop induction variable, i.e. a variable whose
   value in the loop follows the affine form {init, +, step}.  */
typedef struct induction
{
  /* IV itself, the SSA name defined by the loop-header PHI node.  */
  tree var;
  /* IV's initializing value, which is the init arg of the IV PHI node.  */
  tree init_val;
  /* IV's initializing expr, which is (the expanded result of) init_val.  */
  tree init_expr;
  /* IV's step, invariant in the loop.  */
  tree step;
} *induction_p;
/* Enum type for loop reduction variable.  */
enum reduction_type
{
  /* Not (yet) classified as a supported reduction.  */
  UNKNOWN_RTYPE = 0,
  /* Simple reduction of the inner loop, possibly created by store motion;
     see loop_cand::classify_simple_reduction.  */
  SIMPLE_RTYPE,
  /* Double reduction spanning both loops of the nest; see
     loop_cand::analyze_oloop_reduction_var.  */
  DOUBLE_RTYPE
};
/* Structure recording loop reduction variable.  */
typedef struct reduction
{
  /* Reduction itself, the SSA name defined by the loop-header PHI node.  */
  tree var;
  /* PHI node defining reduction variable.  */
  gphi *phi;
  /* Init and next variables of the reduction, i.e. the preheader and
     latch arguments of PHI.  */
  tree init;
  tree next;
  /* Lcssa PHI node if reduction is used outside of its definition loop.  */
  gphi *lcssa_phi;
  /* Stmts defining init and next.  */
  gimple *producer;
  gimple *consumer;
  /* If init is loaded from memory, this is the loading memory reference.  */
  tree init_ref;
  /* If reduction is finally stored to memory, this is the stored memory
     reference.  */
  tree fini_ref;
  /* Classification of this reduction; UNKNOWN_RTYPE until analyzed.  */
  enum reduction_type type;
} *reduction_p;
141 /* Dump reduction RE. */
143 static void
144 dump_reduction (reduction_p re)
146 if (re->type == SIMPLE_RTYPE)
147 fprintf (dump_file, " Simple reduction: ");
148 else if (re->type == DOUBLE_RTYPE)
149 fprintf (dump_file, " Double reduction: ");
150 else
151 fprintf (dump_file, " Unknown reduction: ");
153 print_gimple_stmt (dump_file, re->phi, 0);
156 /* Dump LOOP's induction IV. */
157 static void
158 dump_induction (struct loop *loop, induction_p iv)
160 fprintf (dump_file, " Induction: ");
161 print_generic_expr (dump_file, iv->var, TDF_SLIM);
162 fprintf (dump_file, " = {");
163 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
164 fprintf (dump_file, ", ");
165 print_generic_expr (dump_file, iv->step, TDF_SLIM);
166 fprintf (dump_file, "}_%d\n", loop->num);
/* Loop candidate for interchange.  Wraps one loop of the nest together
   with the analysis results (inductions, reductions, lcssa PHIs) needed
   to decide and perform interchange.  */

struct loop_cand
{
  loop_cand (struct loop *, struct loop *);
  ~loop_cand ();

  reduction_p find_reduction_by_stmt (gimple *);
  void classify_simple_reduction (reduction_p);
  bool analyze_iloop_reduction_var (tree);
  bool analyze_oloop_reduction_var (loop_cand *, tree);
  bool analyze_induction_var (tree, tree);
  bool analyze_carried_vars (loop_cand *);
  bool analyze_lcssa_phis (void);
  bool can_interchange_p (loop_cand *);
  bool supported_operations (basic_block, loop_cand *, int *);
  void undo_simple_reduction (reduction_p, bitmap);

  /* The loop itself.  */
  struct loop *m_loop;
  /* The outer loop for interchange.  It equals to loop if this loop cand
     itself represents the outer loop.  */
  struct loop *m_outer;
  /* Vector of induction variables in loop.  */
  vec<induction_p> m_inductions;
  /* Vector of reduction variables in loop.  */
  vec<reduction_p> m_reductions;
  /* Lcssa PHI nodes of this loop.  */
  vec<gphi *> m_lcssa_nodes;
  /* Single exit edge of this loop.  */
  edge m_exit;
  /* Basic blocks of this loop.  */
  basic_block *m_bbs;
};
/* Constructor.  LOOP is the candidate loop, OUTER the outer loop of the
   pair being considered for interchange (equal to LOOP when this cand is
   itself the outer loop).  Analysis vectors start small; three entries is
   enough for the common case.  */

loop_cand::loop_cand (struct loop *loop, struct loop *outer)
  : m_loop (loop), m_outer (outer),
    m_exit (single_exit (loop)), m_bbs (get_loop_body (loop))
{
  m_inductions.create (3);
  m_reductions.create (3);
  m_lcssa_nodes.create (3);
}
215 /* Destructor. */
217 loop_cand::~loop_cand ()
219 induction_p iv;
220 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
221 free (iv);
223 reduction_p re;
224 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
225 free (re);
227 m_inductions.release ();
228 m_reductions.release ();
229 m_lcssa_nodes.release ();
230 free (m_bbs);
233 /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
235 static gimple *
236 single_use_in_loop (tree var, struct loop *loop)
238 gimple *stmt, *res = NULL;
239 use_operand_p use_p;
240 imm_use_iterator iterator;
242 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
244 stmt = USE_STMT (use_p);
245 if (is_gimple_debug (stmt))
246 continue;
248 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
249 continue;
251 if (res)
252 return NULL;
254 res = stmt;
256 return res;
259 /* Return true if E is unsupported in loop interchange, i.e, E is a complex
260 edge or part of irreducible loop. */
262 static inline bool
263 unsupported_edge (edge e)
265 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
268 /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
269 stmt. */
271 reduction_p
272 loop_cand::find_reduction_by_stmt (gimple *stmt)
274 gphi *phi = dyn_cast <gphi *> (stmt);
275 reduction_p re;
277 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
278 if ((phi != NULL && phi == re->lcssa_phi)
279 || (stmt == re->producer || stmt == re->consumer))
280 return re;
282 return NULL;
/* Return true if all stmts in BB can be supported by loop interchange,
   otherwise return false.  ILOOP is not NULL if this loop_cand is the
   outer loop in loop nest.  Add the number of supported statements to
   NUM_STMTS.  */

bool
loop_cand::supported_operations (basic_block bb, loop_cand *iloop,
				 int *num_stmts)
{
  int bb_num_stmts = 0;
  gphi_iterator psi;
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;

      /* Stmts with side effects can't be reordered by interchange.  */
      if (gimple_has_side_effects (stmt))
	return false;

      bb_num_stmts++;
      if (gcall *call = dyn_cast <gcall *> (stmt))
	{
	  /* In basic block of outer loop, the call should be cheap since
	     it will be moved to inner loop.  */
	  if (iloop != NULL
	      && !gimple_inexpensive_call_p (call))
	    return false;
	  continue;
	}

      /* Inner-loop stmts and stmts not touching memory need no further
	 checking.  */
      if (!iloop || !gimple_vuse (stmt))
	continue;

      /* Support stmt accessing memory in outer loop only if it is for inner
	 loop's reduction.  */
      if (iloop->find_reduction_by_stmt (stmt))
	continue;

      tree lhs;
      /* Support loop invariant memory reference if it's only used once by
	 inner loop.  */
      /* ???  How's this checking for invariantness?  */
      if (gimple_assign_single_p (stmt)
	  && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
	  && TREE_CODE (lhs) == SSA_NAME
	  && single_use_in_loop (lhs, iloop->m_loop))
	continue;

      return false;
    }
  *num_stmts += bb_num_stmts;

  /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
     loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
  if (!iloop || bb == m_loop->header
      || bb == iloop->m_exit->dest)
    return true;

  /* Don't allow any other PHI nodes.  */
  for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
    if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
      return false;

  return true;
}
/* Return true if current loop_cand can be interchanged.  ILOOP is not NULL
   if current loop_cand is outer loop in loop nest.  */

bool
loop_cand::can_interchange_p (loop_cand *iloop)
{
  /* For now we only support at most one reduction.  */
  unsigned allowed_reduction_num = 1;

  /* Only support reduction if the loop nest to be interchanged is the
     innermost two loops.  */
  if ((iloop == NULL && m_loop->inner != NULL)
      || (iloop != NULL && iloop->m_loop->inner != NULL))
    allowed_reduction_num = 0;

  /* Too many reductions, or a single reduction we failed to classify.  */
  if (m_reductions.length () > allowed_reduction_num
      || (m_reductions.length () == 1
	  && m_reductions[0]->type == UNKNOWN_RTYPE))
    return false;

  /* Only support lcssa PHI node which is for reduction.  */
  if (m_lcssa_nodes.length () > allowed_reduction_num)
    return false;

  int num_stmts = 0;
  /* Check basic blocks other than loop header/exit.  */
  for (unsigned i = 0; i < m_loop->num_nodes; i++)
    {
      basic_block bb = m_bbs[i];

      /* Skip basic blocks of inner loops.  */
      if (bb->loop_father != m_loop)
	continue;

      /* Check if basic block has any unsupported operation.  */
      if (!supported_operations (bb, iloop, &num_stmts))
	return false;

      /* Check if loop has too many stmts.  */
      if (num_stmts > MAX_NUM_STMT)
	return false;
    }

  return true;
}
/* Programmers and optimizers (like loop store motion) may optimize code:

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
	 a[i] += b[j][i] * c[j][i];

   into reduction:

     for (int i = 0; i < N; i++)
       {
	 // producer.  Note sum can be initialized to a constant.
	 int sum = a[i];
	 for (int j = 0; j < N; j++)
	   sum += b[j][i] * c[j][i];
	 // consumer.
	 a[i] = sum;
       }

   The result code can't be interchanged without undoing the optimization.
   This function classifies this kind reduction and records information so
   that we can undo the store motion during interchange.  On success RE's
   type is set to SIMPLE_RTYPE; on any early return it stays UNKNOWN_RTYPE.  */

void
loop_cand::classify_simple_reduction (reduction_p re)
{
  gimple *producer, *consumer;

  /* Check init variable of reduction and how it is initialized.  */
  if (TREE_CODE (re->init) == SSA_NAME)
    {
      producer = SSA_NAME_DEF_STMT (re->init);
      re->producer = producer;
      basic_block bb = gimple_bb (producer);
      /* The producing load must sit directly in the outer loop.  */
      if (!bb || bb->loop_father != m_outer)
	return;

      if (!gimple_assign_load_p (producer))
	return;

      re->init_ref = gimple_assign_rhs1 (producer);
    }
  else if (!CONSTANT_CLASS_P (re->init))
    return;

  /* Check how reduction variable is used.  */
  consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
  if (!consumer
      || !gimple_store_p (consumer))
    return;

  re->fini_ref = gimple_get_lhs (consumer);
  re->consumer = consumer;

  /* Simple reduction with constant initializer.  */
  if (!re->init_ref)
    {
      gcc_assert (CONSTANT_CLASS_P (re->init));
      re->init_ref = unshare_expr (re->fini_ref);
    }

  /* Require memory references in producer and consumer are the same so
     that we can undo reduction during interchange.  */
  if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    return;

  re->type = SIMPLE_RTYPE;
}
/* Analyze reduction variable VAR for inner loop of the loop nest to be
   interchanged.  Return true if analysis succeeds.  */

bool
loop_cand::analyze_iloop_reduction_var (tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def, *single_use = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  /* The updated value must be computed inside the loop.  */
  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* In restricted reduction, the var is (and must be) used in defining
     the updated var.  The process can be depicted as below:

		  var ;; = PHI<init, next>
		   |
		   |
		   v
      +---------------------+
      | reduction operators | <-- other operands
      +---------------------+
		   |
		   |
		   v
		  next

     In terms loop interchange, we don't change how NEXT is computed based
     on VAR and OTHER OPERANDS.  In case of double reduction in loop nest
     to be interchanged, we don't changed it at all.  In the case of simple
     reduction in inner loop, we only make change how VAR/NEXT is loaded or
     stored.  With these conditions, we can relax restrictions on reduction
     in a way that reduction operation is seen as black box.  In general,
     we can ignore reassociation of reduction operator; we can handle fake
     reductions in which VAR is not even used to compute NEXT.  */
  if (! single_imm_use (var, &use_p, &single_use)
      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    return false;

  /* Check the reduction operation.  We require a left-associative operation.
     For FP math we also need to be allowed to associate operations.  */
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      if (! (associative_tree_code (code)
	     || (code == MINUS_EXPR
		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
	  || (FLOAT_TYPE_P (TREE_TYPE (var))
	      && ! flag_associative_math))
	return false;
    }
  else
    return false;

  /* Handle and verify a series of stmts feeding the reduction op.  */
  if (single_use != next_def
      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
				gimple_assign_rhs_code (single_use)))
    return false;

  /* Only support cases in which INIT is used in inner loop.  */
  if (TREE_CODE (init) == SSA_NAME)
    FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
      {
	stmt = USE_STMT (use_p);
	if (is_gimple_debug (stmt))
	  continue;

	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
	  return false;
      }

  /* NEXT may only be used by PHI itself and by a single lcssa PHI in the
     exit destination block.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (use_phi != NULL
	  && lcssa_phi == NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  /* Record the reduction; type stays UNKNOWN_RTYPE unless
     classify_simple_reduction can classify it.  */
  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;

  classify_simple_reduction (re);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
/* Analyze reduction variable VAR for outer loop of the loop nest to be
   interchanged.  ILOOP is not NULL and points to inner loop.  For the
   moment, we only support double reduction for outer loop, like:

     for (int i = 0; i < n; i++)
       {
	 int sum = 0;

	 for (int j = 0; j < n; j++)	    // outer loop
	   for (int k = 0; k < n; k++)	    // inner loop
	     sum += a[i][k]*b[k][j];

	 s[i] = sum;
       }

   Note the innermost two loops are the loop nest to be interchanged.
   Return true if analysis succeeds.  */

bool
loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  /* The updated value must be computed inside the loop.  */
  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* Find inner loop's simple reduction that uses var as initializer.  */
  reduction_p inner_re = NULL;
  for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
      break;

  /* The inner reduction must still be unclassified and fed directly by
     this PHI.  */
  if (inner_re == NULL
      || inner_re->type != UNKNOWN_RTYPE
      || inner_re->producer != phi)
    return false;

  /* In case of double reduction, outer loop's reduction should be updated
     by inner loop's simple reduction.  */
  if (next_def != inner_re->lcssa_phi)
    return false;

  /* Outer loop's reduction should only be used to initialize inner loop's
     simple reduction.  */
  if (! single_imm_use (var, &use_p, &stmt)
      || stmt != inner_re->phi)
    return false;

  /* Check this reduction is correctly used outside of loop via lcssa phi.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (lcssa_phi == NULL
	  && use_phi != NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  /* Record the double reduction; also promote the inner loop's matching
     reduction to DOUBLE_RTYPE.  */
  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;
  re->type = DOUBLE_RTYPE;
  inner_re->type = DOUBLE_RTYPE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
/* Return true if VAR is induction variable of current loop whose scev is
   specified by CHREC.  */

bool
loop_cand::analyze_induction_var (tree var, tree chrec)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));

  /* Var is loop invariant, though it's unlikely to happen.  Record it as
     an IV with step zero.  */
  if (tree_does_not_contain_chrecs (chrec))
    {
      struct induction *iv = XCNEW (struct induction);
      iv->var = var;
      iv->init_val = init;
      iv->init_expr = chrec;
      iv->step = build_int_cst (TREE_TYPE (chrec), 0);
      m_inductions.safe_push (iv);
      return true;
    }

  /* Only simple affine IVs of this very loop are supported: the chrec must
     be {left, +, right}_loop with invariant left and right.  */
  if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
      || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
      || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
      || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    return false;

  struct induction *iv = XCNEW (struct induction);
  iv->var = var;
  iv->init_val = init;
  iv->init_expr = CHREC_LEFT (chrec);
  iv->step = CHREC_RIGHT (chrec);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_induction (m_loop, iv);

  m_inductions.safe_push (iv);
  return true;
}
/* Return true if all loop carried variables defined in loop header can
   be successfully analyzed.  ILOOP is not NULL if this loop_cand is the
   outer loop of the pair.  */

bool
loop_cand::analyze_carried_vars (loop_cand *iloop)
{
  edge e = loop_preheader_edge (m_outer);
  gphi_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);

  for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      tree var = PHI_RESULT (phi);
      if (virtual_operand_p (var))
	continue;

      tree chrec = analyze_scalar_evolution (m_loop, var);
      chrec = instantiate_scev (e, m_loop, chrec);

      /* Analyze var as reduction variable.  A chrec that scev can't
	 determine, or that depends on values defined in the outer loop,
	 is not an induction — try it as a reduction instead.  */
      if (chrec_contains_undetermined (chrec)
	  || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
	{
	  if (iloop && !analyze_oloop_reduction_var (iloop, var))
	    return false;
	  if (!iloop && !analyze_iloop_reduction_var (var))
	    return false;
	}
      /* Analyze var as induction variable.  */
      else if (!analyze_induction_var (var, chrec))
	return false;
    }

  return true;
}
770 /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
772 bool
773 loop_cand::analyze_lcssa_phis (void)
775 gphi_iterator gsi;
776 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
778 gphi *phi = gsi.phi ();
780 if (virtual_operand_p (PHI_RESULT (phi)))
781 continue;
783 /* TODO: We only support lcssa phi for reduction for now. */
784 if (!find_reduction_by_stmt (phi))
785 return false;
788 return true;
/* CONSUMER is a stmt in BB storing reduction result into memory object.
   When the reduction is intialized from constant value, we need to add
   a stmt loading from the memory object to target basic block in inner
   loop during undoing the reduction.  Problem is that memory reference
   may use ssa variables not dominating the target basic block.  This
   function finds all stmts on which CONSUMER depends in basic block BB,
   records and returns them via STMTS.  The found stmts are removed from
   BB and appended to STMTS in their original order.  */

static void
find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
{
  auto_vec<gimple *, 4> worklist;
  use_operand_p use_p;
  ssa_op_iter iter;
  gimple *stmt, *def_stmt;
  gimple_stmt_iterator gsi;

  /* First clear flag for stmts in bb.  GF_PLF_1 marks membership in the
     dependence set.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);

  /* DFS search all depended stmts in bb and mark flag for these stmts.  */
  worklist.safe_push (consumer);
  while (!worklist.is_empty ())
    {
      stmt = worklist.pop ();
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));

	  /* PHIs, defs outside BB, and already-marked stmts end the walk.  */
	  if (is_a <gphi *> (def_stmt)
	      || gimple_bb (def_stmt) != bb
	      || gimple_plf (def_stmt, GF_PLF_1))
	    continue;

	  worklist.safe_push (def_stmt);
	}
      gimple_set_plf (stmt, GF_PLF_1, true);
    }

  /* Walk BB up to (but not including) CONSUMER, detaching marked stmts.  */
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    {
      /* Move dep stmts to sequence STMTS.  */
      if (gimple_plf (stmt, GF_PLF_1))
	{
	  /* gsi_remove advances GSI, so no explicit gsi_next here.  */
	  gsi_remove (&gsi, false);
	  gimple_seq_add_stmt_without_update (stmts, stmt);
	}
      else
	gsi_next_nondebug (&gsi);
    }
}
/* User can write, optimizers can generate simple reduction RE for inner
   loop.  In order to make interchange valid, we have to undo reduction by
   moving producer and consumer stmts into the inner loop.  For example,
   below code:

     init = MEM_REF[idx];		//producer
     loop:
       var = phi<init, next>
       next = var op ...
     reduc_sum = phi<next>
     MEM_REF[idx] = reduc_sum		//consumer

   is transformed into:

     loop:
       new_var = MEM_REF[idx];		//producer after moving
       next = new_var op ...
       MEM_REF[idx] = next;		//consumer after moving

   Note if the reduction variable is initialized to constant, like:

     var = phi<0.0, next>

   we compute new_var as below:

     loop:
       tmp = MEM_REF[idx];
       new_var = !first_iteration ? tmp : 0.0;

   so that the initial const is used in the first iteration of loop.  Also
   record ssa variables for dead code elimination in DCE_SEEDS.  */

void
loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
{
  gimple *stmt;
  gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
  gimple_seq stmts = NULL;
  tree new_var;

  /* Prepare the initialization stmts and insert it to inner loop.  */
  if (re->producer != NULL)
    {
      /* The producer is the load feeding the reduction; detach it and
	 re-insert it inside the inner loop.  */
      gimple_set_vuse (re->producer, NULL_TREE);
      from = gsi_for_stmt (re->producer);
      gsi_remove (&from, false);
      gimple_seq_add_stmt_without_update (&stmts, re->producer);
      new_var = re->init;
    }
  else
    {
      /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
      find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
      /* Because we generate new stmt loading from the MEM_REF to TMP.  */
      tree cond, tmp = copy_ssa_name (re->var);
      stmt = gimple_build_assign (tmp, re->init_ref);
      gimple_seq_add_stmt_without_update (&stmts, stmt);

      /* Init new_var to MEM_REF or CONST depending on if it is the first
	 iteration.  */
      induction_p iv = m_inductions[0];
      cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
      new_var = copy_ssa_name (re->var);
      stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
      gimple_seq_add_stmt_without_update (&stmts, stmt);
    }
  gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);

  /* Replace all uses of reduction var with new variable.  */
  use_operand_p use_p;
  imm_use_iterator iterator;
  FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
	SET_USE (use_p, new_var);

      update_stmt (stmt);
    }

  /* Move consumer stmt into inner loop, just after reduction next's def.  */
  unlink_stmt_vdef (re->consumer);
  release_ssa_name (gimple_vdef (re->consumer));
  gimple_set_vdef (re->consumer, NULL_TREE);
  gimple_set_vuse (re->consumer, NULL_TREE);
  gimple_assign_set_rhs1 (re->consumer, re->next);
  from = gsi_for_stmt (re->consumer);
  to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
  gsi_move_after (&from, &to);

  /* Mark the reduction variables for DCE.  */
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
}
938 /* Free DATAREFS and its auxiliary memory. */
940 static void
941 free_data_refs_with_aux (vec<data_reference_p> datarefs)
943 data_reference_p dr;
944 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
945 if (dr->aux != NULL)
947 DR_ACCESS_STRIDE (dr)->release ();
948 delete (vec<tree> *) dr->aux;
951 free_data_refs (datarefs);
/* Class for loop interchange transformation.  Drives interchange over a
   whole loop nest and owns the DCE seed bitmap used for cleanup.  */

class tree_loop_interchange
{
public:
  tree_loop_interchange (vec<struct loop *> loop_nest)
    : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
      m_dce_seeds (BITMAP_ALLOC (NULL)) { }
  ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
  bool interchange (vec<data_reference_p>, vec<ddr_p>);

private:
  void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
  bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
  void interchange_loops (loop_cand &, loop_cand &);
  void map_inductions_to_loop (loop_cand &, loop_cand &);
  void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);

  /* The whole loop nest in which interchange is ongoing.  */
  vec<struct loop *> m_loop_nest;
  /* We create new IV which is only used in loop's exit condition check.
     In case of 3-level loop nest interchange, when we interchange the
     innermost two loops, new IV created in the middle level loop does
     not need to be preserved in interchanging the outermost two loops
     later.  We record the IV so that it can be skipped.  */
  tree m_niters_iv_var;
  /* Bitmap of seed variables for dead code elimination after interchange.  */
  bitmap m_dce_seeds;
};
984 /* Update data refs' access stride and dependence information after loop
985 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
986 nest. DATAREFS are data references. DDRS are data dependences. */
988 void
989 tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
990 vec<data_reference_p> datarefs,
991 vec<ddr_p> ddrs)
993 struct data_reference *dr;
994 struct data_dependence_relation *ddr;
996 /* Update strides of data references. */
997 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
999 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1000 gcc_assert (stride->length () > i_idx);
1001 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
1003 /* Update data dependences. */
1004 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1005 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1007 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1009 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1010 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
/* Check data dependence relations, return TRUE if it's valid to interchange
   two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
   loops is valid only if dist vector, after interchanging, doesn't have '>'
   as the leftmost non-'=' direction.  Practically, this function have been
   conservative here by not checking some valid cases.  */

bool
tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
					       vec<ddr_p> ddrs)
{
  struct data_dependence_relation *ddr;

  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    {
      /* Skip no-dependence case.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
	{
	  lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
	  unsigned level = dependence_level (dist_vect, m_loop_nest.length ());

	  /* If there is no carried dependence.  */
	  if (level == 0)
	    continue;

	  /* dependence_level returns a 1-based level; make it an index.  */
	  level --;

	  /* If dependence is not carried by any loop in between the two
	     loops [oloop, iloop] to interchange.  */
	  if (level < o_idx || level > i_idx)
	    continue;

	  /* Be conservative, skip case if either direction at i_idx/o_idx
	     levels is not '=' or '<'.  */
	  if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
	    return false;
	}
    }

  return true;
}
1059 /* Interchange two loops specified by ILOOP and OLOOP. */
1061 void
1062 tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
1064 reduction_p re;
1065 gimple_stmt_iterator gsi;
1066 tree i_niters, o_niters, var_after;
1068 /* Undo inner loop's simple reduction. */
1069 for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
1070 if (re->type != DOUBLE_RTYPE)
1072 if (re->producer)
1073 reset_debug_uses (re->producer);
1075 iloop.undo_simple_reduction (re, m_dce_seeds);
1078 /* Only need to reset debug uses for double reduction. */
1079 for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
1081 gcc_assert (re->type == DOUBLE_RTYPE);
1082 reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
1083 reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
/* Prepare niters for both loops.  Both are gimplified into the preheader
   of the outermost loop of the nest, where they are guaranteed to be
   available after instantiation.  */
1086 /* Prepare niters for both loops. */
1087 struct loop *loop_nest = m_loop_nest[0];
1088 edge instantiate_below = loop_preheader_edge (loop_nest);
1089 gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
1090 i_niters = number_of_latch_executions (iloop.m_loop);
1091 i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
1092 i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
1093 i_niters);
1094 i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
1095 NULL_TREE, false, GSI_CONTINUE_LINKING);
1096 o_niters = number_of_latch_executions (oloop.m_loop);
/* For a non-outermost OLOOP the niters expression must first be expressed
   relative to the nest before it can be instantiated in the preheader.  */
1097 if (oloop.m_loop != loop_nest)
1099 o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
1100 o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
1101 o_niters);
1103 o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
1104 NULL_TREE, false, GSI_CONTINUE_LINKING);
1106 /* Move src's code to tgt loop. This is necessary when src is the outer
1107 loop and tgt is the inner loop. */
1108 move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
1110 /* Map outer loop's IV to inner loop, and vice versa. */
1111 map_inductions_to_loop (oloop, iloop);
1112 map_inductions_to_loop (iloop, oloop);
1114 /* Create canonical IV for both loops. Note canonical IV for outer/inner
1115 loop is actually from inner/outer loop. Also we record the new IV
1116 created for the outer loop so that it can be skipped in later loop
1117 interchange. */
1118 create_canonical_iv (oloop.m_loop, oloop.m_exit,
1119 i_niters, &m_niters_iv_var, &var_after);
1120 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1121 create_canonical_iv (iloop.m_loop, iloop.m_exit,
1122 o_niters, NULL, &var_after);
1123 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
/* The old bound information no longer describes the interchanged loops;
   drop it so later passes recompute it.  */
1125 /* Scrap niters estimation of interchanged loops. */
1126 iloop.m_loop->any_upper_bound = false;
1127 iloop.m_loop->any_likely_upper_bound = false;
1128 free_numbers_of_iterations_estimates (iloop.m_loop);
1129 oloop.m_loop->any_upper_bound = false;
1130 oloop.m_loop->any_likely_upper_bound = false;
1131 free_numbers_of_iterations_estimates (oloop.m_loop);
1133 /* ??? The association between the loop data structure and the
1134 CFG changed, so what was loop N at the source level is now
1135 loop M. We should think of retaining the association or breaking
1136 it fully by creating a new loop instead of re-using the "wrong" one. */
1139 /* Map induction variables of SRC loop to TGT loop. The function firstly
1140 creates the same IV of SRC loop in TGT loop, then deletes the original
1141 IV and re-initializes it using the newly created IV. For example, loop
1142 nest:
1144 for (i = 0; i < N; i++)
1145 for (j = 0; j < M; j++)
1147 //use of i;
1148 //use of j;
1151 will be transformed into:
1153 for (jj = 0; jj < M; jj++)
1154 for (ii = 0; ii < N; ii++)
1156 //use of ii;
1157 //use of jj;
1160 after loop interchange. */
1162 void
1163 tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
1165 induction_p iv;
1166 edge e = tgt.m_exit;
1167 gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
1169 /* Map source loop's IV to target loop. */
1170 for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
1172 gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
1173 gcc_assert (is_a <gphi *> (stmt));
1175 use_operand_p use_p;
/* The canonical IV created by a previous interchange (m_niters_iv_var)
   is not re-created in the target loop; its PHI definition is still
   removed below like any other IV.  */
1176 /* Only map original IV to target loop. */
1177 if (m_niters_iv_var != iv->var)
1179 /* Map the IV by creating the same one in target loop. */
1180 tree var_before, var_after;
1181 tree base = unshare_expr (iv->init_expr);
1182 tree step = unshare_expr (iv->step);
1183 create_iv (base, step, SSA_NAME_VAR (iv->var),
1184 tgt.m_loop, &incr_pos, false, &var_before, &var_after);
1185 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
1186 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1188 /* Replace uses of the original IV var with newly created IV var. */
1189 imm_use_iterator imm_iter;
1190 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
1192 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1193 SET_USE (use_p, var_before);
1195 update_stmt (use_stmt);
/* Seed DCE with the operands of the dead PHI so the IV's feeding
   computation can be cleaned up afterwards.  */
1199 /* Mark all uses for DCE. */
1200 ssa_op_iter op_iter;
1201 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
1203 tree use = USE_FROM_PTR (use_p);
1204 if (TREE_CODE (use) == SSA_NAME
1205 && ! SSA_NAME_IS_DEFAULT_DEF (use))
1206 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
1209 /* Delete definition of the original IV in the source loop. */
1210 gsi = gsi_for_stmt (stmt);
1211 remove_phi_node (&gsi, true);
1215 /* Move stmts of outer loop to inner loop. */
1217 void
1218 tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
1219 struct loop *inner,
1220 basic_block *outer_bbs)
1222 basic_block oloop_exit_bb = single_exit (outer)->src;
1223 gimple_stmt_iterator gsi, to;
1225 for (unsigned i = 0; i < outer->num_nodes; i++)
1227 basic_block bb = outer_bbs[i];
1229 /* Skip basic blocks of inner loop. */
1230 if (flow_bb_inside_loop_p (inner, bb))
1231 continue;
1233 /* Move code from header/latch to header/latch. */
1234 if (bb == outer->header)
1235 to = gsi_after_labels (inner->header);
1236 else if (bb == outer->latch)
1237 to = gsi_after_labels (inner->latch);
1238 else
1239 /* Otherwise, simply move to exit->src. */
1240 to = gsi_last_bb (single_exit (inner)->src);
1242 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
1244 gimple *stmt = gsi_stmt (gsi);
/* Leave the outer loop's exit condition where it is.  */
1246 if (oloop_exit_bb == bb
1247 && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
1249 gsi_next (&gsi);
1250 continue;
/* Drop virtual operands on the moved statement; virtual SSA form is
   rebuilt afterwards (see TODO_update_ssa_only_virtuals returned by
   pass_linterchange::execute).  */
1253 if (gimple_vuse (stmt))
1254 gimple_set_vuse (stmt, NULL_TREE);
1255 if (gimple_vdef (stmt))
1257 unlink_stmt_vdef (stmt);
1258 release_ssa_name (gimple_vdef (stmt));
1259 gimple_set_vdef (stmt, NULL_TREE);
1262 reset_debug_uses (stmt);
1263 gsi_move_before (&gsi, &to);
1268 /* Given data reference DR in LOOP_NEST, the function computes DR's access
1269 stride at each level of loop from innermost LOOP to outer. On success,
1270 it saves access stride at each level loop in a vector which is pointed
1271 by DR->aux. For example:
1273 int arr[100][100][100];
1274 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1275 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1276 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1277 arr[i][j - 1][k] = 0; */
1279 static void
1280 compute_access_stride (struct loop *loop_nest, struct loop *loop,
1281 data_reference_p dr)
1283 vec<tree> *strides = new vec<tree> ();
1284 basic_block bb = gimple_bb (DR_STMT (dr));
/* DR may live in the outer (imperfect) part of the nest; any level below
   its containing loop gets a zero stride.  Strides are pushed from the
   innermost level outwards.  */
1286 while (!flow_bb_inside_loop_p (loop, bb))
1288 strides->safe_push (build_int_cst (sizetype, 0));
1289 loop = loop_outer (loop);
1291 gcc_assert (loop == bb->loop_father);
1293 tree ref = DR_REF (dr);
1294 if (TREE_CODE (ref) == COMPONENT_REF
1295 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1297 /* We can't take address of bitfields. If the bitfield is at constant
1298 offset from the start of the struct, just use address of the
1299 struct, for analysis of the strides that shouldn't matter. */
1300 if (!TREE_OPERAND (ref, 2)
1301 || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
1302 ref = TREE_OPERAND (ref, 0);
1303 /* Otherwise, if we have a bit field representative, use that. */
1304 else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
1305 != NULL_TREE)
1307 tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
1308 ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
1309 repr, TREE_OPERAND (ref, 2));
/* Otherwise punt: DR->aux still gets the (short) stride vector, which
   makes compute_access_strides give up via its length check.  */
1311 /* Otherwise punt. */
1312 else
1314 dr->aux = strides;
1315 return;
1318 tree scev_base = build_fold_addr_expr (ref);
1319 tree scev = analyze_scalar_evolution (loop, scev_base);
1320 scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
1321 if (! chrec_contains_undetermined (scev))
/* Peel the address chrec level by level, pushing each step as the
   stride for that loop; levels the chrec is invariant in get a zero
   stride so every level of the nest is represented.  */
1323 tree sl = scev;
1324 struct loop *expected = loop;
1325 while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
1327 struct loop *sl_loop = get_chrec_loop (sl);
1328 while (sl_loop != expected)
1330 strides->safe_push (size_int (0));
1331 expected = loop_outer (expected);
1333 strides->safe_push (CHREC_RIGHT (sl));
1334 sl = CHREC_LEFT (sl);
1335 expected = loop_outer (expected);
1337 if (! tree_contains_chrecs (sl, NULL))
1338 while (expected != loop_outer (loop_nest))
1340 strides->safe_push (size_int (0));
1341 expected = loop_outer (expected);
1345 dr->aux = strides;
1348 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1349 access strides with respect to each level loop for all data refs in
1350 DATAREFS from inner loop to outer loop. On success, it returns the
1351 outermost loop that access strides can be computed successfully for
1352 all data references. If access strides cannot be computed at least
1353 for two levels of loop for any data reference, it returns NULL. */
1355 static struct loop *
1356 compute_access_strides (struct loop *loop_nest, struct loop *loop,
1357 vec<data_reference_p> datarefs)
1359 unsigned i, j, num_loops = (unsigned) -1;
1360 data_reference_p dr;
1361 vec<tree> *stride;
/* NUM_LOOPS becomes the minimum number of analyzable levels over all
   data references; fewer than two levels means no interchange pair.  */
1363 for (i = 0; datarefs.iterate (i, &dr); ++i)
1365 compute_access_stride (loop_nest, loop, dr);
1366 stride = DR_ACCESS_STRIDE (dr);
1367 if (stride->length () < num_loops)
1369 num_loops = stride->length ();
1370 if (num_loops < 2)
1371 return NULL;
/* Truncate each stride vector to the common depth and reverse it, so
   index 0 afterwards corresponds to the outermost analyzable loop
   (compute_access_stride pushed strides innermost-first).  */
1375 for (i = 0; datarefs.iterate (i, &dr); ++i)
1377 stride = DR_ACCESS_STRIDE (dr);
1378 if (stride->length () > num_loops)
1379 stride->truncate (num_loops);
1381 for (j = 0; j < (num_loops >> 1); ++j)
1382 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
/* Return the outermost loop for which strides are available for all
   data references.  */
1385 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
1386 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
1387 return loop;
1390 /* Prune access strides for data references in DATAREFS by removing strides
1391 of loops that aren't in current LOOP_NEST. */
1393 static void
1394 prune_access_strides_not_in_loop (struct loop *loop_nest,
1395 struct loop *innermost,
1396 vec<data_reference_p> datarefs)
1398 data_reference_p dr;
1399 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1400 gcc_assert (num_loops > 1);
/* Stride vectors are stored outermost-first (see compute_access_strides),
   so entries for loops outside the nest are at the front.  */
1402 /* Block remove strides of loops that are not in current loop nest. */
1403 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1405 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1406 if (stride->length () > num_loops)
1407 stride->block_remove (0, stride->length () - num_loops);
1411 /* Dump access strides for all DATAREFS. */
1413 static void
1414 dump_access_strides (vec<data_reference_p> datarefs)
1416 data_reference_p dr;
1417 fprintf (dump_file, "Access Strides for DRs:\n");
1418 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1420 fprintf (dump_file, " ");
1421 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1422 fprintf (dump_file, ":\t\t<");
1424 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1425 unsigned num_loops = stride->length ();
1426 for (unsigned j = 0; j < num_loops; ++j)
1428 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1429 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1434 /* Return true if it's profitable to interchange two loops whose index
1435 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1436 computes and compares three types information from all DATAREFS:
1437 1) Access stride for loop I_IDX and O_IDX.
1438 2) Number of invariant memory references with respect to I_IDX before
1439 and after loop interchange.
1440 3) Flags indicating if all memory references access sequential memory
1441 in ILOOP, before and after loop interchange.
1442 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1443 innermost loops in loop nest. This function also dumps information if
1444 DUMP_INFO_P is true. */
1446 static bool
1447 should_interchange_loops (unsigned i_idx, unsigned o_idx,
1448 vec<data_reference_p> datarefs,
1449 bool innermost_loops_p, bool dump_info_p = true)
1451 unsigned HOST_WIDE_INT ratio;
1452 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1453 struct data_reference *dr;
1454 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1455 widest_int iloop_strides = 0, oloop_strides = 0;
1456 unsigned num_unresolved_drs = 0;
1457 unsigned num_resolved_ok_drs = 0;
1458 unsigned num_resolved_not_ok_drs = 0;
1460 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1461 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1463 for (i = 0; datarefs.iterate (i, &dr); ++i)
1465 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1466 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1468 bool subloop_stride_p = false;
1469 /* Data ref can't be invariant or sequential access at current loop if
1470 its address changes with respect to any subloops. */
1471 for (j = i_idx + 1; j < stride->length (); ++j)
1472 if (!integer_zerop ((*stride)[j]))
1474 subloop_stride_p = true;
1475 break;
1478 if (integer_zerop (iloop_stride))
1480 if (!subloop_stride_p)
1481 num_old_inv_drs++;
1483 if (integer_zerop (oloop_stride))
1485 if (!subloop_stride_p)
1486 num_new_inv_drs++;
1489 if (TREE_CODE (iloop_stride) == INTEGER_CST
1490 && TREE_CODE (oloop_stride) == INTEGER_CST)
1492 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1493 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1495 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1496 iloop_stride, oloop_stride))
1497 num_resolved_ok_drs++;
1498 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1499 oloop_stride, iloop_stride))
1500 num_resolved_not_ok_drs++;
1501 else
1502 num_unresolved_drs++;
1504 /* Data ref can't be sequential access if its address changes in sub
1505 loop. */
1506 if (subloop_stride_p)
1508 all_seq_dr_before_p = false;
1509 all_seq_dr_after_p = false;
1510 continue;
1512 /* Track if all data references are sequential accesses before/after loop
1513 interchange. Note invariant is considered sequential here. */
1514 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1515 if (all_seq_dr_before_p
1516 && ! (integer_zerop (iloop_stride)
1517 || operand_equal_p (access_size, iloop_stride, 0)))
1518 all_seq_dr_before_p = false;
1519 if (all_seq_dr_after_p
1520 && ! (integer_zerop (oloop_stride)
1521 || operand_equal_p (access_size, oloop_stride, 0)))
1522 all_seq_dr_after_p = false;
1525 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1527 fprintf (dump_file, "\toverall:\t\t");
1528 print_decu (iloop_strides, dump_file);
1529 fprintf (dump_file, "\t");
1530 print_decu (oloop_strides, dump_file);
1531 fprintf (dump_file, "\n");
1533 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1534 num_old_inv_drs, num_new_inv_drs);
1535 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1536 all_seq_dr_before_p ? "true" : "false",
1537 all_seq_dr_after_p ? "true" : "false");
1538 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1539 num_resolved_ok_drs);
1540 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1541 num_resolved_not_ok_drs);
1542 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1543 num_unresolved_drs);
1546 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1547 return false;
1549 /* We use different stride comparison ratio for interchanging innermost
1550 two loops or not. The idea is to be conservative in interchange for
1551 the innermost loops. */
1552 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1553 /* Do interchange if it gives better data locality behavior. */
1554 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1555 return true;
1556 if (wi::gtu_p (iloop_strides, oloop_strides))
1558 /* Or it creates more invariant memory references. */
1559 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1560 && num_new_inv_drs > num_old_inv_drs)
1561 return true;
1562 /* Or it makes all memory references sequential. */
1563 if (num_new_inv_drs >= num_old_inv_drs
1564 && !all_seq_dr_before_p && all_seq_dr_after_p)
1565 return true;
1568 return false;
1571 /* Try to interchange inner loop of a loop nest to outer level. */
1573 bool
1574 tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
1575 vec<ddr_p> ddrs)
1577 location_t loc = find_loop_location (m_loop_nest[0]);
1578 bool changed_p = false;
1579 /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
1580 The overall effect is to push inner loop to outermost level in whole
1581 loop nest. */
1582 for (unsigned i = m_loop_nest.length (); i > 1; --i)
1584 unsigned i_idx = i - 1, o_idx = i - 2;
1586 /* Check validity for loop interchange. */
1587 if (!valid_data_dependences (i_idx, o_idx, ddrs))
1588 break;
1590 loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
1591 loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
1593 /* Check if we can do transformation for loop interchange. */
1594 if (!iloop.analyze_carried_vars (NULL)
1595 || !iloop.analyze_lcssa_phis ()
1596 || !oloop.analyze_carried_vars (&iloop)
1597 || !oloop.analyze_lcssa_phis ()
1598 || !iloop.can_interchange_p (NULL)
1599 || !oloop.can_interchange_p (&iloop))
1600 break;
1602 /* Check profitability for loop interchange. */
1603 if (should_interchange_loops (i_idx, o_idx, datarefs,
1604 iloop.m_loop->inner == NULL))
1606 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file,
1608 "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
1609 oloop.m_loop->num, iloop.m_loop->num);
1611 changed_p = true;
1612 interchange_loops (iloop, oloop);
1613 /* No need to update if there is no further loop interchange. */
1614 if (o_idx > 0)
1615 update_data_info (i_idx, o_idx, datarefs, ddrs);
1617 else
1619 if (dump_file && (dump_flags & TDF_DETAILS))
1620 fprintf (dump_file,
1621 "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
1622 oloop.m_loop->num, iloop.m_loop->num);
/* Clean up code made dead during the transformations above, starting
   from the recorded seed SSA names.  */
1625 simple_dce_from_worklist (m_dce_seeds);
1627 if (changed_p)
1628 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1629 "loops interchanged in loop nest\n");
1631 return changed_p;
1635 /* Loop interchange pass. */
1637 namespace {
/* Pass metadata; see struct pass_data in tree-pass.h for field semantics.  */
1639 const pass_data pass_data_linterchange =
1641 GIMPLE_PASS, /* type */
1642 "linterchange", /* name */
1643 OPTGROUP_LOOP, /* optinfo_flags */
1644 TV_LINTERCHANGE, /* tv_id */
1645 PROP_cfg, /* properties_required */
1646 0, /* properties_provided */
1647 0, /* properties_destroyed */
1648 0, /* todo_flags_start */
1649 0, /* todo_flags_finish */
1652 class pass_linterchange : public gimple_opt_pass
1654 public:
1655 pass_linterchange (gcc::context *ctxt)
1656 : gimple_opt_pass (pass_data_linterchange, ctxt)
1659 /* opt_pass methods: */
/* Allow the pass to be instantiated more than once in a pipeline.  */
1660 opt_pass * clone () { return new pass_linterchange (m_ctxt); }
/* Run only when -floop-interchange is enabled.  */
1661 virtual bool gate (function *) { return flag_loop_interchange; }
1662 virtual unsigned int execute (function *);
1664 }; // class pass_linterchange
1667 /* Return true if LOOP has proper form for interchange. We check three
1668 conditions in the function:
1669 1) In general, a loop can be interchanged only if it doesn't have
1670 basic blocks other than header, exit and latch besides possible
1671 inner loop nest. This basically restricts loop interchange to
1672 below form loop nests:
1674 header<---+
1677 INNER_LOOP |
1680 exit--->latch
1682 2) Data reference in basic block that executes in different times
1683 than loop head/exit is not allowed.
1684 3) Record the innermost outer loop that doesn't form rectangle loop
1685 nest with LOOP, in *MIN_OUTER. */
1687 static bool
1688 proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
1690 edge e0, e1, exit;
1692 /* Don't interchange if loop has unsupported information for the moment. */
1693 if (loop->safelen > 0
1694 || loop->constraints != 0
1695 || loop->can_be_parallel
1696 || loop->dont_vectorize
1697 || loop->force_vectorize
1698 || loop->in_oacc_kernels_region
1699 || loop->orig_loop_num != 0
1700 || loop->simduid != NULL_TREE)
1701 return false;
1703 /* Don't interchange if outer loop has basic block other than header, exit
1704 and latch. */
1705 if (loop->inner != NULL
1706 && loop->num_nodes != loop->inner->num_nodes + 3)
1707 return false;
1709 if ((exit = single_dom_exit (loop)) == NULL)
1710 return false;
1712 /* Check control flow on loop header/exit blocks. */
1713 if (loop->header == exit->src
1714 && (EDGE_COUNT (loop->header->preds) != 2
1715 || EDGE_COUNT (loop->header->succs) != 2))
1716 return false;
1717 else if (loop->header != exit->src
1718 && (EDGE_COUNT (loop->header->preds) != 2
1719 || !single_succ_p (loop->header)
1720 || unsupported_edge (single_succ_edge (loop->header))
1721 || EDGE_COUNT (exit->src->succs) != 2
1722 || !single_pred_p (exit->src)
1723 || unsupported_edge (single_pred_edge (exit->src))))
1724 return false;
/* One predecessor of the header must be the latch (back edge), the other
   must come from outside the loop (entry edge).  */
1726 e0 = EDGE_PRED (loop->header, 0);
1727 e1 = EDGE_PRED (loop->header, 1);
1728 if (unsupported_edge (e0) || unsupported_edge (e1)
1729 || (e0->src != loop->latch && e1->src != loop->latch)
1730 || (e0->src->loop_father == loop && e1->src->loop_father == loop))
1731 return false;
/* One successor of the exit source must be the latch, the other must
   leave the loop.  */
1733 e0 = EDGE_SUCC (exit->src, 0);
1734 e1 = EDGE_SUCC (exit->src, 1);
1735 if (unsupported_edge (e0) || unsupported_edge (e1)
1736 || (e0->dest != loop->latch && e1->dest != loop->latch)
1737 || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
1738 return false;
1740 /* Don't interchange if any reference is in basic block that doesn't
1741 dominate exit block. */
1742 basic_block *bbs = get_loop_body (loop);
1743 for (unsigned i = 0; i < loop->num_nodes; i++)
1745 basic_block bb = bbs[i];
1747 if (bb->loop_father != loop
1748 || bb == loop->header || bb == exit->src
1749 || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
1750 continue;
1752 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
1753 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
1754 if (gimple_vuse (gsi_stmt (gsi)))
1756 free (bbs);
1757 return false;
1760 free (bbs);
/* Require a fully-determined niters expression for LOOP.  */
1762 tree niters = number_of_latch_executions (loop);
1763 niters = analyze_scalar_evolution (loop_outer (loop), niters);
1764 if (!niters || chrec_contains_undetermined (niters))
1765 return false;
1767 /* Record the innermost outer loop that doesn't form rectangle loop nest. */
1768 for (loop_p loop2 = loop_outer (loop);
1769 loop2 && flow_loop_nested_p (*min_outer, loop2);
1770 loop2 = loop_outer (loop2))
1772 niters = instantiate_scev (loop_preheader_edge (loop2),
1773 loop_outer (loop), niters);
1774 if (!evolution_function_is_invariant_p (niters, loop2->num))
1776 *min_outer = loop2;
1777 break;
1780 return true;
1783 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1784 should be interchanged by looking into all DATAREFS. */
1786 static bool
1787 should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
1788 vec<data_reference_p> datarefs)
1790 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1791 gcc_assert (idx > 0);
1793 /* Check if any two adjacent loops should be interchanged. */
1794 for (struct loop *loop = innermost;
1795 loop != loop_nest; loop = loop_outer (loop), idx--)
1796 if (should_interchange_loops (idx, idx - 1, datarefs,
1797 loop == innermost, false))
1798 return true;
1800 return false;
1803 /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1804 dependences for loop interchange and store it in DDRS. Note we compute
1805 dependences directly rather than call generic interface so that we can
1806 return on unknown dependence instantly. */
1808 static bool
1809 tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
1810 vec<data_reference_p> datarefs,
1811 vec<ddr_p> *ddrs)
1813 struct data_reference *a, *b;
1814 struct loop *innermost = loop_nest.last ();
/* Only read-write or write-write pairs can create a dependence, so
   pairs of reads are skipped below.  */
1816 for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
1818 bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
1819 for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
1820 if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
1822 bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
1823 /* Don't support multiple write references in outer loop. */
1824 if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
1825 return false;
1827 ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
1828 ddrs->safe_push (ddr);
1829 compute_affine_dependence (ddr, loop_nest[0]);
1831 /* Give up if ddr is unknown dependence or classic direct vector
1832 is not available. */
1833 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1834 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
1835 && DDR_NUM_DIR_VECTS (ddr) == 0))
1836 return false;
1838 /* If either data reference is in the outer loop of the nest, we
1839 require no dependence here because the reference needs to be moved
1840 into the inner loop during interchange. */
1841 if (a_outer_p && b_outer_p
1842 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
1843 continue;
1844 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1845 && (a_outer_p || b_outer_p))
1846 return false;
1850 return true;
1853 /* Prune DATAREFS by removing any data reference not inside of LOOP. */
1855 static inline void
1856 prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
1858 unsigned i, j;
1859 struct data_reference *dr;
/* Compact kept references to the front of the vector in place; for
   pruned references, release the stride vector hanging off DR->aux
   and the data reference itself.  */
1861 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1863 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1864 datarefs[j++] = dr;
1865 else
1867 if (dr->aux)
1869 DR_ACCESS_STRIDE (dr)->release ();
1870 delete (vec<tree> *) dr->aux;
1872 free_data_ref (dr);
1875 datarefs.truncate (j);
1878 /* Find and store data references in DATAREFS for LOOP nest. If there's
1879 difficult data reference in a basic block, we shrink the loop nest to
1880 inner loop of that basic block's father loop. On success, return the
1881 outer loop of the result loop nest. */
1883 static struct loop *
1884 prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
1886 struct loop *loop_nest = loop;
1887 vec<data_reference_p> *bb_refs;
1888 basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
/* BB->aux temporarily holds each block's data reference vector; clear
   it up front so stale values can't leak through.  */
1890 for (unsigned i = 0; i < loop->num_nodes; i++)
1891 bbs[i]->aux = NULL;
1893 /* Find data references for all basic blocks. Shrink loop nest on difficult
1894 data reference. */
1895 for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
1897 bb = bbs[i];
1898 if (!flow_bb_inside_loop_p (loop_nest, bb))
1899 continue;
1901 bb_refs = new vec<data_reference_p> ();
1902 if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
/* Shrink the nest below the offending block; give up entirely if
   the remaining nest would be a single loop.  */
1904 loop_nest = bb->loop_father->inner;
1905 if (loop_nest && !loop_nest->inner)
1906 loop_nest = NULL;
1908 free_data_refs (*bb_refs);
1909 delete bb_refs;
1911 else if (bb_refs->is_empty ())
1912 delete bb_refs;
1913 else
1914 bb->aux = bb_refs;
1917 /* Collect all data references in loop nest. */
1918 for (unsigned i = 0; i < loop->num_nodes; i++)
1920 bb = bbs[i];
1921 if (!bb->aux)
1922 continue;
1924 bb_refs = (vec<data_reference_p> *) bb->aux;
1925 if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
1926 datarefs->safe_splice (*bb_refs);
1927 else
1928 free_data_refs (*bb_refs);
1930 delete bb_refs;
1931 bb->aux = NULL;
1933 free (bbs);
1935 return loop_nest;
1938 /* Given innermost LOOP, return true if perfect loop nest can be found and
1939 data dependences can be computed. If successful, record the perfect loop
1940 nest in LOOP_NEST; record all data references in DATAREFS and record all
1941 data dependence relations in DDRS.
1943 We do support a restricted form of imperfect loop nest, i.e, loop nest
1944 with load/store in outer loop initializing/finalizing simple reduction
1945 of the innermost loop. For such outer loop reference, we require that
1946 it has no dependence with others since it will be moved to inner loop
1947 in interchange. */
1949 static bool
1950 prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
1951 vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
1953 struct loop *start_loop = NULL, *innermost = loop;
1954 struct loop *outermost = loops_for_fn (cfun)->tree_root;
1956 /* Find loop nest from the innermost loop. Walk outwards while LOOP is
1957 still strictly nested in OUTERMOST; OUTERMOST records the innermost
outer loop that doesn't form a rectangle nest with LOOP, as tightened
by proper_loop_form_for_interchange. */
1958 while (loop->num != 0 && loop->inner == start_loop
1959 && flow_loop_nested_p (outermost, loop))
1961 if (!proper_loop_form_for_interchange (loop, &outermost))
1962 break;
1964 start_loop = loop;
1965 /* If this loop has sibling loop, the father loop won't be in perfect
1966 loop nest. */
1967 if (loop->next != NULL)
1968 break;
1970 loop = loop_outer (loop);
/* Need at least two loops (START_LOOP plus an inner loop) to interchange. */
1972 if (!start_loop || !start_loop->inner)
1973 return false;
1975 /* Prepare the data reference vector for the loop nest, pruning outer
1976 loops we cannot handle. */
1977 start_loop = prepare_data_references (start_loop, datarefs);
1978 if (!start_loop
1979 /* Check if there is no data reference. */
1980 || datarefs->is_empty ()
1981 /* Check if there are too many of data references. */
1982 || (int) datarefs->length () > MAX_DATAREFS)
1983 return false;
1985 /* Compute access strides for all data references, pruning outer
1986 loops we cannot analyze refs in. */
1987 start_loop = compute_access_strides (start_loop, innermost, *datarefs);
1988 if (!start_loop)
1989 return false;
1991 /* Check if any interchange is profitable in the loop nest. */
1992 if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
1993 return false;
1995 /* Check if data dependences can be computed for loop nest starting from
1996 start_loop. */
1997 loop = start_loop;
1998 do {
1999 loop_nest->truncate (0);
2001 if (loop != start_loop)
2002 prune_datarefs_not_in_loop (start_loop, *datarefs);
2004 if (find_loop_nest (start_loop, loop_nest)
2005 && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
2007 if (dump_file && (dump_flags & TDF_DETAILS))
2008 fprintf (dump_file,
2009 "\nConsider loop interchange for loop_nest<%d - %d>\n",
2010 start_loop->num, innermost->num);
2012 if (loop != start_loop)
2013 prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
2015 if (dump_file && (dump_flags & TDF_DETAILS))
2016 dump_access_strides (*datarefs);
2018 return true;
2021 free_dependence_relations (*ddrs);
2022 *ddrs = vNULL;
2023 /* Try to compute data dependences with the outermost loop stripped. */
2024 loop = start_loop;
2025 start_loop = start_loop->inner;
2026 } while (start_loop && start_loop->inner);
2028 return false;
2031 /* Main entry for loop interchange pass. */
2033 unsigned int
2034 pass_linterchange::execute (function *fun)
/* Interchange needs at least two real loops; number_of_loops includes
   the function's artificial root loop.  */
2036 if (number_of_loops (fun) <= 2)
2037 return 0;
2039 bool changed_p = false;
2040 struct loop *loop;
2041 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2043 vec<loop_p> loop_nest = vNULL;
2044 vec<data_reference_p> datarefs = vNULL;
2045 vec<ddr_p> ddrs = vNULL;
2046 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2048 tree_loop_interchange loop_interchange (loop_nest);
2049 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2051 free_dependence_relations (ddrs);
2052 free_data_refs_with_aux (datarefs);
2053 loop_nest.release ();
/* Cached scalar evolutions are invalid after the transformation.  */
2056 if (changed_p)
2057 scev_reset_htab ();
/* Virtual operands were dropped while moving statements between loops
   (see move_code_to_inner_loop); ask the pass manager to rebuild them. */
2059 return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
2062 } // anon namespace
2064 gimple_opt_pass *
2065 make_pass_linterchange (gcc::context *ctxt)
2067 return new pass_linterchange (ctxt);