/* Source: gcc/gimple-loop-interchange.cc
   (scraped from commit "Make vect_model_store_cost take a vec_load_store_type",
   blob 01a26c07c421ff0728f1a5b983cf30e5dc8d5d71).  */
/* Loop interchange.
   Copyright (C) 2017-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "is-a.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "gimple-pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "cfgloop.h"
36 #include "params.h"
37 #include "tree-ssa.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-ssa-loop-manip.h"
40 #include "tree-ssa-loop-niter.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-dce.h"
43 #include "tree-data-ref.h"
44 #include "tree-vectorizer.h"
/* This pass performs loop interchange: for example, the loop nest

   for (int j = 0; j < N; j++)
     for (int k = 0; k < N; k++)
       for (int i = 0; i < N; i++)
	 c[i][j] = c[i][j] + a[i][k]*b[k][j];

   is transformed to

   for (int i = 0; i < N; i++)
     for (int j = 0; j < N; j++)
       for (int k = 0; k < N; k++)
	 c[i][j] = c[i][j] + a[i][k]*b[k][j];

   This pass implements loop interchange in the following steps:

     1) Find perfect loop nest for each innermost loop and compute data
	dependence relations for it.  For above example, loop nest is
	<loop_j, loop_k, loop_i>.
     2) From innermost to outermost loop, this pass tries to interchange
	each loop pair.  For above case, it firstly tries to interchange
	<loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
	Then it tries to interchange <loop_j, loop_i> and loop nest becomes
	<loop_i, loop_j, loop_k>.  The overall effect is to move innermost
	loop to the outermost position.  For loop pair <loop_i, loop_j>
	to be interchanged, we:
     3) Check if data dependence relations are valid for loop interchange.
     4) Check if both loops can be interchanged in terms of transformation.
     5) Check if interchanging the two loops is profitable.
     6) Interchange the two loops by mapping induction variables.

   This pass also handles reductions in loop nest.  So far we only support
   simple reduction of inner loop and double reduction of the loop nest.  */
/* Maximum number of stmts in each loop that should be interchanged.  */
#define MAX_NUM_STMT    (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
/* Maximum number of data references in loop nest.  */
#define MAX_DATAREFS    (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))

/* Comparison ratio of access stride between inner/outer loops to be
   interchanged.  This is the minimum stride ratio for loop interchange
   to be profitable.  */
#define OUTER_STRIDE_RATIO  (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
/* The same as above, but we require higher ratio for interchanging the
   innermost two loops.  */
#define INNER_STRIDE_RATIO  ((OUTER_STRIDE_RATIO) + 1)

/* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
   be interchanged if outer loop has too many stmts.  */
#define STMT_COST_RATIO     (3)

/* Vector of strides that DR accesses in each level loop of a loop nest,
   stashed in the data reference's AUX field.  */
#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
100 /* Structure recording loop induction variable. */
101 typedef struct induction
103 /* IV itself. */
104 tree var;
105 /* IV's initializing value, which is the init arg of the IV PHI node. */
106 tree init_val;
107 /* IV's initializing expr, which is (the expanded result of) init_val. */
108 tree init_expr;
109 /* IV's step. */
110 tree step;
111 } *induction_p;
/* Enum type for loop reduction variable.  */
enum reduction_type
{
  UNKNOWN_RTYPE = 0,
  SIMPLE_RTYPE,
  DOUBLE_RTYPE
};
121 /* Structure recording loop reduction variable. */
122 typedef struct reduction
124 /* Reduction itself. */
125 tree var;
126 /* PHI node defining reduction variable. */
127 gphi *phi;
128 /* Init and next variables of the reduction. */
129 tree init;
130 tree next;
131 /* Lcssa PHI node if reduction is used outside of its definition loop. */
132 gphi *lcssa_phi;
133 /* Stmts defining init and next. */
134 gimple *producer;
135 gimple *consumer;
136 /* If init is loaded from memory, this is the loading memory reference. */
137 tree init_ref;
138 /* If reduction is finally stored to memory, this is the stored memory
139 reference. */
140 tree fini_ref;
141 enum reduction_type type;
142 } *reduction_p;
145 /* Dump reduction RE. */
147 static void
148 dump_reduction (reduction_p re)
150 if (re->type == SIMPLE_RTYPE)
151 fprintf (dump_file, " Simple reduction: ");
152 else if (re->type == DOUBLE_RTYPE)
153 fprintf (dump_file, " Double reduction: ");
154 else
155 fprintf (dump_file, " Unknown reduction: ");
157 print_gimple_stmt (dump_file, re->phi, 0);
160 /* Dump LOOP's induction IV. */
161 static void
162 dump_induction (struct loop *loop, induction_p iv)
164 fprintf (dump_file, " Induction: ");
165 print_generic_expr (dump_file, iv->var, TDF_SLIM);
166 fprintf (dump_file, " = {");
167 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
168 fprintf (dump_file, ", ");
169 print_generic_expr (dump_file, iv->step, TDF_SLIM);
170 fprintf (dump_file, "}_%d\n", loop->num);
173 /* Loop candidate for interchange. */
175 struct loop_cand
177 loop_cand (struct loop *, struct loop *);
178 ~loop_cand ();
180 reduction_p find_reduction_by_stmt (gimple *);
181 void classify_simple_reduction (reduction_p);
182 bool analyze_iloop_reduction_var (tree);
183 bool analyze_oloop_reduction_var (loop_cand *, tree);
184 bool analyze_induction_var (tree, tree);
185 bool analyze_carried_vars (loop_cand *);
186 bool analyze_lcssa_phis (void);
187 bool can_interchange_p (loop_cand *);
188 void undo_simple_reduction (reduction_p, bitmap);
190 /* The loop itself. */
191 struct loop *m_loop;
192 /* The outer loop for interchange. It equals to loop if this loop cand
193 itself represents the outer loop. */
194 struct loop *m_outer;
195 /* Vector of induction variables in loop. */
196 vec<induction_p> m_inductions;
197 /* Vector of reduction variables in loop. */
198 vec<reduction_p> m_reductions;
199 /* Lcssa PHI nodes of this loop. */
200 vec<gphi *> m_lcssa_nodes;
201 /* Single exit edge of this loop. */
202 edge m_exit;
203 /* Basic blocks of this loop. */
204 basic_block *m_bbs;
205 /* Number of stmts of this loop. Inner loops' stmts are not included. */
206 int m_num_stmts;
207 /* Number of constant initialized simple reduction. */
208 int m_const_init_reduc;
211 /* Constructor. */
213 loop_cand::loop_cand (struct loop *loop, struct loop *outer)
214 : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
215 m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
217 m_inductions.create (3);
218 m_reductions.create (3);
219 m_lcssa_nodes.create (3);
222 /* Destructor. */
224 loop_cand::~loop_cand ()
226 induction_p iv;
227 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
228 free (iv);
230 reduction_p re;
231 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
232 free (re);
234 m_inductions.release ();
235 m_reductions.release ();
236 m_lcssa_nodes.release ();
237 free (m_bbs);
240 /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
242 static gimple *
243 single_use_in_loop (tree var, struct loop *loop)
245 gimple *stmt, *res = NULL;
246 use_operand_p use_p;
247 imm_use_iterator iterator;
249 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
251 stmt = USE_STMT (use_p);
252 if (is_gimple_debug (stmt))
253 continue;
255 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
256 continue;
258 if (res)
259 return NULL;
261 res = stmt;
263 return res;
266 /* Return true if E is unsupported in loop interchange, i.e, E is a complex
267 edge or part of irreducible loop. */
269 static inline bool
270 unsupported_edge (edge e)
272 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
275 /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
276 stmt. */
278 reduction_p
279 loop_cand::find_reduction_by_stmt (gimple *stmt)
281 gphi *phi = dyn_cast <gphi *> (stmt);
282 reduction_p re;
284 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
285 if ((phi != NULL && phi == re->lcssa_phi)
286 || (stmt == re->producer || stmt == re->consumer))
287 return re;
289 return NULL;
292 /* Return true if current loop_cand be interchanged. ILOOP is not NULL if
293 current loop_cand is outer loop in loop nest. */
295 bool
296 loop_cand::can_interchange_p (loop_cand *iloop)
298 /* For now we only support at most one reduction. */
299 unsigned allowed_reduction_num = 1;
301 /* Only support reduction if the loop nest to be interchanged is the
302 innermostin two loops. */
303 if ((iloop == NULL && m_loop->inner != NULL)
304 || (iloop != NULL && iloop->m_loop->inner != NULL))
305 allowed_reduction_num = 0;
307 if (m_reductions.length () > allowed_reduction_num
308 || (m_reductions.length () == 1
309 && m_reductions[0]->type == UNKNOWN_RTYPE))
310 return false;
312 /* Only support lcssa PHI node which is for reduction. */
313 if (m_lcssa_nodes.length () > allowed_reduction_num)
314 return false;
316 /* Check if basic block has any unsupported operation. Note basic blocks
317 of inner loops are not checked here. */
318 for (unsigned i = 0; i < m_loop->num_nodes; i++)
320 basic_block bb = m_bbs[i];
321 gphi_iterator psi;
322 gimple_stmt_iterator gsi;
324 /* Skip basic blocks of inner loops. */
325 if (bb->loop_father != m_loop)
326 continue;
328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
330 gimple *stmt = gsi_stmt (gsi);
331 if (is_gimple_debug (stmt))
332 continue;
334 if (gimple_has_side_effects (stmt))
335 return false;
337 m_num_stmts++;
338 if (gcall *call = dyn_cast <gcall *> (stmt))
340 /* In basic block of outer loop, the call should be cheap since
341 it will be moved to inner loop. */
342 if (iloop != NULL
343 && !gimple_inexpensive_call_p (call))
344 return false;
345 continue;
348 if (!iloop || !gimple_vuse (stmt))
349 continue;
351 /* Support stmt accessing memory in outer loop only if it is for
352 inner loop's reduction. */
353 if (iloop->find_reduction_by_stmt (stmt))
354 continue;
356 tree lhs;
357 /* Support loop invariant memory reference if it's only used once by
358 inner loop. */
359 /* ??? How's this checking for invariantness? */
360 if (gimple_assign_single_p (stmt)
361 && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
362 && TREE_CODE (lhs) == SSA_NAME
363 && single_use_in_loop (lhs, iloop->m_loop))
364 continue;
366 return false;
368 /* Check if loop has too many stmts. */
369 if (m_num_stmts > MAX_NUM_STMT)
370 return false;
372 /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
373 loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
374 if (!iloop || bb == m_loop->header
375 || bb == iloop->m_exit->dest)
376 continue;
378 /* Don't allow any other PHI nodes. */
379 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
380 if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
381 return false;
384 return true;
387 /* Programmers and optimizers (like loop store motion) may optimize code:
389 for (int i = 0; i < N; i++)
390 for (int j = 0; j < N; j++)
391 a[i] += b[j][i] * c[j][i];
393 into reduction:
395 for (int i = 0; i < N; i++)
397 // producer. Note sum can be intitialized to a constant.
398 int sum = a[i];
399 for (int j = 0; j < N; j++)
401 sum += b[j][i] * c[j][i];
403 // consumer.
404 a[i] = sum;
407 The result code can't be interchanged without undoing the optimization.
408 This function classifies this kind reduction and records information so
409 that we can undo the store motion during interchange. */
411 void
412 loop_cand::classify_simple_reduction (reduction_p re)
414 gimple *producer, *consumer;
416 /* Check init variable of reduction and how it is initialized. */
417 if (TREE_CODE (re->init) == SSA_NAME)
419 producer = SSA_NAME_DEF_STMT (re->init);
420 re->producer = producer;
421 basic_block bb = gimple_bb (producer);
422 if (!bb || bb->loop_father != m_outer)
423 return;
425 if (!gimple_assign_load_p (producer))
426 return;
428 re->init_ref = gimple_assign_rhs1 (producer);
430 else if (CONSTANT_CLASS_P (re->init))
431 m_const_init_reduc++;
432 else
433 return;
435 /* Check how reduction variable is used. */
436 consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
437 if (!consumer
438 || !gimple_store_p (consumer))
439 return;
441 re->fini_ref = gimple_get_lhs (consumer);
442 re->consumer = consumer;
444 /* Simple reduction with constant initializer. */
445 if (!re->init_ref)
447 gcc_assert (CONSTANT_CLASS_P (re->init));
448 re->init_ref = unshare_expr (re->fini_ref);
451 /* Require memory references in producer and consumer are the same so
452 that we can undo reduction during interchange. */
453 if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
454 return;
456 re->type = SIMPLE_RTYPE;
459 /* Analyze reduction variable VAR for inner loop of the loop nest to be
460 interchanged. Return true if analysis succeeds. */
462 bool
463 loop_cand::analyze_iloop_reduction_var (tree var)
465 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
466 gphi *lcssa_phi = NULL, *use_phi;
467 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
468 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
469 reduction_p re;
470 gimple *stmt, *next_def, *single_use = NULL;
471 use_operand_p use_p;
472 imm_use_iterator iterator;
474 if (TREE_CODE (next) != SSA_NAME)
475 return false;
477 next_def = SSA_NAME_DEF_STMT (next);
478 basic_block bb = gimple_bb (next_def);
479 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
480 return false;
482 /* In restricted reduction, the var is (and must be) used in defining
483 the updated var. The process can be depicted as below:
485 var ;; = PHI<init, next>
489 +---------------------+
490 | reduction operators | <-- other operands
491 +---------------------+
495 next
497 In terms loop interchange, we don't change how NEXT is computed based
498 on VAR and OTHER OPERANDS. In case of double reduction in loop nest
499 to be interchanged, we don't changed it at all. In the case of simple
500 reduction in inner loop, we only make change how VAR/NEXT is loaded or
501 stored. With these conditions, we can relax restrictions on reduction
502 in a way that reduction operation is seen as black box. In general,
503 we can ignore reassociation of reduction operator; we can handle fake
504 reductions in which VAR is not even used to compute NEXT. */
505 if (! single_imm_use (var, &use_p, &single_use)
506 || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
507 return false;
509 /* Check the reduction operation. We require a left-associative operation.
510 For FP math we also need to be allowed to associate operations. */
511 if (gassign *ass = dyn_cast <gassign *> (single_use))
513 enum tree_code code = gimple_assign_rhs_code (ass);
514 if (! (associative_tree_code (code)
515 || (code == MINUS_EXPR
516 && use_p->use == gimple_assign_rhs1_ptr (ass)))
517 || (FLOAT_TYPE_P (TREE_TYPE (var))
518 && ! flag_associative_math))
519 return false;
521 else
522 return false;
524 /* Handle and verify a series of stmts feeding the reduction op. */
525 if (single_use != next_def
526 && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
527 gimple_assign_rhs_code (single_use)))
528 return false;
530 /* Only support cases in which INIT is used in inner loop. */
531 if (TREE_CODE (init) == SSA_NAME)
532 FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
534 stmt = USE_STMT (use_p);
535 if (is_gimple_debug (stmt))
536 continue;
538 if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
539 return false;
542 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
544 stmt = USE_STMT (use_p);
545 if (is_gimple_debug (stmt))
546 continue;
548 /* Or else it's used in PHI itself. */
549 use_phi = dyn_cast <gphi *> (stmt);
550 if (use_phi == phi)
551 continue;
553 if (use_phi != NULL
554 && lcssa_phi == NULL
555 && gimple_bb (stmt) == m_exit->dest
556 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
557 lcssa_phi = use_phi;
558 else
559 return false;
561 if (!lcssa_phi)
562 return false;
564 re = XCNEW (struct reduction);
565 re->var = var;
566 re->init = init;
567 re->next = next;
568 re->phi = phi;
569 re->lcssa_phi = lcssa_phi;
571 classify_simple_reduction (re);
573 if (dump_file && (dump_flags & TDF_DETAILS))
574 dump_reduction (re);
576 m_reductions.safe_push (re);
577 return true;
580 /* Analyze reduction variable VAR for outer loop of the loop nest to be
581 interchanged. ILOOP is not NULL and points to inner loop. For the
582 moment, we only support double reduction for outer loop, like:
584 for (int i = 0; i < n; i++)
586 int sum = 0;
588 for (int j = 0; j < n; j++) // outer loop
589 for (int k = 0; k < n; k++) // inner loop
590 sum += a[i][k]*b[k][j];
592 s[i] = sum;
595 Note the innermost two loops are the loop nest to be interchanged.
596 Return true if analysis succeeds. */
598 bool
599 loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
601 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
602 gphi *lcssa_phi = NULL, *use_phi;
603 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
604 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
605 reduction_p re;
606 gimple *stmt, *next_def;
607 use_operand_p use_p;
608 imm_use_iterator iterator;
610 if (TREE_CODE (next) != SSA_NAME)
611 return false;
613 next_def = SSA_NAME_DEF_STMT (next);
614 basic_block bb = gimple_bb (next_def);
615 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
616 return false;
618 /* Find inner loop's simple reduction that uses var as initializer. */
619 reduction_p inner_re = NULL;
620 for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
621 if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
622 break;
624 if (inner_re == NULL
625 || inner_re->type != UNKNOWN_RTYPE
626 || inner_re->producer != phi)
627 return false;
629 /* In case of double reduction, outer loop's reduction should be updated
630 by inner loop's simple reduction. */
631 if (next_def != inner_re->lcssa_phi)
632 return false;
634 /* Outer loop's reduction should only be used to initialize inner loop's
635 simple reduction. */
636 if (! single_imm_use (var, &use_p, &stmt)
637 || stmt != inner_re->phi)
638 return false;
640 /* Check this reduction is correctly used outside of loop via lcssa phi. */
641 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
643 stmt = USE_STMT (use_p);
644 if (is_gimple_debug (stmt))
645 continue;
647 /* Or else it's used in PHI itself. */
648 use_phi = dyn_cast <gphi *> (stmt);
649 if (use_phi == phi)
650 continue;
652 if (lcssa_phi == NULL
653 && use_phi != NULL
654 && gimple_bb (stmt) == m_exit->dest
655 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
656 lcssa_phi = use_phi;
657 else
658 return false;
660 if (!lcssa_phi)
661 return false;
663 re = XCNEW (struct reduction);
664 re->var = var;
665 re->init = init;
666 re->next = next;
667 re->phi = phi;
668 re->lcssa_phi = lcssa_phi;
669 re->type = DOUBLE_RTYPE;
670 inner_re->type = DOUBLE_RTYPE;
672 if (dump_file && (dump_flags & TDF_DETAILS))
673 dump_reduction (re);
675 m_reductions.safe_push (re);
676 return true;
679 /* Return true if VAR is induction variable of current loop whose scev is
680 specified by CHREC. */
682 bool
683 loop_cand::analyze_induction_var (tree var, tree chrec)
685 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
686 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
688 /* Var is loop invariant, though it's unlikely to happen. */
689 if (tree_does_not_contain_chrecs (chrec))
691 struct induction *iv = XCNEW (struct induction);
692 iv->var = var;
693 iv->init_val = init;
694 iv->init_expr = chrec;
695 iv->step = build_int_cst (TREE_TYPE (chrec), 0);
696 m_inductions.safe_push (iv);
697 return true;
700 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
701 || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
702 || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
703 || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
704 return false;
706 struct induction *iv = XCNEW (struct induction);
707 iv->var = var;
708 iv->init_val = init;
709 iv->init_expr = CHREC_LEFT (chrec);
710 iv->step = CHREC_RIGHT (chrec);
712 if (dump_file && (dump_flags & TDF_DETAILS))
713 dump_induction (m_loop, iv);
715 m_inductions.safe_push (iv);
716 return true;
719 /* Return true if all loop carried variables defined in loop header can
720 be successfully analyzed. */
722 bool
723 loop_cand::analyze_carried_vars (loop_cand *iloop)
725 edge e = loop_preheader_edge (m_outer);
726 gphi_iterator gsi;
728 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
731 for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
733 gphi *phi = gsi.phi ();
735 tree var = PHI_RESULT (phi);
736 if (virtual_operand_p (var))
737 continue;
739 tree chrec = analyze_scalar_evolution (m_loop, var);
740 chrec = instantiate_scev (e, m_loop, chrec);
742 /* Analyze var as reduction variable. */
743 if (chrec_contains_undetermined (chrec)
744 || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
746 if (iloop && !analyze_oloop_reduction_var (iloop, var))
747 return false;
748 if (!iloop && !analyze_iloop_reduction_var (var))
749 return false;
751 /* Analyze var as induction variable. */
752 else if (!analyze_induction_var (var, chrec))
753 return false;
756 return true;
759 /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
761 bool
762 loop_cand::analyze_lcssa_phis (void)
764 gphi_iterator gsi;
765 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
767 gphi *phi = gsi.phi ();
769 if (virtual_operand_p (PHI_RESULT (phi)))
770 continue;
772 /* TODO: We only support lcssa phi for reduction for now. */
773 if (!find_reduction_by_stmt (phi))
774 return false;
777 return true;
780 /* CONSUMER is a stmt in BB storing reduction result into memory object.
781 When the reduction is intialized from constant value, we need to add
782 a stmt loading from the memory object to target basic block in inner
783 loop during undoing the reduction. Problem is that memory reference
784 may use ssa variables not dominating the target basic block. This
785 function finds all stmts on which CONSUMER depends in basic block BB,
786 records and returns them via STMTS. */
788 static void
789 find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
791 auto_vec<gimple *, 4> worklist;
792 use_operand_p use_p;
793 ssa_op_iter iter;
794 gimple *stmt, *def_stmt;
795 gimple_stmt_iterator gsi;
797 /* First clear flag for stmts in bb. */
798 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
799 gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
801 /* DFS search all depended stmts in bb and mark flag for these stmts. */
802 worklist.safe_push (consumer);
803 while (!worklist.is_empty ())
805 stmt = worklist.pop ();
806 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
808 def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
810 if (is_a <gphi *> (def_stmt)
811 || gimple_bb (def_stmt) != bb
812 || gimple_plf (def_stmt, GF_PLF_1))
813 continue;
815 worklist.safe_push (def_stmt);
817 gimple_set_plf (stmt, GF_PLF_1, true);
819 for (gsi = gsi_start_nondebug_bb (bb);
820 !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
822 /* Move dep stmts to sequence STMTS. */
823 if (gimple_plf (stmt, GF_PLF_1))
825 gsi_remove (&gsi, false);
826 gimple_seq_add_stmt_without_update (stmts, stmt);
828 else
829 gsi_next_nondebug (&gsi);
833 /* User can write, optimizers can generate simple reduction RE for inner
834 loop. In order to make interchange valid, we have to undo reduction by
835 moving producer and consumer stmts into the inner loop. For example,
836 below code:
838 init = MEM_REF[idx]; //producer
839 loop:
840 var = phi<init, next>
841 next = var op ...
842 reduc_sum = phi<next>
843 MEM_REF[idx] = reduc_sum //consumer
845 is transformed into:
847 loop:
848 new_var = MEM_REF[idx]; //producer after moving
849 next = new_var op ...
850 MEM_REF[idx] = next; //consumer after moving
852 Note if the reduction variable is initialized to constant, like:
854 var = phi<0.0, next>
856 we compute new_var as below:
858 loop:
859 tmp = MEM_REF[idx];
860 new_var = !first_iteration ? tmp : 0.0;
862 so that the initial const is used in the first iteration of loop. Also
863 record ssa variables for dead code elimination in DCE_SEEDS. */
865 void
866 loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
868 gimple *stmt;
869 gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
870 gimple_seq stmts = NULL;
871 tree new_var;
873 /* Prepare the initialization stmts and insert it to inner loop. */
874 if (re->producer != NULL)
876 gimple_set_vuse (re->producer, NULL_TREE);
877 from = gsi_for_stmt (re->producer);
878 gsi_remove (&from, false);
879 gimple_seq_add_stmt_without_update (&stmts, re->producer);
880 new_var = re->init;
882 else
884 /* Find all stmts on which expression "MEM_REF[idx]" depends. */
885 find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
886 /* Because we generate new stmt loading from the MEM_REF to TMP. */
887 tree cond, tmp = copy_ssa_name (re->var);
888 stmt = gimple_build_assign (tmp, re->init_ref);
889 gimple_seq_add_stmt_without_update (&stmts, stmt);
891 /* Init new_var to MEM_REF or CONST depending on if it is the first
892 iteration. */
893 induction_p iv = m_inductions[0];
894 cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
895 new_var = copy_ssa_name (re->var);
896 stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
897 gimple_seq_add_stmt_without_update (&stmts, stmt);
899 gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
901 /* Replace all uses of reduction var with new variable. */
902 use_operand_p use_p;
903 imm_use_iterator iterator;
904 FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
906 FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
907 SET_USE (use_p, new_var);
909 update_stmt (stmt);
912 /* Move consumer stmt into inner loop, just after reduction next's def. */
913 unlink_stmt_vdef (re->consumer);
914 release_ssa_name (gimple_vdef (re->consumer));
915 gimple_set_vdef (re->consumer, NULL_TREE);
916 gimple_set_vuse (re->consumer, NULL_TREE);
917 gimple_assign_set_rhs1 (re->consumer, re->next);
918 from = gsi_for_stmt (re->consumer);
919 to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
920 gsi_move_after (&from, &to);
922 /* Mark the reduction variables for DCE. */
923 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
924 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
927 /* Free DATAREFS and its auxiliary memory. */
929 static void
930 free_data_refs_with_aux (vec<data_reference_p> datarefs)
932 data_reference_p dr;
933 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
934 if (dr->aux != NULL)
936 DR_ACCESS_STRIDE (dr)->release ();
937 delete (vec<tree> *) dr->aux;
940 free_data_refs (datarefs);
943 /* Class for loop interchange transformation. */
945 class tree_loop_interchange
947 public:
948 tree_loop_interchange (vec<struct loop *> loop_nest)
949 : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
950 m_dce_seeds (BITMAP_ALLOC (NULL)) { }
951 ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
952 bool interchange (vec<data_reference_p>, vec<ddr_p>);
954 private:
955 void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
956 bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
957 void interchange_loops (loop_cand &, loop_cand &);
958 void map_inductions_to_loop (loop_cand &, loop_cand &);
959 void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
961 /* The whole loop nest in which interchange is ongoing. */
962 vec<struct loop *> m_loop_nest;
963 /* We create new IV which is only used in loop's exit condition check.
964 In case of 3-level loop nest interchange, when we interchange the
965 innermost two loops, new IV created in the middle level loop does
966 not need to be preserved in interchanging the outermost two loops
967 later. We record the IV so that it can be skipped. */
968 tree m_niters_iv_var;
969 /* Bitmap of seed variables for dead code elimination after interchange. */
970 bitmap m_dce_seeds;
973 /* Update data refs' access stride and dependence information after loop
974 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
975 nest. DATAREFS are data references. DDRS are data dependences. */
977 void
978 tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
979 vec<data_reference_p> datarefs,
980 vec<ddr_p> ddrs)
982 struct data_reference *dr;
983 struct data_dependence_relation *ddr;
985 /* Update strides of data references. */
986 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
988 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
989 gcc_assert (stride->length () > i_idx);
990 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
992 /* Update data dependences. */
993 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
994 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
996 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
998 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
999 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1004 /* Check data dependence relations, return TRUE if it's valid to interchange
1005 two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
1006 loops is valid only if dist vector, after interchanging, doesn't have '>'
1007 as the leftmost non-'=' direction. Practically, this function have been
1008 conservative here by not checking some valid cases. */
1010 bool
1011 tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
1012 vec<ddr_p> ddrs)
1014 struct data_dependence_relation *ddr;
1016 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1018 /* Skip no-dependence case. */
1019 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1020 continue;
1022 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1024 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1025 unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
1027 /* If there is no carried dependence. */
1028 if (level == 0)
1029 continue;
1031 level --;
1033 /* If dependence is not carried by any loop in between the two
1034 loops [oloop, iloop] to interchange. */
1035 if (level < o_idx || level > i_idx)
1036 continue;
1038 /* Be conservative, skip case if either direction at i_idx/o_idx
1039 levels is not '=' or '<'. */
1040 if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
1041 return false;
1045 return true;
/* Interchange two loops specified by ILOOP and OLOOP.  ILOOP is the inner
   loop of the pair, OLOOP the outer one; after this call their roles in
   the CFG are swapped.  */

void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction so its init/finalize refs can be
     moved; double reductions are handled by the outer-loop walk below.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
	if (re->producer)
	  reset_debug_uses (re->producer);

	iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops: compute them in terms of values
     available at the preheader of the whole nest, and materialize them
     there as gimple.  */
  struct loop *loop_nest = m_loop_nest[0];
  edge instantiate_below = loop_preheader_edge (loop_nest);
  gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
  i_niters = number_of_latch_executions (iloop.m_loop);
  i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
  i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
			       i_niters);
  i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);
  o_niters = number_of_latch_executions (oloop.m_loop);
  if (oloop.m_loop != loop_nest)
    {
      /* Only instantiate when OLOOP is not already the nest root; its
	 niters is then expressed relative to its own outer loop.  */
      o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
      o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
				   o_niters);
    }
  o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);

  /* Move src's code to tgt loop.  This is necessary when src is the outer
     loop and tgt is the inner loop.  */
  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);

  /* Map outer loop's IV to inner loop, and vice versa.  */
  map_inductions_to_loop (oloop, iloop);
  map_inductions_to_loop (iloop, oloop);

  /* Create canonical IV for both loops.  Note canonical IV for outer/inner
     loop is actually from inner/outer loop.  Also we record the new IV
     created for the outer loop so that it can be skipped in later loop
     interchange.  */
  create_canonical_iv (oloop.m_loop, oloop.m_exit,
		       i_niters, &m_niters_iv_var, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
  create_canonical_iv (iloop.m_loop, iloop.m_exit,
		       o_niters, NULL, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

  /* Scrap niters estimation of interchanged loops; the old bounds no
     longer describe the swapped iteration spaces.  */
  iloop.m_loop->any_upper_bound = false;
  iloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (iloop.m_loop);
  oloop.m_loop->any_upper_bound = false;
  oloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (oloop.m_loop);

  /* ??? The association between the loop data structure and the
     CFG changed, so what was loop N at the source level is now
     loop M.  We should think of retaining the association or breaking
     it fully by creating a new loop instead of re-using the "wrong" one.  */
}
/* Map induction variables of SRC loop to TGT loop.  The function firstly
   creates the same IV of SRC loop in TGT loop, then deletes the original
   IV and re-initializes it using the newly created IV.  For example, loop
   nest:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
	 {
	   //use of i;
	   //use of j;
	 }

   will be transformed into:

     for (jj = 0; jj < M; jj++)
       for (ii = 0; ii < N; ii++)
	 {
	   //use of ii;
	   //use of jj;
	 }

   after loop interchange.  */

void
tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
{
  induction_p iv;
  edge e = tgt.m_exit;
  /* New IVs are incremented just before the target loop's exit test.  */
  gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;

  /* Map source loop's IV to target loop.  */
  for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    {
      gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
      gcc_assert (is_a <gphi *> (stmt));

      use_operand_p use_p;
      /* Only map original IV to target loop; the niters IV we created
	 ourselves in a previous interchange is skipped.  */
      if (m_niters_iv_var != iv->var)
	{
	  /* Map the IV by creating the same one in target loop.  */
	  tree var_before, var_after;
	  tree base = unshare_expr (iv->init_expr);
	  tree step = unshare_expr (iv->step);
	  create_iv (base, step, SSA_NAME_VAR (iv->var),
		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

	  /* Replace uses of the original IV var with newly created IV var.  */
	  imm_use_iterator imm_iter;
	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		SET_USE (use_p, var_before);

	      update_stmt (use_stmt);
	    }
	}

      /* Mark all uses for DCE so the now-dead feeding computation of the
	 old PHI can be cleaned up later.  */
      ssa_op_iter op_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) == SSA_NAME
	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
	}

      /* Delete definition of the original IV in the source loop.  */
      gsi = gsi_for_stmt (stmt);
      remove_phi_node (&gsi, true);
    }
}
/* Move stmts of outer loop to inner loop.  OUTER_BBS is the array of
   OUTER's basic blocks; blocks belonging to INNER itself are skipped.  */

void
tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
						struct loop *inner,
						basic_block *outer_bbs)
{
  basic_block oloop_exit_bb = single_exit (outer)->src;
  gimple_stmt_iterator gsi, to;

  for (unsigned i = 0; i < outer->num_nodes; i++)
    {
      basic_block bb = outer_bbs[i];

      /* Skip basic blocks of inner loop.  */
      if (flow_bb_inside_loop_p (inner, bb))
	continue;

      /* Move code from header/latch to header/latch.  */
      if (bb == outer->header)
	to = gsi_after_labels (inner->header);
      else if (bb == outer->latch)
	to = gsi_after_labels (inner->latch);
      else
	/* Otherwise, simply move to exit->src.  */
	to = gsi_last_bb (single_exit (inner)->src);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Keep the outer loop's own exit condition in place.  */
	  if (oloop_exit_bb == bb
	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }

	  /* Strip virtual operands; TODO_update_ssa_only_virtuals rebuilds
	     them after the pass.  */
	  if (gimple_vuse (stmt))
	    gimple_set_vuse (stmt, NULL_TREE);
	  if (gimple_vdef (stmt))
	    {
	      unlink_stmt_vdef (stmt);
	      release_ssa_name (gimple_vdef (stmt));
	      gimple_set_vdef (stmt, NULL_TREE);
	    }

	  reset_debug_uses (stmt);
	  /* gsi_move_before advances GSI to the next stmt itself.  */
	  gsi_move_before (&gsi, &to);
	}
    }
}
/* Given data reference DR in LOOP_NEST, the function computes DR's access
   stride at each level of loop from innermost LOOP to outer.  On success,
   it saves access stride at each level loop in a vector which is pointed
   by DR->aux.  For example:

     int arr[100][100][100];
     for (i = 0; i < 100; i++)       ;(DR->aux)strides[0] = 40000
       for (j = 100; j > 0; j--)     ;(DR->aux)strides[1] = 400
	 for (k = 0; k < 100; k++)   ;(DR->aux)strides[2] = 4
	   arr[i][j - 1][k] = 0;

   On failure (undetermined evolution, unanalyzable bitfield) DR->aux is
   still set, but holds fewer strides (possibly none).  */

static void
compute_access_stride (struct loop *loop_nest, struct loop *loop,
		       data_reference_p dr)
{
  vec<tree> *strides = new vec<tree> ();
  basic_block bb = gimple_bb (DR_STMT (dr));

  /* DR may live in an outer loop of the nest; record zero stride for
     every inner level it does not belong to.  */
  while (!flow_bb_inside_loop_p (loop, bb))
    {
      strides->safe_push (build_int_cst (sizetype, 0));
      loop = loop_outer (loop);
    }
  gcc_assert (loop == bb->loop_father);

  tree ref = DR_REF (dr);
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      /* We can't take address of bitfields.  If the bitfield is at constant
	 offset from the start of the struct, just use address of the
	 struct, for analysis of the strides that shouldn't matter.  */
      if (!TREE_OPERAND (ref, 2)
	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
	ref = TREE_OPERAND (ref, 0);
      /* Otherwise, if we have a bit field representative, use that.  */
      else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
	       != NULL_TREE)
	{
	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
			repr, TREE_OPERAND (ref, 2));
	}
      /* Otherwise punt.  */
      else
	{
	  dr->aux = strides;
	  return;
	}
    }

  /* Analyze the evolution of &REF; each CHREC level's step is the byte
     stride of the corresponding loop.  */
  tree scev_base = build_fold_addr_expr (ref);
  tree scev = analyze_scalar_evolution (loop, scev_base);
  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
  if (! chrec_contains_undetermined (scev))
    {
      tree sl = scev;
      struct loop *expected = loop;
      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
	{
	  struct loop *sl_loop = get_chrec_loop (sl);
	  /* Loops with no CHREC level have zero stride.  */
	  while (sl_loop != expected)
	    {
	      strides->safe_push (size_int (0));
	      expected = loop_outer (expected);
	    }
	  strides->safe_push (CHREC_RIGHT (sl));
	  sl = CHREC_LEFT (sl);
	  expected = loop_outer (expected);
	}
      /* Pad zero strides up to the nest root when the base is invariant.  */
      if (! tree_contains_chrecs (sl, NULL))
	while (expected != loop_outer (loop_nest))
	  {
	    strides->safe_push (size_int (0));
	    expected = loop_outer (expected);
	  }
    }

  dr->aux = strides;
}
/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
   access strides with respect to each level loop for all data refs in
   DATAREFS from inner loop to outer loop.  On success, it returns the
   outermost loop that access strides can be computed successfully for
   all data references.  If access strides cannot be computed at least
   for two levels of loop for any data reference, it returns NULL.  */

static struct loop *
compute_access_strides (struct loop *loop_nest, struct loop *loop,
			vec<data_reference_p> datarefs)
{
  unsigned i, j, num_loops = (unsigned) -1;
  data_reference_p dr;
  vec<tree> *stride;

  /* First pass: compute per-DR strides and find the minimum number of
     analyzable loop levels over all DRs.  */
  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      compute_access_stride (loop_nest, loop, dr);
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () < num_loops)
	{
	  num_loops = stride->length ();
	  /* Interchange needs at least two analyzable levels.  */
	  if (num_loops < 2)
	    return NULL;
	}
    }

  /* Second pass: trim every stride vector to NUM_LOOPS entries and
     reverse it so strides are ordered from outer loop to inner.  */
  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () > num_loops)
	stride->truncate (num_loops);

      for (j = 0; j < (num_loops >> 1); ++j)
	std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
    }

  /* The resulting nest root sits NUM_LOOPS - 1 levels above LOOP.  */
  loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
  gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
  return loop;
}
1379 /* Prune access strides for data references in DATAREFS by removing strides
1380 of loops that isn't in current LOOP_NEST. */
1382 static void
1383 prune_access_strides_not_in_loop (struct loop *loop_nest,
1384 struct loop *innermost,
1385 vec<data_reference_p> datarefs)
1387 data_reference_p dr;
1388 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1389 gcc_assert (num_loops > 1);
1391 /* Block remove strides of loops that is not in current loop nest. */
1392 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1394 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1395 if (stride->length () > num_loops)
1396 stride->block_remove (0, stride->length () - num_loops);
1400 /* Dump access strides for all DATAREFS. */
1402 static void
1403 dump_access_strides (vec<data_reference_p> datarefs)
1405 data_reference_p dr;
1406 fprintf (dump_file, "Access Strides for DRs:\n");
1407 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1409 fprintf (dump_file, " ");
1410 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1411 fprintf (dump_file, ":\t\t<");
1413 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1414 unsigned num_loops = stride->length ();
1415 for (unsigned j = 0; j < num_loops; ++j)
1417 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1418 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1423 /* Return true if it's profitable to interchange two loops whose index
1424 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1425 computes and compares three types information from all DATAREFS:
1426 1) Access stride for loop I_IDX and O_IDX.
1427 2) Number of invariant memory references with respect to I_IDX before
1428 and after loop interchange.
1429 3) Flags indicating if all memory references access sequential memory
1430 in ILOOP, before and after loop interchange.
1431 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1432 innermost loops in loop nest. This function also dumps information if
1433 DUMP_INFO_P is true. */
1435 static bool
1436 should_interchange_loops (unsigned i_idx, unsigned o_idx,
1437 vec<data_reference_p> datarefs,
1438 unsigned i_stmt_cost, unsigned o_stmt_cost,
1439 bool innermost_loops_p, bool dump_info_p = true)
1441 unsigned HOST_WIDE_INT ratio;
1442 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1443 struct data_reference *dr;
1444 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1445 widest_int iloop_strides = 0, oloop_strides = 0;
1446 unsigned num_unresolved_drs = 0;
1447 unsigned num_resolved_ok_drs = 0;
1448 unsigned num_resolved_not_ok_drs = 0;
1450 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1451 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1453 for (i = 0; datarefs.iterate (i, &dr); ++i)
1455 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1456 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1458 bool subloop_stride_p = false;
1459 /* Data ref can't be invariant or sequential access at current loop if
1460 its address changes with respect to any subloops. */
1461 for (j = i_idx + 1; j < stride->length (); ++j)
1462 if (!integer_zerop ((*stride)[j]))
1464 subloop_stride_p = true;
1465 break;
1468 if (integer_zerop (iloop_stride))
1470 if (!subloop_stride_p)
1471 num_old_inv_drs++;
1473 if (integer_zerop (oloop_stride))
1475 if (!subloop_stride_p)
1476 num_new_inv_drs++;
1479 if (TREE_CODE (iloop_stride) == INTEGER_CST
1480 && TREE_CODE (oloop_stride) == INTEGER_CST)
1482 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1483 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1485 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1486 iloop_stride, oloop_stride))
1487 num_resolved_ok_drs++;
1488 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1489 oloop_stride, iloop_stride))
1490 num_resolved_not_ok_drs++;
1491 else
1492 num_unresolved_drs++;
1494 /* Data ref can't be sequential access if its address changes in sub
1495 loop. */
1496 if (subloop_stride_p)
1498 all_seq_dr_before_p = false;
1499 all_seq_dr_after_p = false;
1500 continue;
1502 /* Track if all data references are sequential accesses before/after loop
1503 interchange. Note invariant is considered sequential here. */
1504 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1505 if (all_seq_dr_before_p
1506 && ! (integer_zerop (iloop_stride)
1507 || operand_equal_p (access_size, iloop_stride, 0)))
1508 all_seq_dr_before_p = false;
1509 if (all_seq_dr_after_p
1510 && ! (integer_zerop (oloop_stride)
1511 || operand_equal_p (access_size, oloop_stride, 0)))
1512 all_seq_dr_after_p = false;
1515 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1517 fprintf (dump_file, "\toverall:\t\t");
1518 print_decu (iloop_strides, dump_file);
1519 fprintf (dump_file, "\t");
1520 print_decu (oloop_strides, dump_file);
1521 fprintf (dump_file, "\n");
1523 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1524 num_old_inv_drs, num_new_inv_drs);
1525 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1526 all_seq_dr_before_p ? "true" : "false",
1527 all_seq_dr_after_p ? "true" : "false");
1528 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1529 num_resolved_ok_drs);
1530 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1531 num_resolved_not_ok_drs);
1532 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1533 num_unresolved_drs);
1534 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
1535 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
1538 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1539 return false;
1541 /* Stmts of outer loop will be moved to inner loop. If there are two many
1542 such stmts, it could make inner loop costly. Here we compare stmt cost
1543 between outer and inner loops. */
1544 if (i_stmt_cost && o_stmt_cost
1545 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
1546 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
1547 return false;
1549 /* We use different stride comparison ratio for interchanging innermost
1550 two loops or not. The idea is to be conservative in interchange for
1551 the innermost loops. */
1552 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1553 /* Do interchange if it gives better data locality behavior. */
1554 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1555 return true;
1556 if (wi::gtu_p (iloop_strides, oloop_strides))
1558 /* Or it creates more invariant memory references. */
1559 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1560 && num_new_inv_drs > num_old_inv_drs)
1561 return true;
1562 /* Or it makes all memory references sequential. */
1563 if (num_new_inv_drs >= num_old_inv_drs
1564 && !all_seq_dr_before_p && all_seq_dr_after_p)
1565 return true;
1568 return false;
/* Try to interchange inner loop of a loop nest to outer level.  Returns
   true if at least one pair of loops was interchanged.  */

bool
tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
				    vec<ddr_p> ddrs)
{
  location_t loc = find_loop_location (m_loop_nest[0]);
  bool changed_p = false;
  /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
     The overall effect is to push inner loop to outermost level in whole
     loop nest.  */
  for (unsigned i = m_loop_nest.length (); i > 1; --i)
    {
      unsigned i_idx = i - 1, o_idx = i - 2;

      /* Check validity for loop interchange.  */
      if (!valid_data_dependences (i_idx, o_idx, ddrs))
	break;

      loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
      loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);

      /* Check if we can do transformation for loop interchange.  */
      if (!iloop.analyze_carried_vars (NULL)
	  || !iloop.analyze_lcssa_phis ()
	  || !oloop.analyze_carried_vars (&iloop)
	  || !oloop.analyze_lcssa_phis ()
	  || !iloop.can_interchange_p (NULL)
	  || !oloop.can_interchange_p (&iloop))
	break;

      /* Outer loop's stmts will be moved to inner loop during interchange.
	 If there are many of them, it may make inner loop very costly.  We
	 need to check number of outer loop's stmts in profit cost model of
	 interchange.  */
      int stmt_cost = oloop.m_num_stmts;
      /* Count out the exit checking stmt of outer loop.  */
      stmt_cost --;
      /* Count out IV's increasing stmt, IVOPTs takes care of it.  */
      stmt_cost -= oloop.m_inductions.length ();
      /* Count in the additional load and cond_expr stmts caused by inner
	 loop's constant initialized reduction.  */
      stmt_cost += iloop.m_const_init_reduc * 2;
      if (stmt_cost < 0)
	stmt_cost = 0;

      /* Check profitability for loop interchange.  */
      if (should_interchange_loops (i_idx, o_idx, datarefs,
				    (unsigned) iloop.m_num_stmts,
				    (unsigned) stmt_cost,
				    iloop.m_loop->inner == NULL))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);

	  changed_p = true;
	  interchange_loops (iloop, oloop);
	  /* No need to update if there is no further loop interchange.  */
	  if (o_idx > 0)
	    update_data_info (i_idx, o_idx, datarefs, ddrs);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);
	}
    }
  /* Clean up code made dead by the interchanges above.  */
  simple_dce_from_worklist (m_dce_seeds);

  if (changed_p)
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "loops interchanged in loop nest\n");

  return changed_p;
}
1652 /* Loop interchange pass. */
1654 namespace {
/* Pass descriptor for the loop interchange pass.  */

const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass driving loop interchange; gated on -floop-interchange.  */

class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);
}; // class pass_linterchange
/* Return true if LOOP has proper form for interchange.  We check three
   conditions in the function:
     1) In general, a loop can be interchanged only if it doesn't have
	basic blocks other than header, exit and latch besides possible
	inner loop nest.  This basically restricts loop interchange to
	below form loop nests:

	      header<---+
		 |      |
		 v      |
	    INNER_LOOP  |
		 |      |
		 v      |
	       exit--->latch

     2) Data reference in basic block that executes in different times
	than loop head/exit is not allowed.
     3) Record the innermost outer loop that doesn't form rectangle loop
	nest with LOOP (in *MIN_OUTER).  */

static bool
proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
{
  edge e0, e1, exit;

  /* Don't interchange if loop has unsupported information for the moment.  */
  if (loop->safelen > 0
      || loop->constraints != 0
      || loop->can_be_parallel
      || loop->dont_vectorize
      || loop->force_vectorize
      || loop->in_oacc_kernels_region
      || loop->orig_loop_num != 0
      || loop->simduid != NULL_TREE)
    return false;

  /* Don't interchange if outer loop has basic block other than header, exit
     and latch.  */
  if (loop->inner != NULL
      && loop->num_nodes != loop->inner->num_nodes + 3)
    return false;

  /* Require a single exit that dominates the latch.  */
  if ((exit = single_dom_exit (loop)) == NULL)
    return false;

  /* Check control flow on loop header/exit blocks.  */
  if (loop->header == exit->src
      && (EDGE_COUNT (loop->header->preds) != 2
	  || EDGE_COUNT (loop->header->succs) != 2))
    return false;
  else if (loop->header != exit->src
	   && (EDGE_COUNT (loop->header->preds) != 2
	       || !single_succ_p (loop->header)
	       || unsupported_edge (single_succ_edge (loop->header))
	       || EDGE_COUNT (exit->src->succs) != 2
	       || !single_pred_p (exit->src)
	       || unsupported_edge (single_pred_edge (exit->src))))
    return false;

  /* One header predecessor must be the latch; at least one must come
     from outside LOOP itself.  */
  e0 = EDGE_PRED (loop->header, 0);
  e1 = EDGE_PRED (loop->header, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->src != loop->latch && e1->src != loop->latch)
      || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    return false;

  /* Likewise for the exit block's successors.  */
  e0 = EDGE_SUCC (exit->src, 0);
  e1 = EDGE_SUCC (exit->src, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->dest != loop->latch && e1->dest != loop->latch)
      || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    return false;

  /* Don't interchange if any reference is in basic block that doesn't
     dominate exit block.  */
  basic_block *bbs = get_loop_body (loop);
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      if (bb->loop_father != loop
	  || bb == loop->header || bb == exit->src
	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	continue;

      /* gimple_vuse set means the stmt touches memory.  */
      for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
	if (gimple_vuse (gsi_stmt (gsi)))
	  {
	    free (bbs);
	    return false;
	  }
    }
  free (bbs);

  /* Niters must be analyzable in the enclosing loop.  */
  tree niters = number_of_latch_executions (loop);
  niters = analyze_scalar_evolution (loop_outer (loop), niters);
  if (!niters || chrec_contains_undetermined (niters))
    return false;

  /* Record the innermost outer loop that doesn't form rectangle loop nest,
     i.e. the first outer level on which LOOP's niters is not invariant.  */
  for (loop_p loop2 = loop_outer (loop);
       loop2 && flow_loop_nested_p (*min_outer, loop2);
       loop2 = loop_outer (loop2))
    {
      niters = instantiate_scev (loop_preheader_edge (loop2),
				 loop_outer (loop), niters);
      if (!evolution_function_is_invariant_p (niters, loop2->num))
	{
	  *min_outer = loop2;
	  break;
	}
    }
  return true;
}
1800 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1801 should be interchanged by looking into all DATAREFS. */
1803 static bool
1804 should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
1805 vec<data_reference_p> datarefs)
1807 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1808 gcc_assert (idx > 0);
1810 /* Check if any two adjacent loops should be interchanged. */
1811 for (struct loop *loop = innermost;
1812 loop != loop_nest; loop = loop_outer (loop), idx--)
1813 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
1814 loop == innermost, false))
1815 return true;
1817 return false;
/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
   dependences for loop interchange and store it in DDRS.  Note we compute
   dependences directly rather than call generic interface so that we can
   return on unknown dependence instantly.  */

static bool
tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
				    vec<data_reference_p> datarefs,
				    vec<ddr_p> *ddrs)
{
  struct data_reference *a, *b;
  struct loop *innermost = loop_nest.last ();

  for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    {
      bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
      /* Only write/write and read/write pairs can carry a dependence.  */
      for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
	  {
	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
	    /* Don't support multiple write references in outer loop.  */
	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
	      return false;

	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
	    ddrs->safe_push (ddr);
	    compute_affine_dependence (ddr, loop_nest[0]);

	    /* Give up if ddr is unknown dependence or classic direct vector
	       is not available.  */
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
		    && DDR_NUM_DIR_VECTS (ddr) == 0))
	      return false;

	    /* If either data references is in outer loop of nest, we require
	       no dependence here because the data reference need to be moved
	       into inner loop during interchange.  Identical refs that are
	       both in the outer loop are allowed.  */
	    if (a_outer_p && b_outer_p
		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
	      continue;
	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
		&& (a_outer_p || b_outer_p))
	      return false;
	  }
    }

  return true;
}
1870 /* Prune DATAREFS by removing any data reference not inside of LOOP. */
1872 static inline void
1873 prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
1875 unsigned i, j;
1876 struct data_reference *dr;
1878 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1880 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1881 datarefs[j++] = dr;
1882 else
1884 if (dr->aux)
1886 DR_ACCESS_STRIDE (dr)->release ();
1887 delete (vec<tree> *) dr->aux;
1889 free_data_ref (dr);
1892 datarefs.truncate (j);
/* Find and store data references in DATAREFS for LOOP nest.  If there's
   difficult data reference in a basic block, we shrink the loop nest to
   inner loop of that basic block's father loop.  On success, return the
   outer loop of the result loop nest; NULL if no usable nest remains.  */

static struct loop *
prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
{
  struct loop *loop_nest = loop;
  vec<data_reference_p> *bb_refs;
  basic_block bb, *bbs = get_loop_body_in_dom_order (loop);

  /* bb->aux temporarily holds each block's data-reference vector.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  /* Find data references for all basic blocks.  Shrink loop nest on
     difficult data reference.  */
  for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    {
      bb = bbs[i];
      if (!flow_bb_inside_loop_p (loop_nest, bb))
	continue;

      bb_refs = new vec<data_reference_p> ();
      if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
	{
	  /* Shrink the nest below the offending block; a nest of a single
	     loop is useless for interchange.  */
	  loop_nest = bb->loop_father->inner;
	  if (loop_nest && !loop_nest->inner)
	    loop_nest = NULL;

	  free_data_refs (*bb_refs);
	  delete bb_refs;
	}
      else if (bb_refs->is_empty ())
	delete bb_refs;
      else
	bb->aux = bb_refs;
    }

  /* Collect all data references in loop nest; free those outside the
     (possibly shrunk) nest and clear bb->aux again.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      if (!bb->aux)
	continue;

      bb_refs = (vec<data_reference_p> *) bb->aux;
      if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
	datarefs->safe_splice (*bb_refs);
      else
	free_data_refs (*bb_refs);

      delete bb_refs;
      bb->aux = NULL;
    }
  free (bbs);

  return loop_nest;
}
/* Given innermost LOOP, return true if perfect loop nest can be found and
   data dependences can be computed.  If succeed, record the perfect loop
   nest in LOOP_NEST; record all data references in DATAREFS and record all
   data dependence relations in DDRS.

   We do support a restricted form of imperfect loop nest, i.e, loop nest
   with load/store in outer loop initializing/finalizing simple reduction
   of the innermost loop.  For such outer loop reference, we require that
   it has no dependence with others since it will be moved to inner loop
   in interchange.  */

static bool
prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
{
  struct loop *start_loop = NULL, *innermost = loop;
  struct loop *outermost = loops_for_fn (cfun)->tree_root;

  /* Find loop nest from the innermost loop outwards; OUTERMOST bounds how
     far out we may go (it shrinks on non-rectangular nests).  */
  while (loop->num != 0 && loop->inner == start_loop
	 && flow_loop_nested_p (outermost, loop))
    {
      if (!proper_loop_form_for_interchange (loop, &outermost))
	break;

      start_loop = loop;
      /* If this loop has sibling loop, the father loop won't be in perfect
	 loop nest.  */
      if (loop->next != NULL)
	break;

      loop = loop_outer (loop);
    }
  /* Need at least two loops in the nest.  */
  if (!start_loop || !start_loop->inner)
    return false;

  /* Prepare the data reference vector for the loop nest, pruning outer
     loops we cannot handle.  */
  start_loop = prepare_data_references (start_loop, datarefs);
  if (!start_loop
      /* Check if there is no data reference.  */
      || datarefs->is_empty ()
      /* Check if there are too many data references.  */
      || (int) datarefs->length () > MAX_DATAREFS)
    return false;

  /* Compute access strides for all data references, pruning outer
     loops we cannot analyze refs in.  */
  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
  if (!start_loop)
    return false;

  /* Check if any interchange is profitable in the loop nest.  */
  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    return false;

  /* Check if data dependences can be computed for loop nest starting from
     start_loop.  */
  loop = start_loop;
  do {
    loop_nest->truncate (0);

    if (loop != start_loop)
      prune_datarefs_not_in_loop (start_loop, *datarefs);

    if (find_loop_nest (start_loop, loop_nest)
	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file,
		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
		   start_loop->num, innermost->num);

	if (loop != start_loop)
	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);

	if (dump_file && (dump_flags & TDF_DETAILS))
	  dump_access_strides (*datarefs);

	return true;
      }

    free_dependence_relations (*ddrs);
    *ddrs = vNULL;
    /* Try to compute data dependences with the outermost loop stripped.  */
    loop = start_loop;
    start_loop = start_loop->inner;
  } while (start_loop && start_loop->inner);

  return false;
}
/* Main entry for loop interchange pass.  Walks all innermost loops and
   tries to interchange each perfect nest ending there.  */

unsigned int
pass_linterchange::execute (function *fun)
{
  /* Interchange needs at least two real loops (loop 0 is the root).  */
  if (number_of_loops (fun) <= 2)
    return 0;

  bool changed_p = false;
  struct loop *loop;
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      vec<loop_p> loop_nest = vNULL;
      vec<data_reference_p> datarefs = vNULL;
      vec<ddr_p> ddrs = vNULL;
      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
	{
	  tree_loop_interchange loop_interchange (loop_nest);
	  changed_p |= loop_interchange.interchange (datarefs, ddrs);
	}
      free_dependence_relations (ddrs);
      free_data_refs_with_aux (datarefs);
      loop_nest.release ();
    }

  /* SCEV caches are stale after CFG/IV changes.  */
  if (changed_p)
    scev_reset_htab ();

  return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
}
2079 } // anon namespace
/* Create a new instance of the loop interchange pass for context CTXT.  */

gimple_opt_pass *
make_pass_linterchange (gcc::context *ctxt)
{
  return new pass_linterchange (ctxt);
}