[AArch64] Properly handle mvn-register and add EON+shift pattern and cost appropriately
[official-gcc.git] / gcc / loop-init.c
blob87e58ea677d7b19d1cba851bfc9d7ad8574505fc
1 /* Loop optimizer initialization routines and RTL loop optimization passes.
2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "regs.h"
36 #include "obstack.h"
37 #include "predict.h"
38 #include "hard-reg-set.h"
39 #include "input.h"
40 #include "function.h"
41 #include "dominance.h"
42 #include "cfg.h"
43 #include "cfgcleanup.h"
44 #include "basic-block.h"
45 #include "cfgloop.h"
46 #include "tree-pass.h"
47 #include "flags.h"
48 #include "df.h"
49 #include "ggc.h"
50 #include "tree-ssa-loop-niter.h"
51 #include "loop-unroll.h"
52 #include "tree-scalar-evolution.h"
55 /* Apply FLAGS to the loop state. */
57 static void
58 apply_loop_flags (unsigned flags)
60 if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES)
62 /* If the loops may have multiple latches, we cannot canonicalize
63 them further (and most of the loop manipulation functions will
64 not work). However, we avoid modifying cfg, which some
65 passes may want. */
66 gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES
67 | LOOPS_HAVE_RECORDED_EXITS)) == 0);
68 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
70 else
71 disambiguate_loops_with_multiple_latches ();
73 /* Create pre-headers. */
74 if (flags & LOOPS_HAVE_PREHEADERS)
76 int cp_flags = CP_SIMPLE_PREHEADERS;
78 if (flags & LOOPS_HAVE_FALLTHRU_PREHEADERS)
79 cp_flags |= CP_FALLTHRU_PREHEADERS;
81 create_preheaders (cp_flags);
84 /* Force all latches to have only single successor. */
85 if (flags & LOOPS_HAVE_SIMPLE_LATCHES)
86 force_single_succ_latches ();
88 /* Mark irreducible loops. */
89 if (flags & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
90 mark_irreducible_loops ();
92 if (flags & LOOPS_HAVE_RECORDED_EXITS)
93 record_loop_exits ();
96 /* Initialize loop structures. This is used by the tree and RTL loop
97 optimizers. FLAGS specify what properties to compute and/or ensure for
98 loops. */
100 void
101 loop_optimizer_init (unsigned flags)
103 timevar_push (TV_LOOP_INIT);
105 if (!current_loops)
107 gcc_assert (!(cfun->curr_properties & PROP_loops));
109 /* Find the loops. */
110 current_loops = flow_loops_find (NULL);
112 else
114 bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS);
115 bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP);
117 gcc_assert (cfun->curr_properties & PROP_loops);
119 /* Ensure that the dominators are computed, like flow_loops_find does. */
120 calculate_dominance_info (CDI_DOMINATORS);
122 #ifdef ENABLE_CHECKING
123 if (!needs_fixup)
124 verify_loop_structure ();
125 #endif
127 /* Clear all flags. */
128 if (recorded_exits)
129 release_recorded_exits ();
130 loops_state_clear (~0U);
132 if (needs_fixup)
134 /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as fix_loop_structure
135 re-applies flags. */
136 loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
137 fix_loop_structure (NULL);
141 /* Apply flags to loops. */
142 apply_loop_flags (flags);
144 /* Dump loops. */
145 flow_loops_dump (dump_file, NULL, 1);
147 #ifdef ENABLE_CHECKING
148 verify_loop_structure ();
149 #endif
151 timevar_pop (TV_LOOP_INIT);
154 /* Finalize loop structures. */
156 void
157 loop_optimizer_finalize (void)
159 struct loop *loop;
160 basic_block bb;
162 timevar_push (TV_LOOP_FINI);
164 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
165 release_recorded_exits ();
167 free_numbers_of_iterations_estimates ();
169 /* If we should preserve loop structure, do not free it but clear
170 flags that advanced properties are there as we are not preserving
171 that in full. */
172 if (cfun->curr_properties & PROP_loops)
174 loops_state_clear (LOOP_CLOSED_SSA
175 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
176 | LOOPS_HAVE_PREHEADERS
177 | LOOPS_HAVE_SIMPLE_LATCHES
178 | LOOPS_HAVE_FALLTHRU_PREHEADERS);
179 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
180 goto loop_fini_done;
183 gcc_assert (current_loops != NULL);
185 FOR_EACH_LOOP (loop, 0)
186 free_simple_loop_desc (loop);
188 /* Clean up. */
189 flow_loops_free (current_loops);
190 ggc_free (current_loops);
191 current_loops = NULL;
193 FOR_ALL_BB_FN (bb, cfun)
195 bb->loop_father = NULL;
198 loop_fini_done:
199 timevar_pop (TV_LOOP_FINI);
202 /* The structure of loops might have changed. Some loops might get removed
203 (and their headers and latches were set to NULL), loop exists might get
204 removed (thus the loop nesting may be wrong), and some blocks and edges
205 were changed (so the information about bb --> loop mapping does not have
206 to be correct). But still for the remaining loops the header dominates
207 the latch, and loops did not get new subloops (new loops might possibly
208 get created, but we are not interested in them). Fix up the mess.
210 If CHANGED_BBS is not NULL, basic blocks whose loop depth has changed are
211 marked in it.
213 Returns the number of new discovered loops. */
215 unsigned
216 fix_loop_structure (bitmap changed_bbs)
218 basic_block bb;
219 int record_exits = 0;
220 struct loop *loop;
221 unsigned old_nloops, i;
223 timevar_push (TV_LOOP_INIT);
225 if (dump_file && (dump_flags & TDF_DETAILS))
226 fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n");
228 /* We need exact and fast dominance info to be available. */
229 gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);
231 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
233 release_recorded_exits ();
234 record_exits = LOOPS_HAVE_RECORDED_EXITS;
237 /* Remember the depth of the blocks in the loop hierarchy, so that we can
238 recognize blocks whose loop nesting relationship has changed. */
239 if (changed_bbs)
240 FOR_EACH_BB_FN (bb, cfun)
241 bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
243 /* Remove the dead loops from structures. We start from the innermost
244 loops, so that when we remove the loops, we know that the loops inside
245 are preserved, and do not waste time relinking loops that will be
246 removed later. */
247 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
249 /* Detect the case that the loop is no longer present even though
250 it wasn't marked for removal.
251 ??? If we do that we can get away with not marking loops for
252 removal at all. And possibly avoid some spurious removals. */
253 if (loop->header
254 && bb_loop_header_p (loop->header))
255 continue;
257 if (dump_file && (dump_flags & TDF_DETAILS))
258 fprintf (dump_file, "fix_loop_structure: removing loop %d\n",
259 loop->num);
261 while (loop->inner)
263 struct loop *ploop = loop->inner;
264 flow_loop_tree_node_remove (ploop);
265 flow_loop_tree_node_add (loop_outer (loop), ploop);
268 /* Remove the loop. */
269 if (loop->header)
270 loop->former_header = loop->header;
271 else
272 gcc_assert (loop->former_header != NULL);
273 loop->header = NULL;
274 flow_loop_tree_node_remove (loop);
277 /* Remember the number of loops so we can return how many new loops
278 flow_loops_find discovered. */
279 old_nloops = number_of_loops (cfun);
281 /* Re-compute loop structure in-place. */
282 flow_loops_find (current_loops);
284 /* Mark the blocks whose loop has changed. */
285 if (changed_bbs)
287 FOR_EACH_BB_FN (bb, cfun)
289 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
290 bitmap_set_bit (changed_bbs, bb->index);
292 bb->aux = NULL;
296 /* Finally free deleted loops. */
297 bool any_deleted = false;
298 FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
299 if (loop && loop->header == NULL)
301 if (dump_file
302 && ((unsigned) loop->former_header->index
303 < basic_block_info_for_fn (cfun)->length ()))
305 basic_block former_header
306 = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
307 /* If the old header still exists we want to check if the
308 original loop is re-discovered or the old header is now
309 part of a newly discovered loop.
310 In both cases we should have avoided removing the loop. */
311 if (former_header == loop->former_header)
313 if (former_header->loop_father->header == former_header)
314 fprintf (dump_file, "fix_loop_structure: rediscovered "
315 "removed loop %d as loop %d with old header %d\n",
316 loop->num, former_header->loop_father->num,
317 former_header->index);
318 else if ((unsigned) former_header->loop_father->num
319 >= old_nloops)
320 fprintf (dump_file, "fix_loop_structure: header %d of "
321 "removed loop %d is part of the newly "
322 "discovered loop %d with header %d\n",
323 former_header->index, loop->num,
324 former_header->loop_father->num,
325 former_header->loop_father->header->index);
328 (*get_loops (cfun))[i] = NULL;
329 flow_loop_free (loop);
330 any_deleted = true;
333 /* If we deleted loops then the cached scalar evolutions refering to
334 those loops become invalid. */
335 if (any_deleted && scev_initialized_p ())
336 scev_reset_htab ();
338 loops_state_clear (LOOPS_NEED_FIXUP);
340 /* Apply flags to loops. */
341 apply_loop_flags (current_loops->state | record_exits);
343 #ifdef ENABLE_CHECKING
344 verify_loop_structure ();
345 #endif
347 timevar_pop (TV_LOOP_INIT);
349 return number_of_loops (cfun) - old_nloops;
352 /* The RTL loop superpass. The actual passes are subpasses. See passes.c for
353 more on that. */
355 namespace {
357 const pass_data pass_data_loop2 =
359 RTL_PASS, /* type */
360 "loop2", /* name */
361 OPTGROUP_LOOP, /* optinfo_flags */
362 TV_LOOP, /* tv_id */
363 0, /* properties_required */
364 0, /* properties_provided */
365 0, /* properties_destroyed */
366 0, /* todo_flags_start */
367 0, /* todo_flags_finish */
370 class pass_loop2 : public rtl_opt_pass
372 public:
373 pass_loop2 (gcc::context *ctxt)
374 : rtl_opt_pass (pass_data_loop2, ctxt)
377 /* opt_pass methods: */
378 virtual bool gate (function *);
380 }; // class pass_loop2
382 bool
383 pass_loop2::gate (function *fun)
385 if (optimize > 0
386 && (flag_move_loop_invariants
387 || flag_unswitch_loops
388 || flag_unroll_loops
389 #ifdef HAVE_doloop_end
390 || (flag_branch_on_count_reg && HAVE_doloop_end)
391 #endif
393 return true;
394 else
396 /* No longer preserve loops, remove them now. */
397 fun->curr_properties &= ~PROP_loops;
398 if (current_loops)
399 loop_optimizer_finalize ();
400 return false;
404 } // anon namespace
406 rtl_opt_pass *
407 make_pass_loop2 (gcc::context *ctxt)
409 return new pass_loop2 (ctxt);
413 /* Initialization of the RTL loop passes. */
414 static unsigned int
415 rtl_loop_init (void)
417 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
419 if (dump_file)
421 dump_reg_info (dump_file);
422 dump_flow_info (dump_file, dump_flags);
425 loop_optimizer_init (LOOPS_NORMAL);
426 return 0;
429 namespace {
431 const pass_data pass_data_rtl_loop_init =
433 RTL_PASS, /* type */
434 "loop2_init", /* name */
435 OPTGROUP_LOOP, /* optinfo_flags */
436 TV_LOOP, /* tv_id */
437 0, /* properties_required */
438 0, /* properties_provided */
439 0, /* properties_destroyed */
440 0, /* todo_flags_start */
441 0, /* todo_flags_finish */
444 class pass_rtl_loop_init : public rtl_opt_pass
446 public:
447 pass_rtl_loop_init (gcc::context *ctxt)
448 : rtl_opt_pass (pass_data_rtl_loop_init, ctxt)
451 /* opt_pass methods: */
452 virtual unsigned int execute (function *) { return rtl_loop_init (); }
454 }; // class pass_rtl_loop_init
456 } // anon namespace
458 rtl_opt_pass *
459 make_pass_rtl_loop_init (gcc::context *ctxt)
461 return new pass_rtl_loop_init (ctxt);
465 /* Finalization of the RTL loop passes. */
467 namespace {
469 const pass_data pass_data_rtl_loop_done =
471 RTL_PASS, /* type */
472 "loop2_done", /* name */
473 OPTGROUP_LOOP, /* optinfo_flags */
474 TV_LOOP, /* tv_id */
475 0, /* properties_required */
476 0, /* properties_provided */
477 PROP_loops, /* properties_destroyed */
478 0, /* todo_flags_start */
479 0, /* todo_flags_finish */
482 class pass_rtl_loop_done : public rtl_opt_pass
484 public:
485 pass_rtl_loop_done (gcc::context *ctxt)
486 : rtl_opt_pass (pass_data_rtl_loop_done, ctxt)
489 /* opt_pass methods: */
490 virtual unsigned int execute (function *);
492 }; // class pass_rtl_loop_done
494 unsigned int
495 pass_rtl_loop_done::execute (function *fun)
497 /* No longer preserve loops, remove them now. */
498 fun->curr_properties &= ~PROP_loops;
499 loop_optimizer_finalize ();
500 free_dominance_info (CDI_DOMINATORS);
502 cleanup_cfg (0);
503 if (dump_file)
505 dump_reg_info (dump_file);
506 dump_flow_info (dump_file, dump_flags);
509 return 0;
512 } // anon namespace
514 rtl_opt_pass *
515 make_pass_rtl_loop_done (gcc::context *ctxt)
517 return new pass_rtl_loop_done (ctxt);
521 /* Loop invariant code motion. */
523 namespace {
525 const pass_data pass_data_rtl_move_loop_invariants =
527 RTL_PASS, /* type */
528 "loop2_invariant", /* name */
529 OPTGROUP_LOOP, /* optinfo_flags */
530 TV_LOOP_MOVE_INVARIANTS, /* tv_id */
531 0, /* properties_required */
532 0, /* properties_provided */
533 0, /* properties_destroyed */
534 0, /* todo_flags_start */
535 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
538 class pass_rtl_move_loop_invariants : public rtl_opt_pass
540 public:
541 pass_rtl_move_loop_invariants (gcc::context *ctxt)
542 : rtl_opt_pass (pass_data_rtl_move_loop_invariants, ctxt)
545 /* opt_pass methods: */
546 virtual bool gate (function *) { return flag_move_loop_invariants; }
547 virtual unsigned int execute (function *fun)
549 if (number_of_loops (fun) > 1)
550 move_loop_invariants ();
551 return 0;
554 }; // class pass_rtl_move_loop_invariants
556 } // anon namespace
558 rtl_opt_pass *
559 make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
561 return new pass_rtl_move_loop_invariants (ctxt);
565 namespace {
567 const pass_data pass_data_rtl_unroll_loops =
569 RTL_PASS, /* type */
570 "loop2_unroll", /* name */
571 OPTGROUP_LOOP, /* optinfo_flags */
572 TV_LOOP_UNROLL, /* tv_id */
573 0, /* properties_required */
574 0, /* properties_provided */
575 0, /* properties_destroyed */
576 0, /* todo_flags_start */
577 0, /* todo_flags_finish */
580 class pass_rtl_unroll_loops : public rtl_opt_pass
582 public:
583 pass_rtl_unroll_loops (gcc::context *ctxt)
584 : rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
587 /* opt_pass methods: */
588 virtual bool gate (function *)
590 return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
593 virtual unsigned int execute (function *);
595 }; // class pass_rtl_unroll_loops
597 unsigned int
598 pass_rtl_unroll_loops::execute (function *fun)
600 if (number_of_loops (fun) > 1)
602 int flags = 0;
603 if (dump_file)
604 df_dump (dump_file);
606 if (flag_unroll_loops)
607 flags |= UAP_UNROLL;
608 if (flag_unroll_all_loops)
609 flags |= UAP_UNROLL_ALL;
611 unroll_loops (flags);
613 return 0;
616 } // anon namespace
618 rtl_opt_pass *
619 make_pass_rtl_unroll_loops (gcc::context *ctxt)
621 return new pass_rtl_unroll_loops (ctxt);
625 namespace {
627 const pass_data pass_data_rtl_doloop =
629 RTL_PASS, /* type */
630 "loop2_doloop", /* name */
631 OPTGROUP_LOOP, /* optinfo_flags */
632 TV_LOOP_DOLOOP, /* tv_id */
633 0, /* properties_required */
634 0, /* properties_provided */
635 0, /* properties_destroyed */
636 0, /* todo_flags_start */
637 0, /* todo_flags_finish */
640 class pass_rtl_doloop : public rtl_opt_pass
642 public:
643 pass_rtl_doloop (gcc::context *ctxt)
644 : rtl_opt_pass (pass_data_rtl_doloop, ctxt)
647 /* opt_pass methods: */
648 virtual bool gate (function *);
649 virtual unsigned int execute (function *);
651 }; // class pass_rtl_doloop
653 bool
654 pass_rtl_doloop::gate (function *)
656 #ifdef HAVE_doloop_end
657 return (flag_branch_on_count_reg && HAVE_doloop_end);
658 #else
659 return false;
660 #endif
663 unsigned int
664 pass_rtl_doloop::execute (function *fun ATTRIBUTE_UNUSED)
666 #ifdef HAVE_doloop_end
667 if (number_of_loops (fun) > 1)
668 doloop_optimize_loops ();
669 #endif
670 return 0;
673 } // anon namespace
675 rtl_opt_pass *
676 make_pass_rtl_doloop (gcc::context *ctxt)
678 return new pass_rtl_doloop (ctxt);