* tree-ssa-phiopt.c, config/arm/arm.c, config/fr30/fr30.md,
[official-gcc.git] / gcc / tree-vectorizer.c
blob13b2281e1747834511c420db24e9f8eee27cbbd9
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131 #include "rtl.h"
132 #include "basic-block.h"
133 #include "diagnostic.h"
134 #include "tree-flow.h"
135 #include "tree-dump.h"
136 #include "timevar.h"
137 #include "cfgloop.h"
138 #include "cfglayout.h"
139 #include "expr.h"
140 #include "optabs.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /*************************************************************************
150 Simple Loop Peeling Utilities
151 *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153 (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155 (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard1
157 (edge, struct loop *, bool, basic_block *, bitmap *);
158 static void slpeel_update_phi_nodes_for_guard2
159 (edge, struct loop *, bool, basic_block *);
160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
162 static void allocate_new_names (bitmap);
163 static void rename_use_op (use_operand_p);
164 static void rename_def_op (def_operand_p, tree);
165 static void rename_variables_in_bb (basic_block);
166 static void free_new_names (bitmap);
167 static void rename_variables_in_loop (struct loop *);
169 /*************************************************************************
170 General Vectorization Utilities
171 *************************************************************************/
172 static void vect_set_dump_settings (void);
174 /* vect_dump will be set to stderr or dump_file if exist. */
175 FILE *vect_dump;
177 /* vect_verbosity_level set to an invalid value
178 to mark that it's uninitialized. */
179 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
181 /* Number of loops, at the beginning of vectorization. */
182 unsigned int vect_loops_num;
184 /*************************************************************************
185 Simple Loop Peeling Utilities
187 Utilities to support loop peeling for vectorization purposes.
188 *************************************************************************/
191 /* For each definition in DEFINITIONS this function allocates
192 new ssa name. */
194 static void
195 allocate_new_names (bitmap definitions)
197 unsigned ver;
198 bitmap_iterator bi;
200 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
202 tree def = ssa_name (ver);
203 tree *new_name_ptr = xmalloc (sizeof (tree));
205 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
207 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
208 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
210 SSA_NAME_AUX (def) = new_name_ptr;
215 /* Renames the use *OP_P. */
217 static void
218 rename_use_op (use_operand_p op_p)
220 tree *new_name_ptr;
222 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
223 return;
225 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
227 /* Something defined outside of the loop. */
228 if (!new_name_ptr)
229 return;
231 /* An ordinary ssa name defined in the loop. */
233 SET_USE (op_p, *new_name_ptr);
237 /* Renames the def *OP_P in statement STMT. */
239 static void
240 rename_def_op (def_operand_p op_p, tree stmt)
242 tree *new_name_ptr;
244 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
245 return;
247 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
249 /* Something defined outside of the loop. */
250 if (!new_name_ptr)
251 return;
253 /* An ordinary ssa name defined in the loop. */
255 SET_DEF (op_p, *new_name_ptr);
256 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
260 /* Renames the variables in basic block BB. */
262 static void
263 rename_variables_in_bb (basic_block bb)
265 tree phi;
266 block_stmt_iterator bsi;
267 tree stmt;
268 stmt_ann_t ann;
269 use_optype uses;
270 vuse_optype vuses;
271 def_optype defs;
272 v_may_def_optype v_may_defs;
273 v_must_def_optype v_must_defs;
274 unsigned i;
275 edge e;
276 edge_iterator ei;
277 struct loop *loop = bb->loop_father;
279 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
280 rename_def_op (PHI_RESULT_PTR (phi), phi);
282 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
284 stmt = bsi_stmt (bsi);
285 ann = stmt_ann (stmt);
287 uses = USE_OPS (ann);
288 for (i = 0; i < NUM_USES (uses); i++)
289 rename_use_op (USE_OP_PTR (uses, i));
291 defs = DEF_OPS (ann);
292 for (i = 0; i < NUM_DEFS (defs); i++)
293 rename_def_op (DEF_OP_PTR (defs, i), stmt);
295 vuses = VUSE_OPS (ann);
296 for (i = 0; i < NUM_VUSES (vuses); i++)
297 rename_use_op (VUSE_OP_PTR (vuses, i));
299 v_may_defs = V_MAY_DEF_OPS (ann);
300 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
302 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
303 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
306 v_must_defs = V_MUST_DEF_OPS (ann);
307 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
309 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
310 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
314 FOR_EACH_EDGE (e, ei, bb->succs)
316 if (!flow_bb_inside_loop_p (loop, e->dest))
317 continue;
318 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
319 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
324 /* Releases the structures holding the new ssa names. */
326 static void
327 free_new_names (bitmap definitions)
329 unsigned ver;
330 bitmap_iterator bi;
332 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
334 tree def = ssa_name (ver);
336 if (SSA_NAME_AUX (def))
338 free (SSA_NAME_AUX (def));
339 SSA_NAME_AUX (def) = NULL;
345 /* Renames variables in new generated LOOP. */
347 static void
348 rename_variables_in_loop (struct loop *loop)
350 unsigned i;
351 basic_block *bbs;
353 bbs = get_loop_body (loop);
355 for (i = 0; i < loop->num_nodes; i++)
356 rename_variables_in_bb (bbs[i]);
358 free (bbs);
362 /* Update the PHI nodes of NEW_LOOP.
364 NEW_LOOP is a duplicate of ORIG_LOOP.
365 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
366 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
367 executes before it. */
369 static void
370 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
371 struct loop *new_loop, bool after)
373 tree *new_name_ptr, new_ssa_name;
374 tree phi_new, phi_orig;
375 tree def;
376 edge orig_loop_latch = loop_latch_edge (orig_loop);
377 edge orig_entry_e = loop_preheader_edge (orig_loop);
378 edge new_loop_exit_e = new_loop->single_exit;
379 edge new_loop_entry_e = loop_preheader_edge (new_loop);
380 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
383 step 1. For each loop-header-phi:
384 Add the first phi argument for the phi in NEW_LOOP
385 (the one associated with the entry of NEW_LOOP)
387 step 2. For each loop-header-phi:
388 Add the second phi argument for the phi in NEW_LOOP
389 (the one associated with the latch of NEW_LOOP)
391 step 3. Update the phis in the successor block of NEW_LOOP.
393 case 1: NEW_LOOP was placed before ORIG_LOOP:
394 The successor block of NEW_LOOP is the header of ORIG_LOOP.
395 Updating the phis in the successor block can therefore be done
396 along with the scanning of the loop header phis, because the
397 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
398 phi nodes, organized in the same order.
400 case 2: NEW_LOOP was placed after ORIG_LOOP:
401 The successor block of NEW_LOOP is the original exit block of
402 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
403 We postpone updating these phis to a later stage (when
404 loop guards are added).
408 /* Scan the phis in the headers of the old and new loops
409 (they are organized in exactly the same order). */
411 for (phi_new = phi_nodes (new_loop->header),
412 phi_orig = phi_nodes (orig_loop->header);
413 phi_new && phi_orig;
414 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
416 /* step 1. */
417 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
418 add_phi_arg (phi_new, def, new_loop_entry_e);
420 /* step 2. */
421 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
422 if (TREE_CODE (def) != SSA_NAME)
423 continue;
425 new_name_ptr = SSA_NAME_AUX (def);
426 if (!new_name_ptr)
427 /* Something defined outside of the loop. */
428 continue;
430 /* An ordinary ssa name defined in the loop. */
431 new_ssa_name = *new_name_ptr;
432 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
434 /* step 3 (case 1). */
435 if (!after)
437 gcc_assert (new_loop_exit_e == orig_entry_e);
438 SET_PHI_ARG_DEF (phi_orig,
439 new_loop_exit_e->dest_idx,
440 new_ssa_name);
446 /* Update PHI nodes for a guard of the LOOP.
448 Input:
449 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
450 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
451 originates from the guard-bb, skips LOOP and reaches the (unique) exit
452 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
453 We denote this bb NEW_MERGE_BB because before the guard code was added
454 it had a single predecessor (the LOOP header), and now it became a merge
455 point of two paths - the path that ends with the LOOP exit-edge, and
456 the path that ends with GUARD_EDGE.
457 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
458 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
460 ===> The CFG before the guard-code was added:
461 LOOP_header_bb:
462 loop_body
463 if (exit_loop) goto update_bb
464 else goto LOOP_header_bb
465 update_bb:
467 ==> The CFG after the guard-code was added:
468 guard_bb:
469 if (LOOP_guard_condition) goto new_merge_bb
470 else goto LOOP_header_bb
471 LOOP_header_bb:
472 loop_body
473 if (exit_loop_condition) goto new_merge_bb
474 else goto LOOP_header_bb
475 new_merge_bb:
476 goto update_bb
477 update_bb:
479 ==> The CFG after this function:
480 guard_bb:
481 if (LOOP_guard_condition) goto new_merge_bb
482 else goto LOOP_header_bb
483 LOOP_header_bb:
484 loop_body
485 if (exit_loop_condition) goto new_exit_bb
486 else goto LOOP_header_bb
487 new_exit_bb:
488 new_merge_bb:
489 goto update_bb
490 update_bb:
492 This function:
493 1. creates and updates the relevant phi nodes to account for the new
494 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
495 1.1. Create phi nodes at NEW_MERGE_BB.
496 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
497 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
498 2. preserves loop-closed-ssa-form by creating the required phi nodes
499 at the exit of LOOP (i.e, in NEW_EXIT_BB).
501 There are two flavors to this function:
503 slpeel_update_phi_nodes_for_guard1:
504 Here the guard controls whether we enter or skip LOOP, where LOOP is a
505 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
506 for variables that have phis in the loop header.
508 slpeel_update_phi_nodes_for_guard2:
509 Here the guard controls whether we enter or skip LOOP, where LOOP is an
510 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
511 for variables that have phis in the loop exit.
513 I.E., the overall structure is:
515 loop1_preheader_bb:
516 guard1 (goto loop1/merg1_bb)
517 loop1
518 loop1_exit_bb:
519 guard2 (goto merge1_bb/merge2_bb)
520 merge1_bb
521 loop2
522 loop2_exit_bb
523 merge2_bb
524 next_bb
526 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
527 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
528 that have phis in loop1->header).
530 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
531 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
532 that have phis in next_bb). It also adds some of these phis to
533 loop1_exit_bb.
535 slpeel_update_phi_nodes_for_guard1 is always called before
536 slpeel_update_phi_nodes_for_guard2. They are both needed in order
537 to create correct data-flow and loop-closed-ssa-form.
539 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
540 that change between iterations of a loop (and therefore have a phi-node
541 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
542 phis for variables that are used out of the loop (and therefore have
543 loop-closed exit phis). Some variables may be both updated between
544 iterations and used after the loop. This is why in loop1_exit_bb we
545 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
546 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
548 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
549 an original loop. i.e., we have:
551 orig_loop
552 guard_bb (goto LOOP/new_merge)
553 new_loop <-- LOOP
554 new_exit
555 new_merge
556 next_bb
558 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
559 have:
561 new_loop
562 guard_bb (goto LOOP/new_merge)
563 orig_loop <-- LOOP
564 new_exit
565 new_merge
566 next_bb
568 The ssa-names defined in the original loop have an SSA_NAME_AUX pointer
569 that records the corresponding new ssa-name used in the new duplicated
570 loop copy.
573 /* Function slpeel_update_phi_nodes_for_guard1
575 Input:
576 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
577 - DEFS - a bitmap of ssa names to mark new names for which we recorded
578 information.
580 In the context of the overall structure, we have:
582 loop1_preheader_bb:
583 guard1 (goto loop1/merg1_bb)
584 LOOP-> loop1
585 loop1_exit_bb:
586 guard2 (goto merge1_bb/merge2_bb)
587 merge1_bb
588 loop2
589 loop2_exit_bb
590 merge2_bb
591 next_bb
593 For each name updated between loop iterations (i.e - for each name that has
594 an entry (loop-header) phi in LOOP) we create a new phi in:
595 1. merge1_bb (to account for the edge from guard1)
596 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
599 static void
600 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
601 bool is_new_loop, basic_block *new_exit_bb,
602 bitmap *defs)
604 tree orig_phi, new_phi;
605 tree update_phi, update_phi2;
606 tree *new_name_ptr, *new_name_ptr2;
607 tree guard_arg, loop_arg;
608 basic_block new_merge_bb = guard_edge->dest;
609 edge e = EDGE_SUCC (new_merge_bb, 0);
610 basic_block update_bb = e->dest;
611 basic_block orig_bb = loop->header;
612 edge new_exit_e;
613 tree current_new_name;
615 /* Create new bb between loop and new_merge_bb. */
616 *new_exit_bb = split_edge (loop->single_exit);
617 add_bb_to_loop (*new_exit_bb, loop->outer);
619 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
621 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
622 orig_phi && update_phi;
623 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
625 /** 1. Handle new-merge-point phis **/
627 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
628 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
629 new_merge_bb);
631 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
632 of LOOP. Set the two phi args in NEW_PHI for these edges: */
633 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
634 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
636 add_phi_arg (new_phi, loop_arg, new_exit_e);
637 add_phi_arg (new_phi, guard_arg, guard_edge);
639 /* 1.3. Update phi in successor block. */
640 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
641 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
642 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
643 update_phi2 = new_phi;
646 /** 2. Handle loop-closed-ssa-form phis **/
648 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
649 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
650 *new_exit_bb);
652 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
653 add_phi_arg (new_phi, loop_arg, loop->single_exit);
655 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
656 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
657 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
659 /* 2.4. Record the newly created name in SSA_NAME_AUX.
660 We want to find a name such that
661 name = *(SSA_NAME_AUX (orig_loop_name))
662 and to set its SSA_NAME_AUX as follows:
663 *(SSA_NAME_AUX (name)) = new_phi_name
665 If LOOP is a new loop then loop_arg is already the name we're
666 looking for. If LOOP is the original loop, then loop_arg is
667 the orig_loop_name and the relevant name is recorded in its
668 SSA_NAME_AUX */
669 if (is_new_loop)
670 current_new_name = loop_arg;
671 else
673 new_name_ptr = SSA_NAME_AUX (loop_arg);
674 gcc_assert (new_name_ptr);
675 current_new_name = *new_name_ptr;
677 #ifdef ENABLE_CHECKING
678 gcc_assert (! SSA_NAME_AUX (current_new_name));
679 #endif
681 new_name_ptr2 = xmalloc (sizeof (tree));
682 *new_name_ptr2 = PHI_RESULT (new_phi);
683 SSA_NAME_AUX (current_new_name) = new_name_ptr2;
684 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
687 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
691 /* Function slpeel_update_phi_nodes_for_guard2
693 Input:
694 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
696 In the context of the overall structure, we have:
698 loop1_preheader_bb:
699 guard1 (goto loop1/merg1_bb)
700 loop1
701 loop1_exit_bb:
702 guard2 (goto merge1_bb/merge2_bb)
703 merge1_bb
704 LOOP-> loop2
705 loop2_exit_bb
706 merge2_bb
707 next_bb
709 For each name used out side the loop (i.e - for each name that has an exit
710 phi in next_bb) we create a new phi in:
711 1. merge2_bb (to account for the edge from guard_bb)
712 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
713 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
714 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
717 static void
718 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
719 bool is_new_loop, basic_block *new_exit_bb)
721 tree orig_phi, new_phi;
722 tree update_phi, update_phi2;
723 tree *new_name_ptr, *new_name_ptr2;
724 tree guard_arg, loop_arg;
725 basic_block new_merge_bb = guard_edge->dest;
726 edge e = EDGE_SUCC (new_merge_bb, 0);
727 basic_block update_bb = e->dest;
728 edge new_exit_e;
729 tree orig_def;
730 tree new_name, new_name2;
731 tree arg;
733 /* Create new bb between loop and new_merge_bb. */
734 *new_exit_bb = split_edge (loop->single_exit);
735 add_bb_to_loop (*new_exit_bb, loop->outer);
737 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
739 for (update_phi = phi_nodes (update_bb); update_phi;
740 update_phi = PHI_CHAIN (update_phi))
742 orig_phi = update_phi;
743 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
744 new_name_ptr = SSA_NAME_AUX (orig_def);
745 arg = NULL_TREE;
747 /** 1. Handle new-merge-point phis **/
749 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
750 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
751 new_merge_bb);
753 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
754 of LOOP. Set the two phi args in NEW_PHI for these edges: */
755 new_name = orig_def;
756 new_name2 = NULL_TREE;
757 if (new_name_ptr)
759 new_name = *new_name_ptr;
760 new_name_ptr2 = SSA_NAME_AUX (new_name);
761 if (new_name_ptr2)
762 /* Some variables have both loop-entry-phis and loop-exit-phis.
763 Such variables were given yet newer names by phis placed in
764 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
765 new_name2 = SSA_NAME_AUX (SSA_NAME_AUX (orig_name)). */
766 new_name2 = *new_name_ptr2;
769 if (is_new_loop)
771 guard_arg = orig_def;
772 loop_arg = new_name;
774 else
776 guard_arg = new_name;
777 loop_arg = orig_def;
779 if (new_name2)
780 guard_arg = new_name2;
782 add_phi_arg (new_phi, loop_arg, new_exit_e);
783 add_phi_arg (new_phi, guard_arg, guard_edge);
785 /* 1.3. Update phi in successor block. */
786 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
787 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
788 update_phi2 = new_phi;
791 /** 2. Handle loop-closed-ssa-form phis **/
793 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
794 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
795 *new_exit_bb);
797 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
798 add_phi_arg (new_phi, loop_arg, loop->single_exit);
800 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
801 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
802 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
805 /** 3. Handle loop-closed-ssa-form phis for first loop **/
807 /* 3.1. Find the relevant names that need an exit-phi in GUARD_BB, i.e.
808 names for which slpeel_update_phi_nodes_for_guard1 had not already
809 created a phi node. This is the case for names that are used outside
810 the loop (and therefore need an exit phi) but are not updated
811 across loop iterations (and therefore don't have a loop-header-phi).
813 slpeel_update_phi_nodes_for_guard1 is responsible for creating
814 loop-exit phis in GUARD_BB for names that have a loop-header-phi. When
815 such a phi is created we also record the new name in SSA_NAME_AUX. If
816 this new name exists, then guard_arg was set to this new name
817 (see 1.2 above). Therefore, if guard_arg is not this new name, this is
818 an indication that an exit-phi in GUARD_BB was not yet created, so we
819 take care of it here.
821 if (guard_arg == new_name2)
822 continue;
823 arg = guard_arg;
825 /* 3.2. Generate new phi node in GUARD_BB: */
826 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
827 guard_edge->src);
829 /* 3.3. GUARD_BB has one incoming edge: */
830 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
831 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
833 /* 3.4. Update phi in successor of GUARD_BB: */
834 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
835 == guard_arg);
836 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
839 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
843 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
844 that starts at zero, increases by one and its limit is NITERS.
846 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
848 void
849 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
851 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
852 tree orig_cond;
853 edge exit_edge = loop->single_exit;
854 block_stmt_iterator loop_cond_bsi;
855 block_stmt_iterator incr_bsi;
856 bool insert_after;
857 tree begin_label = tree_block_label (loop->latch);
858 tree exit_label = tree_block_label (loop->single_exit->dest);
859 tree init = build_int_cst (TREE_TYPE (niters), 0);
860 tree step = build_int_cst (TREE_TYPE (niters), 1);
861 tree then_label;
862 tree else_label;
863 LOC loop_loc;
865 orig_cond = get_loop_exit_condition (loop);
866 #ifdef ENABLE_CHECKING
867 gcc_assert (orig_cond);
868 #endif
869 loop_cond_bsi = bsi_for_stmt (orig_cond);
871 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
872 create_iv (init, step, NULL_TREE, loop,
873 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
875 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
877 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
878 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
879 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
881 else /* 'then' edge loops back. */
883 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
884 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
885 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
888 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
889 then_label, else_label);
890 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
892 /* Remove old loop exit test: */
893 bsi_remove (&loop_cond_bsi);
895 loop_loc = find_loop_location (loop);
896 if (dump_file && (dump_flags & TDF_DETAILS))
898 if (loop_loc != UNKNOWN_LOC)
899 fprintf (dump_file, "\nloop at %s:%d: ",
900 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
901 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
904 loop->nb_iterations = niters;
908 /* Given LOOP this function generates a new copy of it and puts it
909 on E which is either the entry or exit of LOOP. */
911 static struct loop *
912 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
913 edge e)
915 struct loop *new_loop;
916 basic_block *new_bbs, *bbs;
917 bool at_exit;
918 bool was_imm_dom;
919 basic_block exit_dest;
920 tree phi, phi_arg;
922 at_exit = (e == loop->single_exit);
923 if (!at_exit && e != loop_preheader_edge (loop))
924 return NULL;
926 bbs = get_loop_body (loop);
928 /* Check whether duplication is possible. */
929 if (!can_copy_bbs_p (bbs, loop->num_nodes))
931 free (bbs);
932 return NULL;
935 /* Generate new loop structure. */
936 new_loop = duplicate_loop (loops, loop, loop->outer);
937 if (!new_loop)
939 free (bbs);
940 return NULL;
943 exit_dest = loop->single_exit->dest;
944 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
945 exit_dest) == loop->header ?
946 true : false);
948 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
950 copy_bbs (bbs, loop->num_nodes, new_bbs,
951 &loop->single_exit, 1, &new_loop->single_exit, NULL);
953 /* Duplicating phi args at exit bbs as coming
954 also from exit of duplicated loop. */
955 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
957 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
958 if (phi_arg)
960 edge new_loop_exit_edge;
962 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
963 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
964 else
965 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
967 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
971 if (at_exit) /* Add the loop copy at exit. */
973 redirect_edge_and_branch_force (e, new_loop->header);
974 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
975 if (was_imm_dom)
976 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
978 else /* Add the copy at entry. */
980 edge new_exit_e;
981 edge entry_e = loop_preheader_edge (loop);
982 basic_block preheader = entry_e->src;
984 if (!flow_bb_inside_loop_p (new_loop,
985 EDGE_SUCC (new_loop->header, 0)->dest))
986 new_exit_e = EDGE_SUCC (new_loop->header, 0);
987 else
988 new_exit_e = EDGE_SUCC (new_loop->header, 1);
990 redirect_edge_and_branch_force (new_exit_e, loop->header);
991 set_immediate_dominator (CDI_DOMINATORS, loop->header,
992 new_exit_e->src);
994 /* We have to add phi args to the loop->header here as coming
995 from new_exit_e edge. */
996 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
998 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
999 if (phi_arg)
1000 add_phi_arg (phi, phi_arg, new_exit_e);
1003 redirect_edge_and_branch_force (entry_e, new_loop->header);
1004 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
1007 free (new_bbs);
1008 free (bbs);
1010 return new_loop;
1014 /* Given the condition statement COND, put it as the last statement
1015 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
1016 Assumes that this is the single exit of the guarded loop.
1017 Returns the skip edge. */
1019 static edge
1020 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
1021 basic_block dom_bb)
1023 block_stmt_iterator bsi;
1024 edge new_e, enter_e;
1025 tree cond_stmt, then_label, else_label;
1027 enter_e = EDGE_SUCC (guard_bb, 0);
1028 enter_e->flags &= ~EDGE_FALLTHRU;
1029 enter_e->flags |= EDGE_FALSE_VALUE;
1030 bsi = bsi_last (guard_bb);
1032 then_label = build1 (GOTO_EXPR, void_type_node,
1033 tree_block_label (exit_bb));
1034 else_label = build1 (GOTO_EXPR, void_type_node,
1035 tree_block_label (enter_e->dest));
1036 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
1037 then_label, else_label);
1038 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
1039 /* Add new edge to connect guard block to the merge/loop-exit block. */
1040 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
1041 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
1042 return new_e;
1046 /* This function verifies that the following restrictions apply to LOOP:
1047 (1) it is innermost
1048 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
1049 (3) it is single entry, single exit
1050 (4) its exit condition is the last stmt in the header
1051 (5) E is the entry/exit edge of LOOP.
1054 bool
1055 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
1057 edge exit_e = loop->single_exit;
1058 edge entry_e = loop_preheader_edge (loop);
1059 tree orig_cond = get_loop_exit_condition (loop);
1060 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
1062 if (any_marked_for_rewrite_p ())
1063 return false;
1065 if (loop->inner
1066 /* All loops have an outer scope; the only case loop->outer is NULL is for
1067 the function itself. */
1068 || !loop->outer
1069 || loop->num_nodes != 2
1070 || !empty_block_p (loop->latch)
1071 || !loop->single_exit
1072 /* Verify that new loop exit condition can be trivially modified. */
1073 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
1074 || (e != exit_e && e != entry_e))
1075 return false;
1077 return true;
1080 #ifdef ENABLE_CHECKING
1081 void
1082 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1083 struct loop *second_loop)
1085 basic_block loop1_exit_bb = first_loop->single_exit->dest;
1086 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1087 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1089 /* A guard that controls whether the second_loop is to be executed or skipped
1090 is placed in first_loop->exit. first_loopt->exit therefore has two
1091 successors - one is the preheader of second_loop, and the other is a bb
1092 after second_loop.
1094 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1096 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1097 of second_loop. */
1099 /* The preheader of new_loop is expected to have two predessors:
1100 first_loop->exit and the block that precedes first_loop. */
1102 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1103 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1104 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1105 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1106 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1108 /* Verify that the other successor of first_loopt->exit is after the
1109 second_loop. */
1110 /* TODO */
1112 #endif
1114 /* Function slpeel_tree_peel_loop_to_edge.
1116 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1117 that is placed on the entry (exit) edge E of LOOP. After this transformation
1118 we have two loops one after the other - first-loop iterates FIRST_NITERS
1119 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1121 Input:
1122 - LOOP: the loop to be peeled.
1123 - E: the exit or entry edge of LOOP.
1124 If it is the entry edge, we peel the first iterations of LOOP. In this
1125 case first-loop is LOOP, and second-loop is the newly created loop.
1126 If it is the exit edge, we peel the last iterations of LOOP. In this
1127 case, first-loop is the newly created loop, and second-loop is LOOP.
1128 - NITERS: the number of iterations that LOOP iterates.
1129 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1130 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1131 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1132 is false, the caller of this function may want to take care of this
1133 (this can be useful if we don't want new stmts added to first-loop).
1135 Output:
1136 The function returns a pointer to the new loop-copy, or NULL if it failed
1137 to perform the transformation.
1139 The function generates two if-then-else guards: one before the first loop,
1140 and the other before the second loop:
1141 The first guard is:
1142 if (FIRST_NITERS == 0) then skip the first loop,
1143 and go directly to the second loop.
1144 The second guard is:
1145 if (FIRST_NITERS == NITERS) then skip the second loop.
1147 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1148 FORNOW the resulting code will not be in loop-closed-ssa form.
1151 struct loop*
1152 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1153 edge e, tree first_niters,
1154 tree niters, bool update_first_loop_count)
1156 struct loop *new_loop = NULL, *first_loop, *second_loop;
1157 edge skip_e;
1158 tree pre_condition;
1159 bitmap definitions;
1160 basic_block bb_before_second_loop, bb_after_second_loop;
1161 basic_block bb_before_first_loop;
1162 basic_block bb_between_loops;
1163 basic_block new_exit_bb;
1164 edge exit_e = loop->single_exit;
1165 LOC loop_loc;
1167 if (!slpeel_can_duplicate_loop_p (loop, e))
1168 return NULL;
1170 /* We have to initialize cfg_hooks. Then, when calling
1171 cfg_hooks->split_edge, the function tree_split_edge
1172 is actually called and, when calling cfg_hooks->duplicate_block,
1173 the function tree_duplicate_bb is called. */
1174 tree_register_cfg_hooks ();
1177 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1178 Resulting CFG would be:
1180 first_loop:
1181 do {
1182 } while ...
1184 second_loop:
1185 do {
1186 } while ...
1188 orig_exit_bb:
1191 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1193 loop_loc = find_loop_location (loop);
1194 if (dump_file && (dump_flags & TDF_DETAILS))
1196 if (loop_loc != UNKNOWN_LOC)
1197 fprintf (dump_file, "\n%s:%d: note: ",
1198 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1199 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1201 return NULL;
1204 if (e == exit_e)
1206 /* NEW_LOOP was placed after LOOP. */
1207 first_loop = loop;
1208 second_loop = new_loop;
1210 else
1212 /* NEW_LOOP was placed before LOOP. */
1213 first_loop = new_loop;
1214 second_loop = loop;
1217 definitions = marked_ssa_names ();
1218 allocate_new_names (definitions);
1219 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1220 rename_variables_in_loop (new_loop);
1223 /* 2. Add the guard that controls whether the first loop is executed.
1224 Resulting CFG would be:
1226 bb_before_first_loop:
1227 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1228 GOTO first-loop
1230 first_loop:
1231 do {
1232 } while ...
1234 bb_before_second_loop:
1236 second_loop:
1237 do {
1238 } while ...
1240 orig_exit_bb:
1243 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1244 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1245 bb_before_second_loop = split_edge (first_loop->single_exit);
1246 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1248 pre_condition =
1249 fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1250 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1251 bb_before_second_loop, bb_before_first_loop);
1252 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1253 first_loop == new_loop,
1254 &new_exit_bb, &definitions);
1257 /* 3. Add the guard that controls whether the second loop is executed.
1258 Resulting CFG would be:
1260 bb_before_first_loop:
1261 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1262 GOTO first-loop
1264 first_loop:
1265 do {
1266 } while ...
1268 bb_between_loops:
1269 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1270 GOTO bb_before_second_loop
1272 bb_before_second_loop:
1274 second_loop:
1275 do {
1276 } while ...
1278 bb_after_second_loop:
1280 orig_exit_bb:
1283 bb_between_loops = new_exit_bb;
1284 bb_after_second_loop = split_edge (second_loop->single_exit);
1285 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1287 pre_condition =
1288 fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1289 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1290 bb_after_second_loop, bb_before_first_loop);
1291 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1292 second_loop == new_loop, &new_exit_bb);
1294 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1296 if (update_first_loop_count)
1297 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1299 free_new_names (definitions);
1300 BITMAP_FREE (definitions);
1301 unmark_all_for_rewrite ();
1303 return new_loop;
1306 /* Function vect_get_loop_location.
1308 Extract the location of the loop in the source code.
1309 If the loop is not well formed for vectorization, an estimated
1310 location is calculated.
1311 Return the loop location if succeed and NULL if not. */
1314 find_loop_location (struct loop *loop)
1316 tree node = NULL_TREE;
1317 basic_block bb;
1318 block_stmt_iterator si;
1320 if (!loop)
1321 return UNKNOWN_LOC;
1323 node = get_loop_exit_condition (loop);
1325 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1326 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1327 return EXPR_LOC (node);
1329 /* If we got here the loop is probably not "well formed",
1330 try to estimate the loop location */
1332 if (!loop->header)
1333 return UNKNOWN_LOC;
1335 bb = loop->header;
1337 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1339 node = bsi_stmt (si);
1340 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1341 return EXPR_LOC (node);
1344 return UNKNOWN_LOC;
1348 /*************************************************************************
1349 Vectorization Debug Information.
1350 *************************************************************************/
1352 /* Function vect_set_verbosity_level.
1354 Called from toplev.c upon detection of the
1355 -ftree-vectorizer-verbose=N option. */
1357 void
1358 vect_set_verbosity_level (const char *val)
1360 unsigned int vl;
1362 vl = atoi (val);
1363 if (vl < MAX_VERBOSITY_LEVEL)
1364 vect_verbosity_level = vl;
1365 else
1366 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1370 /* Function vect_set_dump_settings.
1372 Fix the verbosity level of the vectorizer if the
1373 requested level was not set explicitly using the flag
1374 -ftree-vectorizer-verbose=N.
1375 Decide where to print the debugging information (dump_file/stderr).
1376 If the user defined the verbosity level, but there is no dump file,
1377 print to stderr, otherwise print to the dump file. */
1379 static void
1380 vect_set_dump_settings (void)
1382 vect_dump = dump_file;
1384 /* Check if the verbosity level was defined by the user: */
1385 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1387 /* If there is no dump file, print to stderr. */
1388 if (!dump_file)
1389 vect_dump = stderr;
1390 return;
1393 /* User didn't specify verbosity level: */
1394 if (dump_file && (dump_flags & TDF_DETAILS))
1395 vect_verbosity_level = REPORT_DETAILS;
1396 else if (dump_file && (dump_flags & TDF_STATS))
1397 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1398 else
1399 vect_verbosity_level = REPORT_NONE;
1401 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1405 /* Function debug_loop_details.
1407 For vectorization debug dumps. */
1409 bool
1410 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1412 if (vl > vect_verbosity_level)
1413 return false;
1415 if (loc == UNKNOWN_LOC)
1416 fprintf (vect_dump, "\n%s:%d: note: ",
1417 DECL_SOURCE_FILE (current_function_decl),
1418 DECL_SOURCE_LINE (current_function_decl));
1419 else
1420 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1423 return true;
1427 /*************************************************************************
1428 Vectorization Utilities.
1429 *************************************************************************/
1431 /* Function new_stmt_vec_info.
1433 Create and initialize a new stmt_vec_info struct for STMT. */
1435 stmt_vec_info
1436 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1438 stmt_vec_info res;
1439 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1441 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1442 STMT_VINFO_STMT (res) = stmt;
1443 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1444 STMT_VINFO_RELEVANT_P (res) = 0;
1445 STMT_VINFO_VECTYPE (res) = NULL;
1446 STMT_VINFO_VEC_STMT (res) = NULL;
1447 STMT_VINFO_DATA_REF (res) = NULL;
1448 STMT_VINFO_MEMTAG (res) = NULL;
1449 STMT_VINFO_PTR_INFO (res) = NULL;
1450 STMT_VINFO_SUBVARS (res) = NULL;
1451 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1452 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1453 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1454 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1455 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1457 return res;
1461 /* Function new_loop_vec_info.
1463 Create and initialize a new loop_vec_info struct for LOOP, as well as
1464 stmt_vec_info structs for all the stmts in LOOP. */
1466 loop_vec_info
1467 new_loop_vec_info (struct loop *loop)
1469 loop_vec_info res;
1470 basic_block *bbs;
1471 block_stmt_iterator si;
1472 unsigned int i;
1474 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1476 bbs = get_loop_body (loop);
1478 /* Create stmt_info for all stmts in the loop. */
1479 for (i = 0; i < loop->num_nodes; i++)
1481 basic_block bb = bbs[i];
1482 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1484 tree stmt = bsi_stmt (si);
1485 stmt_ann_t ann;
1487 ann = stmt_ann (stmt);
1488 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1492 LOOP_VINFO_LOOP (res) = loop;
1493 LOOP_VINFO_BBS (res) = bbs;
1494 LOOP_VINFO_EXIT_COND (res) = NULL;
1495 LOOP_VINFO_NITERS (res) = NULL;
1496 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1497 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1498 LOOP_VINFO_VECT_FACTOR (res) = 0;
1499 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1500 "loop_write_datarefs");
1501 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1502 "loop_read_datarefs");
1503 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1504 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1506 return res;
1510 /* Function destroy_loop_vec_info.
1512 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1513 stmts in the loop. */
1515 void
1516 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1518 struct loop *loop;
1519 basic_block *bbs;
1520 int nbbs;
1521 block_stmt_iterator si;
1522 int j;
1524 if (!loop_vinfo)
1525 return;
1527 loop = LOOP_VINFO_LOOP (loop_vinfo);
1529 bbs = LOOP_VINFO_BBS (loop_vinfo);
1530 nbbs = loop->num_nodes;
1532 for (j = 0; j < nbbs; j++)
1534 basic_block bb = bbs[j];
1535 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1537 tree stmt = bsi_stmt (si);
1538 stmt_ann_t ann = stmt_ann (stmt);
1539 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1540 free (stmt_info);
1541 set_stmt_info (ann, NULL);
1545 free (LOOP_VINFO_BBS (loop_vinfo));
1546 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1547 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1549 free (loop_vinfo);
1553 /* Function vect_strip_conversions
1555 Strip conversions that don't narrow the mode. */
1557 tree
1558 vect_strip_conversion (tree expr)
1560 tree to, ti, oprnd0;
1562 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1564 to = TREE_TYPE (expr);
1565 oprnd0 = TREE_OPERAND (expr, 0);
1566 ti = TREE_TYPE (oprnd0);
1568 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1569 return NULL_TREE;
1570 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1571 return NULL_TREE;
1573 expr = oprnd0;
1575 return expr;
1579 /* Function vect_force_dr_alignment_p.
1581 Returns whether the alignment of a DECL can be forced to be aligned
1582 on ALIGNMENT bit boundary. */
1584 bool
1585 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1587 if (TREE_CODE (decl) != VAR_DECL)
1588 return false;
1590 if (DECL_EXTERNAL (decl))
1591 return false;
1593 if (TREE_ASM_WRITTEN (decl))
1594 return false;
1596 if (TREE_STATIC (decl))
1597 return (alignment <= MAX_OFILE_ALIGNMENT);
1598 else
1599 /* This is not 100% correct. The absolute correct stack alignment
1600 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1601 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1602 However, until someone implements forced stack alignment, SSE
1603 isn't really usable without this. */
1604 return (alignment <= PREFERRED_STACK_BOUNDARY);
1608 /* Function get_vectype_for_scalar_type.
1610 Returns the vector type corresponding to SCALAR_TYPE as supported
1611 by the target. */
1613 tree
1614 get_vectype_for_scalar_type (tree scalar_type)
1616 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1617 int nbytes = GET_MODE_SIZE (inner_mode);
1618 int nunits;
1619 tree vectype;
1621 if (nbytes == 0)
1622 return NULL_TREE;
1624 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1625 is expected. */
1626 nunits = UNITS_PER_SIMD_WORD / nbytes;
1628 vectype = build_vector_type (scalar_type, nunits);
1629 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1631 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1632 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1635 if (!vectype)
1636 return NULL_TREE;
1638 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1640 fprintf (vect_dump, "vectype: ");
1641 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1644 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1646 /* TODO: tree-complex.c sometimes can parallelize operations
1647 on generic vectors. We can vectorize the loop in that case,
1648 but then we should re-run the lowering pass. */
1649 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1650 fprintf (vect_dump, "mode not supported by target.");
1651 return NULL_TREE;
1654 return vectype;
1658 /* Function vect_supportable_dr_alignment
1660 Return whether the data reference DR is supported with respect to its
1661 alignment. */
1663 enum dr_alignment_support
1664 vect_supportable_dr_alignment (struct data_reference *dr)
1666 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1667 enum machine_mode mode = (int) TYPE_MODE (vectype);
1669 if (aligned_access_p (dr))
1670 return dr_aligned;
1672 /* Possibly unaligned access. */
1674 if (DR_IS_READ (dr))
1676 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1677 && (!targetm.vectorize.builtin_mask_for_load
1678 || targetm.vectorize.builtin_mask_for_load ()))
1679 return dr_unaligned_software_pipeline;
1681 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1682 /* Can't software pipeline the loads, but can at least do them. */
1683 return dr_unaligned_supported;
1686 /* Unsupported. */
1687 return dr_unaligned_unsupported;
1691 /* Function vect_is_simple_use.
1693 Input:
1694 LOOP - the loop that is being vectorized.
1695 OPERAND - operand of a stmt in LOOP.
1696 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1698 Returns whether a stmt with OPERAND can be vectorized.
1699 Supportable operands are constants, loop invariants, and operands that are
1700 defined by the current iteration of the loop. Unsupportable operands are
1701 those that are defined by a previous iteration of the loop (as is the case
1702 in reduction/induction computations). */
1704 bool
1705 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1707 tree def_stmt;
1708 basic_block bb;
1709 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1711 if (def)
1712 *def = NULL_TREE;
1714 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1715 return true;
1717 if (TREE_CODE (operand) != SSA_NAME)
1718 return false;
1720 def_stmt = SSA_NAME_DEF_STMT (operand);
1721 if (def_stmt == NULL_TREE )
1723 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1724 fprintf (vect_dump, "no def_stmt.");
1725 return false;
1728 /* empty stmt is expected only in case of a function argument.
1729 (Otherwise - we expect a phi_node or a modify_expr). */
1730 if (IS_EMPTY_STMT (def_stmt))
1732 tree arg = TREE_OPERAND (def_stmt, 0);
1733 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1734 return true;
1735 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1737 fprintf (vect_dump, "Unexpected empty stmt: ");
1738 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1740 return false;
1743 /* phi_node inside the loop indicates an induction/reduction pattern.
1744 This is not supported yet. */
1745 bb = bb_for_stmt (def_stmt);
1746 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1748 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1749 fprintf (vect_dump, "reduction/induction - unsupported.");
1750 return false; /* FORNOW: not supported yet. */
1753 /* Expecting a modify_expr or a phi_node. */
1754 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1755 || TREE_CODE (def_stmt) == PHI_NODE)
1757 if (def)
1758 *def = def_stmt;
1759 return true;
1762 return false;
1766 /* Function vect_is_simple_iv_evolution.
1768 FORNOW: A simple evolution of an induction variables in the loop is
1769 considered a polynomial evolution with constant step. */
1771 bool
1772 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1773 tree * step)
1775 tree init_expr;
1776 tree step_expr;
1778 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1780 /* When there is no evolution in this loop, the evolution function
1781 is not "simple". */
1782 if (evolution_part == NULL_TREE)
1783 return false;
1785 /* When the evolution is a polynomial of degree >= 2
1786 the evolution function is not "simple". */
1787 if (tree_is_chrec (evolution_part))
1788 return false;
1790 step_expr = evolution_part;
1791 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1792 loop_nb));
1794 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1796 fprintf (vect_dump, "step: ");
1797 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1798 fprintf (vect_dump, ", init: ");
1799 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1802 *init = init_expr;
1803 *step = step_expr;
1805 if (TREE_CODE (step_expr) != INTEGER_CST)
1807 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1808 fprintf (vect_dump, "step unknown.");
1809 return false;
1812 return true;
1816 /* Function vectorize_loops.
1818 Entry Point to loop vectorization phase. */
1820 void
1821 vectorize_loops (struct loops *loops)
1823 unsigned int i;
1824 unsigned int num_vectorized_loops = 0;
1826 /* Fix the verbosity level if not defined explicitly by the user. */
1827 vect_set_dump_settings ();
1829 /* Does the target support SIMD? */
1830 /* FORNOW: until more sophisticated machine modelling is in place. */
1831 if (!UNITS_PER_SIMD_WORD)
1833 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1834 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
1835 return;
1838 /* ----------- Analyze loops. ----------- */
1840 /* If some loop was duplicated, it gets bigger number
1841 than all previously defined loops. This fact allows us to run
1842 only over initial loops skipping newly generated ones. */
1843 vect_loops_num = loops->num;
1844 for (i = 1; i < vect_loops_num; i++)
1846 loop_vec_info loop_vinfo;
1847 struct loop *loop = loops->parray[i];
1849 if (!loop)
1850 continue;
1852 loop_vinfo = vect_analyze_loop (loop);
1853 loop->aux = loop_vinfo;
1855 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1856 continue;
1858 vect_transform_loop (loop_vinfo, loops);
1859 num_vectorized_loops++;
1862 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1863 fprintf (vect_dump, "vectorized %u loops in function.\n",
1864 num_vectorized_loops);
1866 /* ----------- Finalize. ----------- */
1868 for (i = 1; i < vect_loops_num; i++)
1870 struct loop *loop = loops->parray[i];
1871 loop_vec_info loop_vinfo;
1873 if (!loop)
1874 continue;
1875 loop_vinfo = loop->aux;
1876 destroy_loop_vec_info (loop_vinfo);
1877 loop->aux = NULL;