PR target/4198
[official-gcc.git] / gcc / tree-vectorizer.c
blobbe4d816c30e312de383638d5b48711938cfdf4c1
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131 #include "rtl.h"
132 #include "basic-block.h"
133 #include "diagnostic.h"
134 #include "tree-flow.h"
135 #include "tree-dump.h"
136 #include "timevar.h"
137 #include "cfgloop.h"
138 #include "cfglayout.h"
139 #include "expr.h"
140 #include "optabs.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /*************************************************************************
150 Simple Loop Peeling Utilities
151 *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153 (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155 (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard1
157 (edge, struct loop *, bool, basic_block *, bitmap *);
158 static void slpeel_update_phi_nodes_for_guard2
159 (edge, struct loop *, bool, basic_block *);
160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
162 static void allocate_new_names (bitmap);
163 static void rename_use_op (use_operand_p);
164 static void rename_def_op (def_operand_p, tree);
165 static void rename_variables_in_bb (basic_block);
166 static void free_new_names (bitmap);
167 static void rename_variables_in_loop (struct loop *);
169 /*************************************************************************
170 General Vectorization Utilities
171 *************************************************************************/
172 static void vect_set_dump_settings (void);
173 static bool need_imm_uses_for (tree);
175 /* vect_dump will be set to stderr or dump_file if exist. */
176 FILE *vect_dump;
178 /* vect_verbosity_level set to an invalid value
179 to mark that it's uninitialized. */
180 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
184 /*************************************************************************
185 Simple Loop Peeling Utilities
187 Utilities to support loop peeling for vectorization purposes.
188 *************************************************************************/
191 /* For each definition in DEFINITIONS this function allocates
192 new ssa name. */
194 static void
195 allocate_new_names (bitmap definitions)
197 unsigned ver;
198 bitmap_iterator bi;
200 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
202 tree def = ssa_name (ver);
203 tree *new_name_ptr = xmalloc (sizeof (tree));
205 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
207 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
208 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
210 SSA_NAME_AUX (def) = new_name_ptr;
215 /* Renames the use *OP_P. */
217 static void
218 rename_use_op (use_operand_p op_p)
220 tree *new_name_ptr;
222 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
223 return;
225 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
227 /* Something defined outside of the loop. */
228 if (!new_name_ptr)
229 return;
231 /* An ordinary ssa name defined in the loop. */
233 SET_USE (op_p, *new_name_ptr);
237 /* Renames the def *OP_P in statement STMT. */
239 static void
240 rename_def_op (def_operand_p op_p, tree stmt)
242 tree *new_name_ptr;
244 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
245 return;
247 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
249 /* Something defined outside of the loop. */
250 if (!new_name_ptr)
251 return;
253 /* An ordinary ssa name defined in the loop. */
255 SET_DEF (op_p, *new_name_ptr);
256 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
260 /* Renames the variables in basic block BB. */
262 static void
263 rename_variables_in_bb (basic_block bb)
265 tree phi;
266 block_stmt_iterator bsi;
267 tree stmt;
268 stmt_ann_t ann;
269 use_optype uses;
270 vuse_optype vuses;
271 def_optype defs;
272 v_may_def_optype v_may_defs;
273 v_must_def_optype v_must_defs;
274 unsigned i;
275 edge e;
276 edge_iterator ei;
277 struct loop *loop = bb->loop_father;
279 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
280 rename_def_op (PHI_RESULT_PTR (phi), phi);
282 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
284 stmt = bsi_stmt (bsi);
285 get_stmt_operands (stmt);
286 ann = stmt_ann (stmt);
288 uses = USE_OPS (ann);
289 for (i = 0; i < NUM_USES (uses); i++)
290 rename_use_op (USE_OP_PTR (uses, i));
292 defs = DEF_OPS (ann);
293 for (i = 0; i < NUM_DEFS (defs); i++)
294 rename_def_op (DEF_OP_PTR (defs, i), stmt);
296 vuses = VUSE_OPS (ann);
297 for (i = 0; i < NUM_VUSES (vuses); i++)
298 rename_use_op (VUSE_OP_PTR (vuses, i));
300 v_may_defs = V_MAY_DEF_OPS (ann);
301 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
303 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
304 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
307 v_must_defs = V_MUST_DEF_OPS (ann);
308 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
310 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
311 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
315 FOR_EACH_EDGE (e, ei, bb->succs)
317 if (!flow_bb_inside_loop_p (loop, e->dest))
318 continue;
319 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
320 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
325 /* Releases the structures holding the new ssa names. */
327 static void
328 free_new_names (bitmap definitions)
330 unsigned ver;
331 bitmap_iterator bi;
333 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
335 tree def = ssa_name (ver);
337 if (SSA_NAME_AUX (def))
339 free (SSA_NAME_AUX (def));
340 SSA_NAME_AUX (def) = NULL;
346 /* Renames variables in new generated LOOP. */
348 static void
349 rename_variables_in_loop (struct loop *loop)
351 unsigned i;
352 basic_block *bbs;
354 bbs = get_loop_body (loop);
356 for (i = 0; i < loop->num_nodes; i++)
357 rename_variables_in_bb (bbs[i]);
359 free (bbs);
363 /* Update the PHI nodes of NEW_LOOP.
365 NEW_LOOP is a duplicate of ORIG_LOOP.
366 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
367 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
368 executes before it. */
370 static void
371 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
372 struct loop *new_loop, bool after)
374 tree *new_name_ptr, new_ssa_name;
375 tree phi_new, phi_orig;
376 tree def;
377 edge orig_loop_latch = loop_latch_edge (orig_loop);
378 edge orig_entry_e = loop_preheader_edge (orig_loop);
379 edge new_loop_exit_e = new_loop->single_exit;
380 edge new_loop_entry_e = loop_preheader_edge (new_loop);
381 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
384 step 1. For each loop-header-phi:
385 Add the first phi argument for the phi in NEW_LOOP
386 (the one associated with the entry of NEW_LOOP)
388 step 2. For each loop-header-phi:
389 Add the second phi argument for the phi in NEW_LOOP
390 (the one associated with the latch of NEW_LOOP)
392 step 3. Update the phis in the successor block of NEW_LOOP.
394 case 1: NEW_LOOP was placed before ORIG_LOOP:
395 The successor block of NEW_LOOP is the header of ORIG_LOOP.
396 Updating the phis in the successor block can therefore be done
397 along with the scanning of the loop header phis, because the
398 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
399 phi nodes, organized in the same order.
401 case 2: NEW_LOOP was placed after ORIG_LOOP:
402 The successor block of NEW_LOOP is the original exit block of
403 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
404 We postpone updating these phis to a later stage (when
405 loop guards are added).
409 /* Scan the phis in the headers of the old and new loops
410 (they are organized in exactly the same order). */
412 for (phi_new = phi_nodes (new_loop->header),
413 phi_orig = phi_nodes (orig_loop->header);
414 phi_new && phi_orig;
415 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
417 /* step 1. */
418 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
419 add_phi_arg (phi_new, def, new_loop_entry_e);
421 /* step 2. */
422 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
423 if (TREE_CODE (def) != SSA_NAME)
424 continue;
426 new_name_ptr = SSA_NAME_AUX (def);
427 if (!new_name_ptr)
428 /* Something defined outside of the loop. */
429 continue;
431 /* An ordinary ssa name defined in the loop. */
432 new_ssa_name = *new_name_ptr;
433 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
435 /* step 3 (case 1). */
436 if (!after)
438 gcc_assert (new_loop_exit_e == orig_entry_e);
439 SET_PHI_ARG_DEF (phi_orig,
440 new_loop_exit_e->dest_idx,
441 new_ssa_name);
447 /* Update PHI nodes for a guard of the LOOP.
449 Input:
450 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
451 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
452 originates from the guard-bb, skips LOOP and reaches the (unique) exit
453 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
454 We denote this bb NEW_MERGE_BB because before the guard code was added
455 it had a single predecessor (the LOOP header), and now it became a merge
456 point of two paths - the path that ends with the LOOP exit-edge, and
457 the path that ends with GUARD_EDGE.
458 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
459 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
461 ===> The CFG before the guard-code was added:
462 LOOP_header_bb:
463 loop_body
464 if (exit_loop) goto update_bb
465 else goto LOOP_header_bb
466 update_bb:
468 ==> The CFG after the guard-code was added:
469 guard_bb:
470 if (LOOP_guard_condition) goto new_merge_bb
471 else goto LOOP_header_bb
472 LOOP_header_bb:
473 loop_body
474 if (exit_loop_condition) goto new_merge_bb
475 else goto LOOP_header_bb
476 new_merge_bb:
477 goto update_bb
478 update_bb:
480 ==> The CFG after this function:
481 guard_bb:
482 if (LOOP_guard_condition) goto new_merge_bb
483 else goto LOOP_header_bb
484 LOOP_header_bb:
485 loop_body
486 if (exit_loop_condition) goto new_exit_bb
487 else goto LOOP_header_bb
488 new_exit_bb:
489 new_merge_bb:
490 goto update_bb
491 update_bb:
493 This function:
494 1. creates and updates the relevant phi nodes to account for the new
495 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
496 1.1. Create phi nodes at NEW_MERGE_BB.
497 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
498 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
499 2. preserves loop-closed-ssa-form by creating the required phi nodes
500 at the exit of LOOP (i.e, in NEW_EXIT_BB).
502 There are two flavors to this function:
504 slpeel_update_phi_nodes_for_guard1:
505 Here the guard controls whether we enter or skip LOOP, where LOOP is a
506 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
507 for variables that have phis in the loop header.
509 slpeel_update_phi_nodes_for_guard2:
510 Here the guard controls whether we enter or skip LOOP, where LOOP is an
511 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
512 for variables that have phis in the loop exit.
514 I.E., the overall structure is:
516 loop1_preheader_bb:
517 guard1 (goto loop1/merg1_bb)
518 loop1
519 loop1_exit_bb:
520 guard2 (goto merge1_bb/merge2_bb)
521 merge1_bb
522 loop2
523 loop2_exit_bb
524 merge2_bb
525 next_bb
527 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
528 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
529 that have phis in loop1->header).
531 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
532 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
533 that have phis in next_bb). It also adds some of these phis to
534 loop1_exit_bb.
536 slpeel_update_phi_nodes_for_guard1 is always called before
537 slpeel_update_phi_nodes_for_guard2. They are both needed in order
538 to create correct data-flow and loop-closed-ssa-form.
540 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
541 that change between iterations of a loop (and therefore have a phi-node
542 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
543 phis for variables that are used out of the loop (and therefore have
544 loop-closed exit phis). Some variables may be both updated between
545 iterations and used after the loop. This is why in loop1_exit_bb we
546 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
547 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
549 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
550 an original loop. i.e., we have:
552 orig_loop
553 guard_bb (goto LOOP/new_merge)
554 new_loop <-- LOOP
555 new_exit
556 new_merge
557 next_bb
559 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
560 have:
562 new_loop
563 guard_bb (goto LOOP/new_merge)
564 orig_loop <-- LOOP
565 new_exit
566 new_merge
567 next_bb
569 The ssa-names defined in the original loop have an SSA_NAME_AUX pointer
570 that records the corresponding new ssa-name used in the new duplicated
571 loop copy.
574 /* Function slpeel_update_phi_nodes_for_guard1
576 Input:
577 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
578 - DEFS - a bitmap of ssa names to mark new names for which we recorded
579 information.
581 In the context of the overall structure, we have:
583 loop1_preheader_bb:
584 guard1 (goto loop1/merg1_bb)
585 LOOP-> loop1
586 loop1_exit_bb:
587 guard2 (goto merge1_bb/merge2_bb)
588 merge1_bb
589 loop2
590 loop2_exit_bb
591 merge2_bb
592 next_bb
594 For each name updated between loop iterations (i.e - for each name that has
595 an entry (loop-header) phi in LOOP) we create a new phi in:
596 1. merge1_bb (to account for the edge from guard1)
597 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
600 static void
601 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
602 bool is_new_loop, basic_block *new_exit_bb,
603 bitmap *defs)
605 tree orig_phi, new_phi;
606 tree update_phi, update_phi2;
607 tree *new_name_ptr, *new_name_ptr2;
608 tree guard_arg, loop_arg;
609 basic_block new_merge_bb = guard_edge->dest;
610 edge e = EDGE_SUCC (new_merge_bb, 0);
611 basic_block update_bb = e->dest;
612 basic_block orig_bb = loop->header;
613 edge new_exit_e;
614 tree current_new_name;
616 /* Create new bb between loop and new_merge_bb. */
617 *new_exit_bb = split_edge (loop->single_exit);
618 add_bb_to_loop (*new_exit_bb, loop->outer);
620 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
622 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
623 orig_phi && update_phi;
624 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
626 /** 1. Handle new-merge-point phis **/
628 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
629 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
630 new_merge_bb);
632 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
633 of LOOP. Set the two phi args in NEW_PHI for these edges: */
634 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
635 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
637 add_phi_arg (new_phi, loop_arg, new_exit_e);
638 add_phi_arg (new_phi, guard_arg, guard_edge);
640 /* 1.3. Update phi in successor block. */
641 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
642 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
643 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
644 update_phi2 = new_phi;
647 /** 2. Handle loop-closed-ssa-form phis **/
649 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
650 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
651 *new_exit_bb);
653 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
654 add_phi_arg (new_phi, loop_arg, loop->single_exit);
656 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
657 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
658 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
660 /* 2.4. Record the newly created name in SSA_NAME_AUX.
661 We want to find a name such that
662 name = *(SSA_NAME_AUX (orig_loop_name))
663 and to set its SSA_NAME_AUX as follows:
664 *(SSA_NAME_AUX (name)) = new_phi_name
666 If LOOP is a new loop then loop_arg is already the name we're
667 looking for. If LOOP is the original loop, then loop_arg is
668 the orig_loop_name and the relevant name is recorded in its
669 SSA_NAME_AUX */
670 if (is_new_loop)
671 current_new_name = loop_arg;
672 else
674 new_name_ptr = SSA_NAME_AUX (loop_arg);
675 gcc_assert (new_name_ptr);
676 current_new_name = *new_name_ptr;
678 #ifdef ENABLE_CHECKING
679 gcc_assert (! SSA_NAME_AUX (current_new_name));
680 #endif
682 new_name_ptr2 = xmalloc (sizeof (tree));
683 *new_name_ptr2 = PHI_RESULT (new_phi);
684 SSA_NAME_AUX (current_new_name) = new_name_ptr2;
685 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
688 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
692 /* Function slpeel_update_phi_nodes_for_guard2
694 Input:
695 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
697 In the context of the overall structure, we have:
699 loop1_preheader_bb:
700 guard1 (goto loop1/merg1_bb)
701 loop1
702 loop1_exit_bb:
703 guard2 (goto merge1_bb/merge2_bb)
704 merge1_bb
705 LOOP-> loop2
706 loop2_exit_bb
707 merge2_bb
708 next_bb
710 For each name used out side the loop (i.e - for each name that has an exit
711 phi in next_bb) we create a new phi in:
712 1. merge2_bb (to account for the edge from guard_bb)
713 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
714 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
715 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
718 static void
719 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
720 bool is_new_loop, basic_block *new_exit_bb)
722 tree orig_phi, new_phi;
723 tree update_phi, update_phi2;
724 tree *new_name_ptr, *new_name_ptr2;
725 tree guard_arg, loop_arg;
726 basic_block new_merge_bb = guard_edge->dest;
727 edge e = EDGE_SUCC (new_merge_bb, 0);
728 basic_block update_bb = e->dest;
729 edge new_exit_e;
730 tree orig_def;
731 tree new_name, new_name2;
732 tree arg;
734 /* Create new bb between loop and new_merge_bb. */
735 *new_exit_bb = split_edge (loop->single_exit);
736 add_bb_to_loop (*new_exit_bb, loop->outer);
738 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
740 for (update_phi = phi_nodes (update_bb); update_phi;
741 update_phi = PHI_CHAIN (update_phi))
743 orig_phi = update_phi;
744 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
745 new_name_ptr = SSA_NAME_AUX (orig_def);
746 arg = NULL_TREE;
748 /** 1. Handle new-merge-point phis **/
750 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
751 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
752 new_merge_bb);
754 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
755 of LOOP. Set the two phi args in NEW_PHI for these edges: */
756 new_name = orig_def;
757 new_name2 = NULL_TREE;
758 if (new_name_ptr)
760 new_name = *new_name_ptr;
761 new_name_ptr2 = SSA_NAME_AUX (new_name);
762 if (new_name_ptr2)
763 /* Some variables have both loop-entry-phis and loop-exit-phis.
764 Such variables were given yet newer names by phis placed in
765 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
766 new_name2 = SSA_NAME_AUX (SSA_NAME_AUX (orig_name)). */
767 new_name2 = *new_name_ptr2;
770 if (is_new_loop)
772 guard_arg = orig_def;
773 loop_arg = new_name;
775 else
777 guard_arg = new_name;
778 loop_arg = orig_def;
780 if (new_name2)
781 guard_arg = new_name2;
783 add_phi_arg (new_phi, loop_arg, new_exit_e);
784 add_phi_arg (new_phi, guard_arg, guard_edge);
786 /* 1.3. Update phi in successor block. */
787 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
788 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
789 update_phi2 = new_phi;
792 /** 2. Handle loop-closed-ssa-form phis **/
794 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
795 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
796 *new_exit_bb);
798 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
799 add_phi_arg (new_phi, loop_arg, loop->single_exit);
801 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
802 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
803 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
806 /** 3. Handle loop-closed-ssa-form phis for first loop **/
808 /* 3.1. Find the relevant names that need an exit-phi in GUARD_BB, i.e.
809 names for which slpeel_update_phi_nodes_for_guard1 had not already
810 created a phi node. This is the case for names that are used outside
811 the loop (and therefore need an exit phi) but are not updated
812 across loop iterations (and therefore don't have a loop-header-phi).
814 slpeel_update_phi_nodes_for_guard1 is responsible for creating
815 loop-exit phis in GUARD_BB for names that have a loop-header-phi. When
816 such a phi is created we also record the new name in SSA_NAME_AUX. If
817 this new name exists, then guard_arg was set to this new name
818 (see 1.2 above). Therefore, if guard_arg is not this new name, this is
819 an indication that an exit-phi in GUARD_BB was not yet created, so we
820 take care of it here.
822 if (guard_arg == new_name2)
823 continue;
824 arg = guard_arg;
826 /* 3.2. Generate new phi node in GUARD_BB: */
827 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
828 guard_edge->src);
830 /* 3.3. GUARD_BB has one incoming edge: */
831 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
832 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
834 /* 3.4. Update phi in successor of GUARD_BB: */
835 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
836 == guard_arg);
837 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
840 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
844 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
845 that starts at zero, increases by one and its limit is NITERS.
847 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
849 void
850 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
852 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
853 tree orig_cond;
854 edge exit_edge = loop->single_exit;
855 block_stmt_iterator loop_cond_bsi;
856 block_stmt_iterator incr_bsi;
857 bool insert_after;
858 tree begin_label = tree_block_label (loop->latch);
859 tree exit_label = tree_block_label (loop->single_exit->dest);
860 tree init = build_int_cst (TREE_TYPE (niters), 0);
861 tree step = build_int_cst (TREE_TYPE (niters), 1);
862 tree then_label;
863 tree else_label;
864 LOC loop_loc;
866 orig_cond = get_loop_exit_condition (loop);
867 #ifdef ENABLE_CHECKING
868 gcc_assert (orig_cond);
869 #endif
870 loop_cond_bsi = bsi_for_stmt (orig_cond);
872 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
873 create_iv (init, step, NULL_TREE, loop,
874 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
876 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
878 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
879 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
880 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
882 else /* 'then' edge loops back. */
884 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
885 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
886 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
889 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
890 then_label, else_label);
891 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
893 /* Remove old loop exit test: */
894 bsi_remove (&loop_cond_bsi);
896 loop_loc = find_loop_location (loop);
897 if (dump_file && (dump_flags & TDF_DETAILS))
899 if (loop_loc != UNKNOWN_LOC)
900 fprintf (dump_file, "\nloop at %s:%d: ",
901 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
902 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
905 loop->nb_iterations = niters;
909 /* Given LOOP this function generates a new copy of it and puts it
910 on E which is either the entry or exit of LOOP. */
912 static struct loop *
913 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
914 edge e)
916 struct loop *new_loop;
917 basic_block *new_bbs, *bbs;
918 bool at_exit;
919 bool was_imm_dom;
920 basic_block exit_dest;
921 tree phi, phi_arg;
923 at_exit = (e == loop->single_exit);
924 if (!at_exit && e != loop_preheader_edge (loop))
925 return NULL;
927 bbs = get_loop_body (loop);
929 /* Check whether duplication is possible. */
930 if (!can_copy_bbs_p (bbs, loop->num_nodes))
932 free (bbs);
933 return NULL;
936 /* Generate new loop structure. */
937 new_loop = duplicate_loop (loops, loop, loop->outer);
938 if (!new_loop)
940 free (bbs);
941 return NULL;
944 exit_dest = loop->single_exit->dest;
945 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
946 exit_dest) == loop->header ?
947 true : false);
949 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
951 copy_bbs (bbs, loop->num_nodes, new_bbs,
952 &loop->single_exit, 1, &new_loop->single_exit, NULL);
954 /* Duplicating phi args at exit bbs as coming
955 also from exit of duplicated loop. */
956 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
958 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
959 if (phi_arg)
961 edge new_loop_exit_edge;
963 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
964 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
965 else
966 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
968 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
972 if (at_exit) /* Add the loop copy at exit. */
974 redirect_edge_and_branch_force (e, new_loop->header);
975 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
976 if (was_imm_dom)
977 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
979 else /* Add the copy at entry. */
981 edge new_exit_e;
982 edge entry_e = loop_preheader_edge (loop);
983 basic_block preheader = entry_e->src;
985 if (!flow_bb_inside_loop_p (new_loop,
986 EDGE_SUCC (new_loop->header, 0)->dest))
987 new_exit_e = EDGE_SUCC (new_loop->header, 0);
988 else
989 new_exit_e = EDGE_SUCC (new_loop->header, 1);
991 redirect_edge_and_branch_force (new_exit_e, loop->header);
992 set_immediate_dominator (CDI_DOMINATORS, loop->header,
993 new_exit_e->src);
995 /* We have to add phi args to the loop->header here as coming
996 from new_exit_e edge. */
997 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
999 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
1000 if (phi_arg)
1001 add_phi_arg (phi, phi_arg, new_exit_e);
1004 redirect_edge_and_branch_force (entry_e, new_loop->header);
1005 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
1008 free (new_bbs);
1009 free (bbs);
1011 return new_loop;
1015 /* Given the condition statement COND, put it as the last statement
1016 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
1017 Assumes that this is the single exit of the guarded loop.
1018 Returns the skip edge. */
1020 static edge
1021 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
1022 basic_block dom_bb)
1024 block_stmt_iterator bsi;
1025 edge new_e, enter_e;
1026 tree cond_stmt, then_label, else_label;
1028 enter_e = EDGE_SUCC (guard_bb, 0);
1029 enter_e->flags &= ~EDGE_FALLTHRU;
1030 enter_e->flags |= EDGE_FALSE_VALUE;
1031 bsi = bsi_last (guard_bb);
1033 then_label = build1 (GOTO_EXPR, void_type_node,
1034 tree_block_label (exit_bb));
1035 else_label = build1 (GOTO_EXPR, void_type_node,
1036 tree_block_label (enter_e->dest));
1037 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
1038 then_label, else_label);
1039 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
1040 /* Add new edge to connect guard block to the merge/loop-exit block. */
1041 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
1042 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
1043 return new_e;
1047 /* This function verifies that the following restrictions apply to LOOP:
1048 (1) it is innermost
1049 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
1050 (3) it is single entry, single exit
1051 (4) its exit condition is the last stmt in the header
1052 (5) E is the entry/exit edge of LOOP.
1055 bool
1056 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
1058 edge exit_e = loop->single_exit;
1059 edge entry_e = loop_preheader_edge (loop);
1060 tree orig_cond = get_loop_exit_condition (loop);
1061 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
1063 if (any_marked_for_rewrite_p ())
1064 return false;
1066 if (loop->inner
1067 /* All loops have an outer scope; the only case loop->outer is NULL is for
1068 the function itself. */
1069 || !loop->outer
1070 || loop->num_nodes != 2
1071 || !empty_block_p (loop->latch)
1072 || !loop->single_exit
1073 /* Verify that new loop exit condition can be trivially modified. */
1074 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
1075 || (e != exit_e && e != entry_e))
1076 return false;
1078 return true;
1081 #ifdef ENABLE_CHECKING
1082 void
1083 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1084 struct loop *second_loop)
1086 basic_block loop1_exit_bb = first_loop->single_exit->dest;
1087 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1088 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1090 /* A guard that controls whether the second_loop is to be executed or skipped
1091 is placed in first_loop->exit. first_loopt->exit therefore has two
1092 successors - one is the preheader of second_loop, and the other is a bb
1093 after second_loop.
1095 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1097 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1098 of second_loop. */
1100 /* The preheader of new_loop is expected to have two predessors:
1101 first_loop->exit and the block that precedes first_loop. */
1103 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1104 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1105 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1106 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1107 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1109 /* Verify that the other successor of first_loopt->exit is after the
1110 second_loop. */
1111 /* TODO */
1113 #endif
1115 /* Function slpeel_tree_peel_loop_to_edge.
1117 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1118 that is placed on the entry (exit) edge E of LOOP. After this transformation
1119 we have two loops one after the other - first-loop iterates FIRST_NITERS
1120 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1122 Input:
1123 - LOOP: the loop to be peeled.
1124 - E: the exit or entry edge of LOOP.
1125 If it is the entry edge, we peel the first iterations of LOOP. In this
1126 case first-loop is LOOP, and second-loop is the newly created loop.
1127 If it is the exit edge, we peel the last iterations of LOOP. In this
1128 case, first-loop is the newly created loop, and second-loop is LOOP.
1129 - NITERS: the number of iterations that LOOP iterates.
1130 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1131 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1132 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1133 is false, the caller of this function may want to take care of this
1134 (this can be useful if we don't want new stmts added to first-loop).
1136 Output:
1137 The function returns a pointer to the new loop-copy, or NULL if it failed
1138 to perform the transformation.
1140 The function generates two if-then-else guards: one before the first loop,
1141 and the other before the second loop:
1142 The first guard is:
1143 if (FIRST_NITERS == 0) then skip the first loop,
1144 and go directly to the second loop.
1145 The second guard is:
1146 if (FIRST_NITERS == NITERS) then skip the second loop.
1148 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1149 FORNOW the resulting code will not be in loop-closed-ssa form.
1152 struct loop*
1153 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1154 edge e, tree first_niters,
1155 tree niters, bool update_first_loop_count)
1157 struct loop *new_loop = NULL, *first_loop, *second_loop;
1158 edge skip_e;
1159 tree pre_condition;
1160 bitmap definitions;
1161 basic_block bb_before_second_loop, bb_after_second_loop;
1162 basic_block bb_before_first_loop;
1163 basic_block bb_between_loops;
1164 basic_block new_exit_bb;
1165 edge exit_e = loop->single_exit;
1166 LOC loop_loc;
1168 if (!slpeel_can_duplicate_loop_p (loop, e))
1169 return NULL;
1171 /* We have to initialize cfg_hooks. Then, when calling
1172 cfg_hooks->split_edge, the function tree_split_edge
1173 is actually called and, when calling cfg_hooks->duplicate_block,
1174 the function tree_duplicate_bb is called. */
1175 tree_register_cfg_hooks ();
1178 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1179 Resulting CFG would be:
1181 first_loop:
1182 do {
1183 } while ...
1185 second_loop:
1186 do {
1187 } while ...
1189 orig_exit_bb:
1192 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1194 loop_loc = find_loop_location (loop);
1195 if (dump_file && (dump_flags & TDF_DETAILS))
1197 if (loop_loc != UNKNOWN_LOC)
1198 fprintf (dump_file, "\n%s:%d: note: ",
1199 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1200 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1202 return NULL;
1205 if (e == exit_e)
1207 /* NEW_LOOP was placed after LOOP. */
1208 first_loop = loop;
1209 second_loop = new_loop;
1211 else
1213 /* NEW_LOOP was placed before LOOP. */
1214 first_loop = new_loop;
1215 second_loop = loop;
1218 definitions = marked_ssa_names ();
1219 allocate_new_names (definitions);
1220 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1221 rename_variables_in_loop (new_loop);
1224 /* 2. Add the guard that controls whether the first loop is executed.
1225 Resulting CFG would be:
1227 bb_before_first_loop:
1228 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1229 GOTO first-loop
1231 first_loop:
1232 do {
1233 } while ...
1235 bb_before_second_loop:
1237 second_loop:
1238 do {
1239 } while ...
1241 orig_exit_bb:
1244 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1245 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1246 bb_before_second_loop = split_edge (first_loop->single_exit);
1247 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1249 pre_condition =
1250 fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1251 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1252 bb_before_second_loop, bb_before_first_loop);
1253 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1254 first_loop == new_loop,
1255 &new_exit_bb, &definitions);
1258 /* 3. Add the guard that controls whether the second loop is executed.
1259 Resulting CFG would be:
1261 bb_before_first_loop:
1262 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1263 GOTO first-loop
1265 first_loop:
1266 do {
1267 } while ...
1269 bb_between_loops:
1270 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1271 GOTO bb_before_second_loop
1273 bb_before_second_loop:
1275 second_loop:
1276 do {
1277 } while ...
1279 bb_after_second_loop:
1281 orig_exit_bb:
1284 bb_between_loops = new_exit_bb;
1285 bb_after_second_loop = split_edge (second_loop->single_exit);
1286 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1288 pre_condition =
1289 fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1290 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1291 bb_after_second_loop, bb_before_first_loop);
1292 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1293 second_loop == new_loop, &new_exit_bb);
1295 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1297 if (update_first_loop_count)
1298 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1300 free_new_names (definitions);
1301 BITMAP_FREE (definitions);
1302 unmark_all_for_rewrite ();
1304 return new_loop;
1307 /* Function vect_get_loop_location.
1309 Extract the location of the loop in the source code.
1310 If the loop is not well formed for vectorization, an estimated
1311 location is calculated.
1312 Return the loop location if succeed and NULL if not. */
1315 find_loop_location (struct loop *loop)
1317 tree node = NULL_TREE;
1318 basic_block bb;
1319 block_stmt_iterator si;
1321 if (!loop)
1322 return UNKNOWN_LOC;
1324 node = get_loop_exit_condition (loop);
1326 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1327 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1328 return EXPR_LOC (node);
1330 /* If we got here the loop is probably not "well formed",
1331 try to estimate the loop location */
1333 if (!loop->header)
1334 return UNKNOWN_LOC;
1336 bb = loop->header;
1338 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1340 node = bsi_stmt (si);
1341 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1342 return EXPR_LOC (node);
1345 return UNKNOWN_LOC;
1349 /*************************************************************************
1350 Vectorization Debug Information.
1351 *************************************************************************/
1353 /* Function vect_set_verbosity_level.
1355 Called from toplev.c upon detection of the
1356 -ftree-vectorizer-verbose=N option. */
1358 void
1359 vect_set_verbosity_level (const char *val)
1361 unsigned int vl;
1363 vl = atoi (val);
1364 if (vl < MAX_VERBOSITY_LEVEL)
1365 vect_verbosity_level = vl;
1366 else
1367 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1371 /* Function vect_set_dump_settings.
1373 Fix the verbosity level of the vectorizer if the
1374 requested level was not set explicitly using the flag
1375 -ftree-vectorizer-verbose=N.
1376 Decide where to print the debugging information (dump_file/stderr).
1377 If the user defined the verbosity level, but there is no dump file,
1378 print to stderr, otherwise print to the dump file. */
1380 static void
1381 vect_set_dump_settings (void)
1383 vect_dump = dump_file;
1385 /* Check if the verbosity level was defined by the user: */
1386 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1388 /* If there is no dump file, print to stderr. */
1389 if (!dump_file)
1390 vect_dump = stderr;
1391 return;
1394 /* User didn't specify verbosity level: */
1395 if (dump_file && (dump_flags & TDF_DETAILS))
1396 vect_verbosity_level = REPORT_DETAILS;
1397 else if (dump_file && (dump_flags & TDF_STATS))
1398 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1399 else
1400 vect_verbosity_level = REPORT_NONE;
1402 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1406 /* Function debug_loop_details.
1408 For vectorization debug dumps. */
1410 bool
1411 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1413 if (vl > vect_verbosity_level)
1414 return false;
1416 if (loc == UNKNOWN_LOC)
1417 fprintf (vect_dump, "\n%s:%d: note: ",
1418 DECL_SOURCE_FILE (current_function_decl),
1419 DECL_SOURCE_LINE (current_function_decl));
1420 else
1421 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1424 return true;
1428 /*************************************************************************
1429 Vectorization Utilities.
1430 *************************************************************************/
1432 /* Function new_stmt_vec_info.
1434 Create and initialize a new stmt_vec_info struct for STMT. */
1436 stmt_vec_info
1437 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1439 stmt_vec_info res;
1440 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1442 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1443 STMT_VINFO_STMT (res) = stmt;
1444 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1445 STMT_VINFO_RELEVANT_P (res) = 0;
1446 STMT_VINFO_VECTYPE (res) = NULL;
1447 STMT_VINFO_VEC_STMT (res) = NULL;
1448 STMT_VINFO_DATA_REF (res) = NULL;
1449 STMT_VINFO_MEMTAG (res) = NULL;
1450 STMT_VINFO_PTR_INFO (res) = NULL;
1451 STMT_VINFO_SUBVARS (res) = NULL;
1452 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1453 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1454 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1455 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1456 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1458 return res;
1462 /* Function new_loop_vec_info.
1464 Create and initialize a new loop_vec_info struct for LOOP, as well as
1465 stmt_vec_info structs for all the stmts in LOOP. */
1467 loop_vec_info
1468 new_loop_vec_info (struct loop *loop)
1470 loop_vec_info res;
1471 basic_block *bbs;
1472 block_stmt_iterator si;
1473 unsigned int i;
1475 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1477 bbs = get_loop_body (loop);
1479 /* Create stmt_info for all stmts in the loop. */
1480 for (i = 0; i < loop->num_nodes; i++)
1482 basic_block bb = bbs[i];
1483 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1485 tree stmt = bsi_stmt (si);
1486 stmt_ann_t ann;
1488 get_stmt_operands (stmt);
1489 ann = stmt_ann (stmt);
1490 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1494 LOOP_VINFO_LOOP (res) = loop;
1495 LOOP_VINFO_BBS (res) = bbs;
1496 LOOP_VINFO_EXIT_COND (res) = NULL;
1497 LOOP_VINFO_NITERS (res) = NULL;
1498 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1499 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1500 LOOP_VINFO_VECT_FACTOR (res) = 0;
1501 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1502 "loop_write_datarefs");
1503 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1504 "loop_read_datarefs");
1505 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1506 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1508 return res;
1512 /* Function destroy_loop_vec_info.
1514 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1515 stmts in the loop. */
1517 void
1518 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1520 struct loop *loop;
1521 basic_block *bbs;
1522 int nbbs;
1523 block_stmt_iterator si;
1524 int j;
1526 if (!loop_vinfo)
1527 return;
1529 loop = LOOP_VINFO_LOOP (loop_vinfo);
1531 bbs = LOOP_VINFO_BBS (loop_vinfo);
1532 nbbs = loop->num_nodes;
1534 for (j = 0; j < nbbs; j++)
1536 basic_block bb = bbs[j];
1537 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1539 tree stmt = bsi_stmt (si);
1540 stmt_ann_t ann = stmt_ann (stmt);
1541 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1542 free (stmt_info);
1543 set_stmt_info (ann, NULL);
1547 free (LOOP_VINFO_BBS (loop_vinfo));
1548 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1549 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1551 free (loop_vinfo);
1555 /* Function vect_strip_conversions
1557 Strip conversions that don't narrow the mode. */
1559 tree
1560 vect_strip_conversion (tree expr)
1562 tree to, ti, oprnd0;
1564 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1566 to = TREE_TYPE (expr);
1567 oprnd0 = TREE_OPERAND (expr, 0);
1568 ti = TREE_TYPE (oprnd0);
1570 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1571 return NULL_TREE;
1572 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1573 return NULL_TREE;
1575 expr = oprnd0;
1577 return expr;
1581 /* Function vect_force_dr_alignment_p.
1583 Returns whether the alignment of a DECL can be forced to be aligned
1584 on ALIGNMENT bit boundary. */
1586 bool
1587 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1589 if (TREE_CODE (decl) != VAR_DECL)
1590 return false;
1592 if (DECL_EXTERNAL (decl))
1593 return false;
1595 if (TREE_ASM_WRITTEN (decl))
1596 return false;
1598 if (TREE_STATIC (decl))
1599 return (alignment <= MAX_OFILE_ALIGNMENT);
1600 else
1601 /* This is not 100% correct. The absolute correct stack alignment
1602 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1603 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1604 However, until someone implements forced stack alignment, SSE
1605 isn't really usable without this. */
1606 return (alignment <= PREFERRED_STACK_BOUNDARY);
1610 /* Function get_vectype_for_scalar_type.
1612 Returns the vector type corresponding to SCALAR_TYPE as supported
1613 by the target. */
1615 tree
1616 get_vectype_for_scalar_type (tree scalar_type)
1618 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1619 int nbytes = GET_MODE_SIZE (inner_mode);
1620 int nunits;
1621 tree vectype;
1623 if (nbytes == 0)
1624 return NULL_TREE;
1626 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1627 is expected. */
1628 nunits = UNITS_PER_SIMD_WORD / nbytes;
1630 vectype = build_vector_type (scalar_type, nunits);
1631 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1633 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1634 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1637 if (!vectype)
1638 return NULL_TREE;
1640 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1642 fprintf (vect_dump, "vectype: ");
1643 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1646 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1648 /* TODO: tree-complex.c sometimes can parallelize operations
1649 on generic vectors. We can vectorize the loop in that case,
1650 but then we should re-run the lowering pass. */
1651 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1652 fprintf (vect_dump, "mode not supported by target.");
1653 return NULL_TREE;
1656 return vectype;
1660 /* Function vect_supportable_dr_alignment
1662 Return whether the data reference DR is supported with respect to its
1663 alignment. */
1665 enum dr_alignment_support
1666 vect_supportable_dr_alignment (struct data_reference *dr)
1668 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1669 enum machine_mode mode = (int) TYPE_MODE (vectype);
1671 if (aligned_access_p (dr))
1672 return dr_aligned;
1674 /* Possibly unaligned access. */
1676 if (DR_IS_READ (dr))
1678 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1679 && (!targetm.vectorize.builtin_mask_for_load
1680 || targetm.vectorize.builtin_mask_for_load ()))
1681 return dr_unaligned_software_pipeline;
1683 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1684 /* Can't software pipeline the loads, but can at least do them. */
1685 return dr_unaligned_supported;
1688 /* Unsupported. */
1689 return dr_unaligned_unsupported;
1693 /* Function vect_is_simple_use.
1695 Input:
1696 LOOP - the loop that is being vectorized.
1697 OPERAND - operand of a stmt in LOOP.
1698 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1700 Returns whether a stmt with OPERAND can be vectorized.
1701 Supportable operands are constants, loop invariants, and operands that are
1702 defined by the current iteration of the loop. Unsupportable operands are
1703 those that are defined by a previous iteration of the loop (as is the case
1704 in reduction/induction computations). */
1706 bool
1707 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1709 tree def_stmt;
1710 basic_block bb;
1711 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1713 if (def)
1714 *def = NULL_TREE;
1716 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1717 return true;
1719 if (TREE_CODE (operand) != SSA_NAME)
1720 return false;
1722 def_stmt = SSA_NAME_DEF_STMT (operand);
1723 if (def_stmt == NULL_TREE )
1725 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1726 fprintf (vect_dump, "no def_stmt.");
1727 return false;
1730 /* empty stmt is expected only in case of a function argument.
1731 (Otherwise - we expect a phi_node or a modify_expr). */
1732 if (IS_EMPTY_STMT (def_stmt))
1734 tree arg = TREE_OPERAND (def_stmt, 0);
1735 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1736 return true;
1737 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1739 fprintf (vect_dump, "Unexpected empty stmt: ");
1740 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1742 return false;
1745 /* phi_node inside the loop indicates an induction/reduction pattern.
1746 This is not supported yet. */
1747 bb = bb_for_stmt (def_stmt);
1748 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1750 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1751 fprintf (vect_dump, "reduction/induction - unsupported.");
1752 return false; /* FORNOW: not supported yet. */
1755 /* Expecting a modify_expr or a phi_node. */
1756 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1757 || TREE_CODE (def_stmt) == PHI_NODE)
1759 if (def)
1760 *def = def_stmt;
1761 return true;
1764 return false;
1768 /* Function vect_is_simple_iv_evolution.
1770 FORNOW: A simple evolution of an induction variables in the loop is
1771 considered a polynomial evolution with constant step. */
1773 bool
1774 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1775 tree * step)
1777 tree init_expr;
1778 tree step_expr;
1780 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1782 /* When there is no evolution in this loop, the evolution function
1783 is not "simple". */
1784 if (evolution_part == NULL_TREE)
1785 return false;
1787 /* When the evolution is a polynomial of degree >= 2
1788 the evolution function is not "simple". */
1789 if (tree_is_chrec (evolution_part))
1790 return false;
1792 step_expr = evolution_part;
1793 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1794 loop_nb));
1796 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1798 fprintf (vect_dump, "step: ");
1799 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1800 fprintf (vect_dump, ", init: ");
1801 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1804 *init = init_expr;
1805 *step = step_expr;
1807 if (TREE_CODE (step_expr) != INTEGER_CST)
1809 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1810 fprintf (vect_dump, "step unknown.");
1811 return false;
1814 return true;
1818 /* Function need_imm_uses_for.
1820 Return whether we ought to include information for 'var'
1821 when calculating immediate uses. For this pass we only want use
1822 information for non-virtual variables. */
1824 static bool
1825 need_imm_uses_for (tree var)
1827 return is_gimple_reg (var);
1831 /* Function vectorize_loops.
1833 Entry Point to loop vectorization phase. */
1835 void
1836 vectorize_loops (struct loops *loops)
1838 unsigned int i, loops_num;
1839 unsigned int num_vectorized_loops = 0;
1841 /* Fix the verbosity level if not defined explicitly by the user. */
1842 vect_set_dump_settings ();
1844 /* Does the target support SIMD? */
1845 /* FORNOW: until more sophisticated machine modelling is in place. */
1846 if (!UNITS_PER_SIMD_WORD)
1848 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1849 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
1850 return;
1853 #ifdef ENABLE_CHECKING
1854 verify_loop_closed_ssa ();
1855 #endif
1857 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
1859 /* ----------- Analyze loops. ----------- */
1861 /* If some loop was duplicated, it gets bigger number
1862 than all previously defined loops. This fact allows us to run
1863 only over initial loops skipping newly generated ones. */
1864 loops_num = loops->num;
1865 for (i = 1; i < loops_num; i++)
1867 loop_vec_info loop_vinfo;
1868 struct loop *loop = loops->parray[i];
1870 if (!loop)
1871 continue;
1873 loop_vinfo = vect_analyze_loop (loop);
1874 loop->aux = loop_vinfo;
1876 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1877 continue;
1879 vect_transform_loop (loop_vinfo, loops);
1880 num_vectorized_loops++;
1883 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1884 fprintf (vect_dump, "vectorized %u loops in function.\n",
1885 num_vectorized_loops);
1887 /* ----------- Finalize. ----------- */
1889 free_df ();
1890 for (i = 1; i < loops_num; i++)
1892 struct loop *loop = loops->parray[i];
1893 loop_vec_info loop_vinfo;
1895 if (!loop)
1896 continue;
1897 loop_vinfo = loop->aux;
1898 destroy_loop_vec_info (loop_vinfo);
1899 loop->aux = NULL;