Shuffle ChangeLog entries into new files ChangeLog-1998,
[official-gcc.git] / gcc / tree-vectorizer.c
blob95ecdd9fa9777c42e13f29ce017b84774f082cf8
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131 #include "rtl.h"
132 #include "basic-block.h"
133 #include "diagnostic.h"
134 #include "tree-flow.h"
135 #include "tree-dump.h"
136 #include "timevar.h"
137 #include "cfgloop.h"
138 #include "cfglayout.h"
139 #include "expr.h"
140 #include "optabs.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /*************************************************************************
150 Simple Loop Peeling Utilities
151 *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153 (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155 (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard1
157 (edge, struct loop *, bool, basic_block *, bitmap *);
158 static void slpeel_update_phi_nodes_for_guard2
159 (edge, struct loop *, bool, basic_block *);
160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
162 static void rename_use_op (use_operand_p);
163 static void rename_variables_in_bb (basic_block);
164 static void rename_variables_in_loop (struct loop *);
166 /*************************************************************************
167 General Vectorization Utilities
168 *************************************************************************/
169 static void vect_set_dump_settings (void);
171 /* vect_dump will be set to stderr or dump_file if exist. */
172 FILE *vect_dump;
174 /* vect_verbosity_level set to an invalid value
175 to mark that it's uninitialized. */
176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
178 /* Number of loops, at the beginning of vectorization. */
179 unsigned int vect_loops_num;
181 /*************************************************************************
182 Simple Loop Peeling Utilities
184 Utilities to support loop peeling for vectorization purposes.
185 *************************************************************************/
188 /* Renames the use *OP_P. */
190 static void
191 rename_use_op (use_operand_p op_p)
193 tree new_name;
195 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
196 return;
198 new_name = get_current_def (USE_FROM_PTR (op_p));
200 /* Something defined outside of the loop. */
201 if (!new_name)
202 return;
204 /* An ordinary ssa name defined in the loop. */
206 SET_USE (op_p, new_name);
210 /* Renames the variables in basic block BB. */
212 static void
213 rename_variables_in_bb (basic_block bb)
215 tree phi;
216 block_stmt_iterator bsi;
217 tree stmt;
218 use_operand_p use_p;
219 ssa_op_iter iter;
220 edge e;
221 edge_iterator ei;
222 struct loop *loop = bb->loop_father;
224 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
226 stmt = bsi_stmt (bsi);
227 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
228 (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
229 rename_use_op (use_p);
232 FOR_EACH_EDGE (e, ei, bb->succs)
234 if (!flow_bb_inside_loop_p (loop, e->dest))
235 continue;
236 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
237 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
242 /* Renames variables in new generated LOOP. */
244 static void
245 rename_variables_in_loop (struct loop *loop)
247 unsigned i;
248 basic_block *bbs;
250 bbs = get_loop_body (loop);
252 for (i = 0; i < loop->num_nodes; i++)
253 rename_variables_in_bb (bbs[i]);
255 free (bbs);
259 /* Update the PHI nodes of NEW_LOOP.
261 NEW_LOOP is a duplicate of ORIG_LOOP.
262 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
263 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
264 executes before it. */
266 static void
267 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
268 struct loop *new_loop, bool after)
270 tree new_ssa_name;
271 tree phi_new, phi_orig;
272 tree def;
273 edge orig_loop_latch = loop_latch_edge (orig_loop);
274 edge orig_entry_e = loop_preheader_edge (orig_loop);
275 edge new_loop_exit_e = new_loop->single_exit;
276 edge new_loop_entry_e = loop_preheader_edge (new_loop);
277 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
280 step 1. For each loop-header-phi:
281 Add the first phi argument for the phi in NEW_LOOP
282 (the one associated with the entry of NEW_LOOP)
284 step 2. For each loop-header-phi:
285 Add the second phi argument for the phi in NEW_LOOP
286 (the one associated with the latch of NEW_LOOP)
288 step 3. Update the phis in the successor block of NEW_LOOP.
290 case 1: NEW_LOOP was placed before ORIG_LOOP:
291 The successor block of NEW_LOOP is the header of ORIG_LOOP.
292 Updating the phis in the successor block can therefore be done
293 along with the scanning of the loop header phis, because the
294 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
295 phi nodes, organized in the same order.
297 case 2: NEW_LOOP was placed after ORIG_LOOP:
298 The successor block of NEW_LOOP is the original exit block of
299 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
300 We postpone updating these phis to a later stage (when
301 loop guards are added).
305 /* Scan the phis in the headers of the old and new loops
306 (they are organized in exactly the same order). */
308 for (phi_new = phi_nodes (new_loop->header),
309 phi_orig = phi_nodes (orig_loop->header);
310 phi_new && phi_orig;
311 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
313 /* step 1. */
314 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
315 add_phi_arg (phi_new, def, new_loop_entry_e);
317 /* step 2. */
318 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
319 if (TREE_CODE (def) != SSA_NAME)
320 continue;
322 new_ssa_name = get_current_def (def);
323 if (!new_ssa_name)
324 /* Something defined outside of the loop. */
325 continue;
327 /* An ordinary ssa name defined in the loop. */
328 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
330 /* step 3 (case 1). */
331 if (!after)
333 gcc_assert (new_loop_exit_e == orig_entry_e);
334 SET_PHI_ARG_DEF (phi_orig,
335 new_loop_exit_e->dest_idx,
336 new_ssa_name);
342 /* Update PHI nodes for a guard of the LOOP.
344 Input:
345 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
346 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
347 originates from the guard-bb, skips LOOP and reaches the (unique) exit
348 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
349 We denote this bb NEW_MERGE_BB because before the guard code was added
350 it had a single predecessor (the LOOP header), and now it became a merge
351 point of two paths - the path that ends with the LOOP exit-edge, and
352 the path that ends with GUARD_EDGE.
353 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
354 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
356 ===> The CFG before the guard-code was added:
357 LOOP_header_bb:
358 loop_body
359 if (exit_loop) goto update_bb
360 else goto LOOP_header_bb
361 update_bb:
363 ==> The CFG after the guard-code was added:
364 guard_bb:
365 if (LOOP_guard_condition) goto new_merge_bb
366 else goto LOOP_header_bb
367 LOOP_header_bb:
368 loop_body
369 if (exit_loop_condition) goto new_merge_bb
370 else goto LOOP_header_bb
371 new_merge_bb:
372 goto update_bb
373 update_bb:
375 ==> The CFG after this function:
376 guard_bb:
377 if (LOOP_guard_condition) goto new_merge_bb
378 else goto LOOP_header_bb
379 LOOP_header_bb:
380 loop_body
381 if (exit_loop_condition) goto new_exit_bb
382 else goto LOOP_header_bb
383 new_exit_bb:
384 new_merge_bb:
385 goto update_bb
386 update_bb:
388 This function:
389 1. creates and updates the relevant phi nodes to account for the new
390 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
391 1.1. Create phi nodes at NEW_MERGE_BB.
392 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
393 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
394 2. preserves loop-closed-ssa-form by creating the required phi nodes
395 at the exit of LOOP (i.e, in NEW_EXIT_BB).
397 There are two flavors to this function:
399 slpeel_update_phi_nodes_for_guard1:
400 Here the guard controls whether we enter or skip LOOP, where LOOP is a
401 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
402 for variables that have phis in the loop header.
404 slpeel_update_phi_nodes_for_guard2:
405 Here the guard controls whether we enter or skip LOOP, where LOOP is an
406 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
407 for variables that have phis in the loop exit.
409 I.E., the overall structure is:
411 loop1_preheader_bb:
412 guard1 (goto loop1/merg1_bb)
413 loop1
414 loop1_exit_bb:
415 guard2 (goto merge1_bb/merge2_bb)
416 merge1_bb
417 loop2
418 loop2_exit_bb
419 merge2_bb
420 next_bb
422 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
423 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
424 that have phis in loop1->header).
426 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
427 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
428 that have phis in next_bb). It also adds some of these phis to
429 loop1_exit_bb.
431 slpeel_update_phi_nodes_for_guard1 is always called before
432 slpeel_update_phi_nodes_for_guard2. They are both needed in order
433 to create correct data-flow and loop-closed-ssa-form.
435 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
436 that change between iterations of a loop (and therefore have a phi-node
437 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
438 phis for variables that are used out of the loop (and therefore have
439 loop-closed exit phis). Some variables may be both updated between
440 iterations and used after the loop. This is why in loop1_exit_bb we
441 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
442 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
444 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
445 an original loop. i.e., we have:
447 orig_loop
448 guard_bb (goto LOOP/new_merge)
449 new_loop <-- LOOP
450 new_exit
451 new_merge
452 next_bb
454 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
455 have:
457 new_loop
458 guard_bb (goto LOOP/new_merge)
459 orig_loop <-- LOOP
460 new_exit
461 new_merge
462 next_bb
464 The SSA names defined in the original loop have a current
465 reaching definition that that records the corresponding new
466 ssa-name used in the new duplicated loop copy.
469 /* Function slpeel_update_phi_nodes_for_guard1
471 Input:
472 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
473 - DEFS - a bitmap of ssa names to mark new names for which we recorded
474 information.
476 In the context of the overall structure, we have:
478 loop1_preheader_bb:
479 guard1 (goto loop1/merg1_bb)
480 LOOP-> loop1
481 loop1_exit_bb:
482 guard2 (goto merge1_bb/merge2_bb)
483 merge1_bb
484 loop2
485 loop2_exit_bb
486 merge2_bb
487 next_bb
489 For each name updated between loop iterations (i.e - for each name that has
490 an entry (loop-header) phi in LOOP) we create a new phi in:
491 1. merge1_bb (to account for the edge from guard1)
492 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
495 static void
496 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
497 bool is_new_loop, basic_block *new_exit_bb,
498 bitmap *defs)
500 tree orig_phi, new_phi;
501 tree update_phi, update_phi2;
502 tree guard_arg, loop_arg;
503 basic_block new_merge_bb = guard_edge->dest;
504 edge e = EDGE_SUCC (new_merge_bb, 0);
505 basic_block update_bb = e->dest;
506 basic_block orig_bb = loop->header;
507 edge new_exit_e;
508 tree current_new_name;
510 /* Create new bb between loop and new_merge_bb. */
511 *new_exit_bb = split_edge (loop->single_exit);
512 add_bb_to_loop (*new_exit_bb, loop->outer);
514 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
516 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
517 orig_phi && update_phi;
518 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
520 /** 1. Handle new-merge-point phis **/
522 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
523 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524 new_merge_bb);
526 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527 of LOOP. Set the two phi args in NEW_PHI for these edges: */
528 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
531 add_phi_arg (new_phi, loop_arg, new_exit_e);
532 add_phi_arg (new_phi, guard_arg, guard_edge);
534 /* 1.3. Update phi in successor block. */
535 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
536 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
537 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
538 update_phi2 = new_phi;
541 /** 2. Handle loop-closed-ssa-form phis **/
543 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
544 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
545 *new_exit_bb);
547 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
548 add_phi_arg (new_phi, loop_arg, loop->single_exit);
550 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
551 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
552 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
554 /* 2.4. Record the newly created name with set_current_def.
555 We want to find a name such that
556 name = get_current_def (orig_loop_name)
557 and to set its current definition as follows:
558 set_current_def (name, new_phi_name)
560 If LOOP is a new loop then loop_arg is already the name we're
561 looking for. If LOOP is the original loop, then loop_arg is
562 the orig_loop_name and the relevant name is recorded in its
563 current reaching definition. */
564 if (is_new_loop)
565 current_new_name = loop_arg;
566 else
568 current_new_name = get_current_def (loop_arg);
569 gcc_assert (current_new_name);
571 #ifdef ENABLE_CHECKING
572 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
573 #endif
575 set_current_def (current_new_name, PHI_RESULT (new_phi));
576 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
579 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
583 /* Function slpeel_update_phi_nodes_for_guard2
585 Input:
586 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
588 In the context of the overall structure, we have:
590 loop1_preheader_bb:
591 guard1 (goto loop1/merg1_bb)
592 loop1
593 loop1_exit_bb:
594 guard2 (goto merge1_bb/merge2_bb)
595 merge1_bb
596 LOOP-> loop2
597 loop2_exit_bb
598 merge2_bb
599 next_bb
601 For each name used out side the loop (i.e - for each name that has an exit
602 phi in next_bb) we create a new phi in:
603 1. merge2_bb (to account for the edge from guard_bb)
604 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
605 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
606 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
609 static void
610 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
611 bool is_new_loop, basic_block *new_exit_bb)
613 tree orig_phi, new_phi;
614 tree update_phi, update_phi2;
615 tree guard_arg, loop_arg;
616 basic_block new_merge_bb = guard_edge->dest;
617 edge e = EDGE_SUCC (new_merge_bb, 0);
618 basic_block update_bb = e->dest;
619 edge new_exit_e;
620 tree orig_def, orig_def_new_name;
621 tree new_name, new_name2;
622 tree arg;
624 /* Create new bb between loop and new_merge_bb. */
625 *new_exit_bb = split_edge (loop->single_exit);
626 add_bb_to_loop (*new_exit_bb, loop->outer);
628 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
630 for (update_phi = phi_nodes (update_bb); update_phi;
631 update_phi = PHI_CHAIN (update_phi))
633 orig_phi = update_phi;
634 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
635 orig_def_new_name = get_current_def (orig_def);
636 arg = NULL_TREE;
638 /** 1. Handle new-merge-point phis **/
640 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
641 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
642 new_merge_bb);
644 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
645 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
646 new_name = orig_def;
647 new_name2 = NULL_TREE;
648 if (orig_def_new_name)
650 new_name = orig_def_new_name;
651 /* Some variables have both loop-entry-phis and loop-exit-phis.
652 Such variables were given yet newer names by phis placed in
653 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
654 new_name2 = get_current_def (get_current_def (orig_name)). */
655 new_name2 = get_current_def (new_name);
658 if (is_new_loop)
660 guard_arg = orig_def;
661 loop_arg = new_name;
663 else
665 guard_arg = new_name;
666 loop_arg = orig_def;
668 if (new_name2)
669 guard_arg = new_name2;
671 add_phi_arg (new_phi, loop_arg, new_exit_e);
672 add_phi_arg (new_phi, guard_arg, guard_edge);
674 /* 1.3. Update phi in successor block. */
675 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
676 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
677 update_phi2 = new_phi;
680 /** 2. Handle loop-closed-ssa-form phis **/
682 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
683 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
684 *new_exit_bb);
686 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
687 add_phi_arg (new_phi, loop_arg, loop->single_exit);
689 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
690 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
691 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
694 /** 3. Handle loop-closed-ssa-form phis for first loop **/
696 /* 3.1. Find the relevant names that need an exit-phi in
697 GUARD_BB, i.e. names for which
698 slpeel_update_phi_nodes_for_guard1 had not already created a
699 phi node. This is the case for names that are used outside
700 the loop (and therefore need an exit phi) but are not updated
701 across loop iterations (and therefore don't have a
702 loop-header-phi).
704 slpeel_update_phi_nodes_for_guard1 is responsible for
705 creating loop-exit phis in GUARD_BB for names that have a
706 loop-header-phi. When such a phi is created we also record
707 the new name in its current definition. If this new name
708 exists, then guard_arg was set to this new name (see 1.2
709 above). Therefore, if guard_arg is not this new name, this
710 is an indication that an exit-phi in GUARD_BB was not yet
711 created, so we take care of it here. */
712 if (guard_arg == new_name2)
713 continue;
714 arg = guard_arg;
716 /* 3.2. Generate new phi node in GUARD_BB: */
717 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
718 guard_edge->src);
720 /* 3.3. GUARD_BB has one incoming edge: */
721 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
722 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
724 /* 3.4. Update phi in successor of GUARD_BB: */
725 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
726 == guard_arg);
727 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
730 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
734 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
735 that starts at zero, increases by one and its limit is NITERS.
737 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
739 void
740 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
742 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
743 tree orig_cond;
744 edge exit_edge = loop->single_exit;
745 block_stmt_iterator loop_cond_bsi;
746 block_stmt_iterator incr_bsi;
747 bool insert_after;
748 tree begin_label = tree_block_label (loop->latch);
749 tree exit_label = tree_block_label (loop->single_exit->dest);
750 tree init = build_int_cst (TREE_TYPE (niters), 0);
751 tree step = build_int_cst (TREE_TYPE (niters), 1);
752 tree then_label;
753 tree else_label;
754 LOC loop_loc;
756 orig_cond = get_loop_exit_condition (loop);
757 #ifdef ENABLE_CHECKING
758 gcc_assert (orig_cond);
759 #endif
760 loop_cond_bsi = bsi_for_stmt (orig_cond);
762 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
763 create_iv (init, step, NULL_TREE, loop,
764 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
766 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
768 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
769 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
770 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
772 else /* 'then' edge loops back. */
774 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
775 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
776 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
779 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
780 then_label, else_label);
781 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
783 /* Remove old loop exit test: */
784 bsi_remove (&loop_cond_bsi);
786 loop_loc = find_loop_location (loop);
787 if (dump_file && (dump_flags & TDF_DETAILS))
789 if (loop_loc != UNKNOWN_LOC)
790 fprintf (dump_file, "\nloop at %s:%d: ",
791 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
792 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
795 loop->nb_iterations = niters;
799 /* Given LOOP this function generates a new copy of it and puts it
800 on E which is either the entry or exit of LOOP. */
802 static struct loop *
803 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
804 edge e)
806 struct loop *new_loop;
807 basic_block *new_bbs, *bbs;
808 bool at_exit;
809 bool was_imm_dom;
810 basic_block exit_dest;
811 tree phi, phi_arg;
813 at_exit = (e == loop->single_exit);
814 if (!at_exit && e != loop_preheader_edge (loop))
815 return NULL;
817 bbs = get_loop_body (loop);
819 /* Check whether duplication is possible. */
820 if (!can_copy_bbs_p (bbs, loop->num_nodes))
822 free (bbs);
823 return NULL;
826 /* Generate new loop structure. */
827 new_loop = duplicate_loop (loops, loop, loop->outer);
828 if (!new_loop)
830 free (bbs);
831 return NULL;
834 exit_dest = loop->single_exit->dest;
835 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
836 exit_dest) == loop->header ?
837 true : false);
839 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
841 copy_bbs (bbs, loop->num_nodes, new_bbs,
842 &loop->single_exit, 1, &new_loop->single_exit, NULL);
844 /* Duplicating phi args at exit bbs as coming
845 also from exit of duplicated loop. */
846 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
848 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
849 if (phi_arg)
851 edge new_loop_exit_edge;
853 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
854 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
855 else
856 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
858 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
862 if (at_exit) /* Add the loop copy at exit. */
864 redirect_edge_and_branch_force (e, new_loop->header);
865 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
866 if (was_imm_dom)
867 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
869 else /* Add the copy at entry. */
871 edge new_exit_e;
872 edge entry_e = loop_preheader_edge (loop);
873 basic_block preheader = entry_e->src;
875 if (!flow_bb_inside_loop_p (new_loop,
876 EDGE_SUCC (new_loop->header, 0)->dest))
877 new_exit_e = EDGE_SUCC (new_loop->header, 0);
878 else
879 new_exit_e = EDGE_SUCC (new_loop->header, 1);
881 redirect_edge_and_branch_force (new_exit_e, loop->header);
882 set_immediate_dominator (CDI_DOMINATORS, loop->header,
883 new_exit_e->src);
885 /* We have to add phi args to the loop->header here as coming
886 from new_exit_e edge. */
887 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
889 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
890 if (phi_arg)
891 add_phi_arg (phi, phi_arg, new_exit_e);
894 redirect_edge_and_branch_force (entry_e, new_loop->header);
895 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
898 free (new_bbs);
899 free (bbs);
901 return new_loop;
905 /* Given the condition statement COND, put it as the last statement
906 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
907 Assumes that this is the single exit of the guarded loop.
908 Returns the skip edge. */
910 static edge
911 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
912 basic_block dom_bb)
914 block_stmt_iterator bsi;
915 edge new_e, enter_e;
916 tree cond_stmt, then_label, else_label;
918 enter_e = EDGE_SUCC (guard_bb, 0);
919 enter_e->flags &= ~EDGE_FALLTHRU;
920 enter_e->flags |= EDGE_FALSE_VALUE;
921 bsi = bsi_last (guard_bb);
923 then_label = build1 (GOTO_EXPR, void_type_node,
924 tree_block_label (exit_bb));
925 else_label = build1 (GOTO_EXPR, void_type_node,
926 tree_block_label (enter_e->dest));
927 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
928 then_label, else_label);
929 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
930 /* Add new edge to connect guard block to the merge/loop-exit block. */
931 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
932 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
933 return new_e;
937 /* This function verifies that the following restrictions apply to LOOP:
938 (1) it is innermost
939 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
940 (3) it is single entry, single exit
941 (4) its exit condition is the last stmt in the header
942 (5) E is the entry/exit edge of LOOP.
945 bool
946 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
948 edge exit_e = loop->single_exit;
949 edge entry_e = loop_preheader_edge (loop);
950 tree orig_cond = get_loop_exit_condition (loop);
951 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
953 if (need_ssa_update_p ())
954 return false;
956 if (loop->inner
957 /* All loops have an outer scope; the only case loop->outer is NULL is for
958 the function itself. */
959 || !loop->outer
960 || loop->num_nodes != 2
961 || !empty_block_p (loop->latch)
962 || !loop->single_exit
963 /* Verify that new loop exit condition can be trivially modified. */
964 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
965 || (e != exit_e && e != entry_e))
966 return false;
968 return true;
971 #ifdef ENABLE_CHECKING
972 void
973 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
974 struct loop *second_loop)
976 basic_block loop1_exit_bb = first_loop->single_exit->dest;
977 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
978 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
980 /* A guard that controls whether the second_loop is to be executed or skipped
981 is placed in first_loop->exit. first_loopt->exit therefore has two
982 successors - one is the preheader of second_loop, and the other is a bb
983 after second_loop.
985 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
987 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
988 of second_loop. */
990 /* The preheader of new_loop is expected to have two predessors:
991 first_loop->exit and the block that precedes first_loop. */
993 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
994 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
995 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
996 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
997 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
999 /* Verify that the other successor of first_loopt->exit is after the
1000 second_loop. */
1001 /* TODO */
1003 #endif
1005 /* Function slpeel_tree_peel_loop_to_edge.
1007 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1008 that is placed on the entry (exit) edge E of LOOP. After this transformation
1009 we have two loops one after the other - first-loop iterates FIRST_NITERS
1010 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1012 Input:
1013 - LOOP: the loop to be peeled.
1014 - E: the exit or entry edge of LOOP.
1015 If it is the entry edge, we peel the first iterations of LOOP. In this
1016 case first-loop is LOOP, and second-loop is the newly created loop.
1017 If it is the exit edge, we peel the last iterations of LOOP. In this
1018 case, first-loop is the newly created loop, and second-loop is LOOP.
1019 - NITERS: the number of iterations that LOOP iterates.
1020 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1021 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1022 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1023 is false, the caller of this function may want to take care of this
1024 (this can be useful if we don't want new stmts added to first-loop).
1026 Output:
1027 The function returns a pointer to the new loop-copy, or NULL if it failed
1028 to perform the transformation.
1030 The function generates two if-then-else guards: one before the first loop,
1031 and the other before the second loop:
1032 The first guard is:
1033 if (FIRST_NITERS == 0) then skip the first loop,
1034 and go directly to the second loop.
1035 The second guard is:
1036 if (FIRST_NITERS == NITERS) then skip the second loop.
1038 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1039 FORNOW the resulting code will not be in loop-closed-ssa form.
1042 struct loop*
1043 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1044 edge e, tree first_niters,
1045 tree niters, bool update_first_loop_count)
1047 struct loop *new_loop = NULL, *first_loop, *second_loop;
1048 edge skip_e;
1049 tree pre_condition;
1050 bitmap definitions;
1051 basic_block bb_before_second_loop, bb_after_second_loop;
1052 basic_block bb_before_first_loop;
1053 basic_block bb_between_loops;
1054 basic_block new_exit_bb;
1055 edge exit_e = loop->single_exit;
1056 LOC loop_loc;
1058 if (!slpeel_can_duplicate_loop_p (loop, e))
1059 return NULL;
1061 /* We have to initialize cfg_hooks. Then, when calling
1062 cfg_hooks->split_edge, the function tree_split_edge
1063 is actually called and, when calling cfg_hooks->duplicate_block,
1064 the function tree_duplicate_bb is called. */
1065 tree_register_cfg_hooks ();
1068 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1069 Resulting CFG would be:
1071 first_loop:
1072 do {
1073 } while ...
1075 second_loop:
1076 do {
1077 } while ...
1079 orig_exit_bb:
1082 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1084 loop_loc = find_loop_location (loop);
1085 if (dump_file && (dump_flags & TDF_DETAILS))
1087 if (loop_loc != UNKNOWN_LOC)
1088 fprintf (dump_file, "\n%s:%d: note: ",
1089 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1090 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1092 return NULL;
1095 if (e == exit_e)
1097 /* NEW_LOOP was placed after LOOP. */
1098 first_loop = loop;
1099 second_loop = new_loop;
1101 else
1103 /* NEW_LOOP was placed before LOOP. */
1104 first_loop = new_loop;
1105 second_loop = loop;
1108 definitions = ssa_names_to_replace ();
1109 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1110 rename_variables_in_loop (new_loop);
1113 /* 2. Add the guard that controls whether the first loop is executed.
1114 Resulting CFG would be:
1116 bb_before_first_loop:
1117 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1118 GOTO first-loop
1120 first_loop:
1121 do {
1122 } while ...
1124 bb_before_second_loop:
1126 second_loop:
1127 do {
1128 } while ...
1130 orig_exit_bb:
1133 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1134 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1135 bb_before_second_loop = split_edge (first_loop->single_exit);
1136 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1138 pre_condition =
1139 fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1140 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1141 bb_before_second_loop, bb_before_first_loop);
1142 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1143 first_loop == new_loop,
1144 &new_exit_bb, &definitions);
1147 /* 3. Add the guard that controls whether the second loop is executed.
1148 Resulting CFG would be:
1150 bb_before_first_loop:
1151 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1152 GOTO first-loop
1154 first_loop:
1155 do {
1156 } while ...
1158 bb_between_loops:
1159 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1160 GOTO bb_before_second_loop
1162 bb_before_second_loop:
1164 second_loop:
1165 do {
1166 } while ...
1168 bb_after_second_loop:
1170 orig_exit_bb:
1173 bb_between_loops = new_exit_bb;
1174 bb_after_second_loop = split_edge (second_loop->single_exit);
1175 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1177 pre_condition =
1178 fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1179 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1180 bb_after_second_loop, bb_before_first_loop);
1181 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1182 second_loop == new_loop, &new_exit_bb);
1184 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1186 if (update_first_loop_count)
1187 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1189 BITMAP_FREE (definitions);
1190 delete_update_ssa ();
1192 return new_loop;
1195 /* Function vect_get_loop_location.
1197 Extract the location of the loop in the source code.
1198 If the loop is not well formed for vectorization, an estimated
1199 location is calculated.
1200 Return the loop location if succeed and NULL if not. */
1203 find_loop_location (struct loop *loop)
1205 tree node = NULL_TREE;
1206 basic_block bb;
1207 block_stmt_iterator si;
1209 if (!loop)
1210 return UNKNOWN_LOC;
1212 node = get_loop_exit_condition (loop);
1214 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1215 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1216 return EXPR_LOC (node);
1218 /* If we got here the loop is probably not "well formed",
1219 try to estimate the loop location */
1221 if (!loop->header)
1222 return UNKNOWN_LOC;
1224 bb = loop->header;
1226 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1228 node = bsi_stmt (si);
1229 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1230 return EXPR_LOC (node);
1233 return UNKNOWN_LOC;
1237 /*************************************************************************
1238 Vectorization Debug Information.
1239 *************************************************************************/
1241 /* Function vect_set_verbosity_level.
1243 Called from toplev.c upon detection of the
1244 -ftree-vectorizer-verbose=N option. */
1246 void
1247 vect_set_verbosity_level (const char *val)
1249 unsigned int vl;
1251 vl = atoi (val);
1252 if (vl < MAX_VERBOSITY_LEVEL)
1253 vect_verbosity_level = vl;
1254 else
1255 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1259 /* Function vect_set_dump_settings.
1261 Fix the verbosity level of the vectorizer if the
1262 requested level was not set explicitly using the flag
1263 -ftree-vectorizer-verbose=N.
1264 Decide where to print the debugging information (dump_file/stderr).
1265 If the user defined the verbosity level, but there is no dump file,
1266 print to stderr, otherwise print to the dump file. */
1268 static void
1269 vect_set_dump_settings (void)
1271 vect_dump = dump_file;
1273 /* Check if the verbosity level was defined by the user: */
1274 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1276 /* If there is no dump file, print to stderr. */
1277 if (!dump_file)
1278 vect_dump = stderr;
1279 return;
1282 /* User didn't specify verbosity level: */
1283 if (dump_file && (dump_flags & TDF_DETAILS))
1284 vect_verbosity_level = REPORT_DETAILS;
1285 else if (dump_file && (dump_flags & TDF_STATS))
1286 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1287 else
1288 vect_verbosity_level = REPORT_NONE;
1290 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1294 /* Function debug_loop_details.
1296 For vectorization debug dumps. */
1298 bool
1299 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1301 if (vl > vect_verbosity_level)
1302 return false;
1304 if (loc == UNKNOWN_LOC)
1305 fprintf (vect_dump, "\n%s:%d: note: ",
1306 DECL_SOURCE_FILE (current_function_decl),
1307 DECL_SOURCE_LINE (current_function_decl));
1308 else
1309 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1312 return true;
1316 /*************************************************************************
1317 Vectorization Utilities.
1318 *************************************************************************/
1320 /* Function new_stmt_vec_info.
1322 Create and initialize a new stmt_vec_info struct for STMT. */
1324 stmt_vec_info
1325 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1327 stmt_vec_info res;
1328 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1330 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1331 STMT_VINFO_STMT (res) = stmt;
1332 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1333 STMT_VINFO_RELEVANT_P (res) = 0;
1334 STMT_VINFO_VECTYPE (res) = NULL;
1335 STMT_VINFO_VEC_STMT (res) = NULL;
1336 STMT_VINFO_DATA_REF (res) = NULL;
1337 STMT_VINFO_MEMTAG (res) = NULL;
1338 STMT_VINFO_PTR_INFO (res) = NULL;
1339 STMT_VINFO_SUBVARS (res) = NULL;
1340 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1341 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1342 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1343 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1344 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1346 return res;
1350 /* Function new_loop_vec_info.
1352 Create and initialize a new loop_vec_info struct for LOOP, as well as
1353 stmt_vec_info structs for all the stmts in LOOP. */
1355 loop_vec_info
1356 new_loop_vec_info (struct loop *loop)
1358 loop_vec_info res;
1359 basic_block *bbs;
1360 block_stmt_iterator si;
1361 unsigned int i;
1363 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1365 bbs = get_loop_body (loop);
1367 /* Create stmt_info for all stmts in the loop. */
1368 for (i = 0; i < loop->num_nodes; i++)
1370 basic_block bb = bbs[i];
1371 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1373 tree stmt = bsi_stmt (si);
1374 stmt_ann_t ann;
1376 ann = stmt_ann (stmt);
1377 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1381 LOOP_VINFO_LOOP (res) = loop;
1382 LOOP_VINFO_BBS (res) = bbs;
1383 LOOP_VINFO_EXIT_COND (res) = NULL;
1384 LOOP_VINFO_NITERS (res) = NULL;
1385 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1386 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1387 LOOP_VINFO_VECT_FACTOR (res) = 0;
1388 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1389 "loop_write_datarefs");
1390 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1391 "loop_read_datarefs");
1392 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1393 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1395 return res;
1399 /* Function destroy_loop_vec_info.
1401 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1402 stmts in the loop. */
1404 void
1405 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1407 struct loop *loop;
1408 basic_block *bbs;
1409 int nbbs;
1410 block_stmt_iterator si;
1411 int j;
1413 if (!loop_vinfo)
1414 return;
1416 loop = LOOP_VINFO_LOOP (loop_vinfo);
1418 bbs = LOOP_VINFO_BBS (loop_vinfo);
1419 nbbs = loop->num_nodes;
1421 for (j = 0; j < nbbs; j++)
1423 basic_block bb = bbs[j];
1424 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1426 tree stmt = bsi_stmt (si);
1427 stmt_ann_t ann = stmt_ann (stmt);
1428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1429 free (stmt_info);
1430 set_stmt_info (ann, NULL);
1434 free (LOOP_VINFO_BBS (loop_vinfo));
1435 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1436 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1438 free (loop_vinfo);
1442 /* Function vect_strip_conversions
1444 Strip conversions that don't narrow the mode. */
1446 tree
1447 vect_strip_conversion (tree expr)
1449 tree to, ti, oprnd0;
1451 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1453 to = TREE_TYPE (expr);
1454 oprnd0 = TREE_OPERAND (expr, 0);
1455 ti = TREE_TYPE (oprnd0);
1457 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1458 return NULL_TREE;
1459 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1460 return NULL_TREE;
1462 expr = oprnd0;
1464 return expr;
1468 /* Function vect_force_dr_alignment_p.
1470 Returns whether the alignment of a DECL can be forced to be aligned
1471 on ALIGNMENT bit boundary. */
1473 bool
1474 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1476 if (TREE_CODE (decl) != VAR_DECL)
1477 return false;
1479 if (DECL_EXTERNAL (decl))
1480 return false;
1482 if (TREE_ASM_WRITTEN (decl))
1483 return false;
1485 if (TREE_STATIC (decl))
1486 return (alignment <= MAX_OFILE_ALIGNMENT);
1487 else
1488 /* This is not 100% correct. The absolute correct stack alignment
1489 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1490 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1491 However, until someone implements forced stack alignment, SSE
1492 isn't really usable without this. */
1493 return (alignment <= PREFERRED_STACK_BOUNDARY);
1497 /* Function get_vectype_for_scalar_type.
1499 Returns the vector type corresponding to SCALAR_TYPE as supported
1500 by the target. */
1502 tree
1503 get_vectype_for_scalar_type (tree scalar_type)
1505 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1506 int nbytes = GET_MODE_SIZE (inner_mode);
1507 int nunits;
1508 tree vectype;
1510 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1511 return NULL_TREE;
1513 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1514 is expected. */
1515 nunits = UNITS_PER_SIMD_WORD / nbytes;
1517 vectype = build_vector_type (scalar_type, nunits);
1518 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1520 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1521 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1524 if (!vectype)
1525 return NULL_TREE;
1527 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1529 fprintf (vect_dump, "vectype: ");
1530 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1533 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1534 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1536 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1537 fprintf (vect_dump, "mode not supported by target.");
1538 return NULL_TREE;
1541 return vectype;
1545 /* Function vect_supportable_dr_alignment
1547 Return whether the data reference DR is supported with respect to its
1548 alignment. */
1550 enum dr_alignment_support
1551 vect_supportable_dr_alignment (struct data_reference *dr)
1553 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1554 enum machine_mode mode = (int) TYPE_MODE (vectype);
1556 if (aligned_access_p (dr))
1557 return dr_aligned;
1559 /* Possibly unaligned access. */
1561 if (DR_IS_READ (dr))
1563 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1564 && (!targetm.vectorize.builtin_mask_for_load
1565 || targetm.vectorize.builtin_mask_for_load ()))
1566 return dr_unaligned_software_pipeline;
1568 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1569 /* Can't software pipeline the loads, but can at least do them. */
1570 return dr_unaligned_supported;
1573 /* Unsupported. */
1574 return dr_unaligned_unsupported;
1578 /* Function vect_is_simple_use.
1580 Input:
1581 LOOP - the loop that is being vectorized.
1582 OPERAND - operand of a stmt in LOOP.
1583 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1585 Returns whether a stmt with OPERAND can be vectorized.
1586 Supportable operands are constants, loop invariants, and operands that are
1587 defined by the current iteration of the loop. Unsupportable operands are
1588 those that are defined by a previous iteration of the loop (as is the case
1589 in reduction/induction computations). */
1591 bool
1592 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1594 tree def_stmt;
1595 basic_block bb;
1596 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598 if (def)
1599 *def = NULL_TREE;
1601 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1602 return true;
1604 if (TREE_CODE (operand) != SSA_NAME)
1605 return false;
1607 def_stmt = SSA_NAME_DEF_STMT (operand);
1608 if (def_stmt == NULL_TREE )
1610 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1611 fprintf (vect_dump, "no def_stmt.");
1612 return false;
1615 /* empty stmt is expected only in case of a function argument.
1616 (Otherwise - we expect a phi_node or a modify_expr). */
1617 if (IS_EMPTY_STMT (def_stmt))
1619 tree arg = TREE_OPERAND (def_stmt, 0);
1620 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1621 return true;
1622 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1624 fprintf (vect_dump, "Unexpected empty stmt: ");
1625 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1627 return false;
1630 /* phi_node inside the loop indicates an induction/reduction pattern.
1631 This is not supported yet. */
1632 bb = bb_for_stmt (def_stmt);
1633 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1635 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1636 fprintf (vect_dump, "reduction/induction - unsupported.");
1637 return false; /* FORNOW: not supported yet. */
1640 /* Expecting a modify_expr or a phi_node. */
1641 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1642 || TREE_CODE (def_stmt) == PHI_NODE)
1644 if (def)
1645 *def = def_stmt;
1646 return true;
1649 return false;
1653 /* Function vect_is_simple_iv_evolution.
1655 FORNOW: A simple evolution of an induction variables in the loop is
1656 considered a polynomial evolution with constant step. */
1658 bool
1659 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1660 tree * step)
1662 tree init_expr;
1663 tree step_expr;
1665 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1667 /* When there is no evolution in this loop, the evolution function
1668 is not "simple". */
1669 if (evolution_part == NULL_TREE)
1670 return false;
1672 /* When the evolution is a polynomial of degree >= 2
1673 the evolution function is not "simple". */
1674 if (tree_is_chrec (evolution_part))
1675 return false;
1677 step_expr = evolution_part;
1678 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1679 loop_nb));
1681 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1683 fprintf (vect_dump, "step: ");
1684 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1685 fprintf (vect_dump, ", init: ");
1686 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1689 *init = init_expr;
1690 *step = step_expr;
1692 if (TREE_CODE (step_expr) != INTEGER_CST)
1694 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1695 fprintf (vect_dump, "step unknown.");
1696 return false;
1699 return true;
1703 /* Function vectorize_loops.
1705 Entry Point to loop vectorization phase. */
1707 void
1708 vectorize_loops (struct loops *loops)
1710 unsigned int i;
1711 unsigned int num_vectorized_loops = 0;
1713 /* Fix the verbosity level if not defined explicitly by the user. */
1714 vect_set_dump_settings ();
1716 /* ----------- Analyze loops. ----------- */
1718 /* If some loop was duplicated, it gets bigger number
1719 than all previously defined loops. This fact allows us to run
1720 only over initial loops skipping newly generated ones. */
1721 vect_loops_num = loops->num;
1722 for (i = 1; i < vect_loops_num; i++)
1724 loop_vec_info loop_vinfo;
1725 struct loop *loop = loops->parray[i];
1727 if (!loop)
1728 continue;
1730 loop_vinfo = vect_analyze_loop (loop);
1731 loop->aux = loop_vinfo;
1733 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1734 continue;
1736 vect_transform_loop (loop_vinfo, loops);
1737 num_vectorized_loops++;
1740 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1741 fprintf (vect_dump, "vectorized %u loops in function.\n",
1742 num_vectorized_loops);
1744 /* ----------- Finalize. ----------- */
1746 for (i = 1; i < vect_loops_num; i++)
1748 struct loop *loop = loops->parray[i];
1749 loop_vec_info loop_vinfo;
1751 if (!loop)
1752 continue;
1753 loop_vinfo = loop->aux;
1754 destroy_loop_vec_info (loop_vinfo);
1755 loop->aux = NULL;