Merge from the pain train
[official-gcc.git] / gcc / tree-vectorizer.c
blob8c447428780817ba3a4ec9c80aa794aac8cb3651
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131 #include "rtl.h"
132 #include "basic-block.h"
133 #include "diagnostic.h"
134 #include "tree-flow.h"
135 #include "tree-dump.h"
136 #include "timevar.h"
137 #include "cfgloop.h"
138 #include "cfglayout.h"
139 #include "expr.h"
140 #include "optabs.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /*************************************************************************
150 Simple Loop Peeling Utilities
151 *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153 (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155 (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
157 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
159 static void allocate_new_names (bitmap);
160 static void rename_use_op (use_operand_p);
161 static void rename_def_op (def_operand_p, tree);
162 static void rename_variables_in_bb (basic_block);
163 static void free_new_names (bitmap);
164 static void rename_variables_in_loop (struct loop *);
166 /*************************************************************************
167 General Vectorization Utilities
168 *************************************************************************/
169 static void vect_set_dump_settings (void);
170 static bool need_imm_uses_for (tree);
172 /* vect_dump will be set to stderr or dump_file if exist. */
173 FILE *vect_dump;
175 /* vect_verbosity_level set to an invalid value
176 to mark that it's uninitialized. */
177 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
181 /*************************************************************************
182 Simple Loop Peeling Utilities
184 Utilities to support loop peeling for vectorization purposes.
185 *************************************************************************/
188 /* For each definition in DEFINITIONS this function allocates
189 new ssa name. */
191 static void
192 allocate_new_names (bitmap definitions)
194 unsigned ver;
195 bitmap_iterator bi;
197 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
199 tree def = ssa_name (ver);
200 tree *new_name_ptr = xmalloc (sizeof (tree));
202 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
204 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
205 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
207 SSA_NAME_AUX (def) = new_name_ptr;
212 /* Renames the use *OP_P. */
214 static void
215 rename_use_op (use_operand_p op_p)
217 tree *new_name_ptr;
219 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
220 return;
222 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
224 /* Something defined outside of the loop. */
225 if (!new_name_ptr)
226 return;
228 /* An ordinary ssa name defined in the loop. */
230 SET_USE (op_p, *new_name_ptr);
234 /* Renames the def *OP_P in statement STMT. */
236 static void
237 rename_def_op (def_operand_p op_p, tree stmt)
239 tree *new_name_ptr;
241 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
242 return;
244 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
246 /* Something defined outside of the loop. */
247 if (!new_name_ptr)
248 return;
250 /* An ordinary ssa name defined in the loop. */
252 SET_DEF (op_p, *new_name_ptr);
253 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
257 /* Renames the variables in basic block BB. */
259 static void
260 rename_variables_in_bb (basic_block bb)
262 tree phi;
263 block_stmt_iterator bsi;
264 tree stmt;
265 stmt_ann_t ann;
266 use_optype uses;
267 vuse_optype vuses;
268 def_optype defs;
269 v_may_def_optype v_may_defs;
270 v_must_def_optype v_must_defs;
271 unsigned i;
272 edge e;
273 edge_iterator ei;
274 struct loop *loop = bb->loop_father;
276 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
277 rename_def_op (PHI_RESULT_PTR (phi), phi);
279 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
281 stmt = bsi_stmt (bsi);
282 get_stmt_operands (stmt);
283 ann = stmt_ann (stmt);
285 uses = USE_OPS (ann);
286 for (i = 0; i < NUM_USES (uses); i++)
287 rename_use_op (USE_OP_PTR (uses, i));
289 defs = DEF_OPS (ann);
290 for (i = 0; i < NUM_DEFS (defs); i++)
291 rename_def_op (DEF_OP_PTR (defs, i), stmt);
293 vuses = VUSE_OPS (ann);
294 for (i = 0; i < NUM_VUSES (vuses); i++)
295 rename_use_op (VUSE_OP_PTR (vuses, i));
297 v_may_defs = V_MAY_DEF_OPS (ann);
298 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
300 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
301 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
304 v_must_defs = V_MUST_DEF_OPS (ann);
305 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
307 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
308 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
312 FOR_EACH_EDGE (e, ei, bb->succs)
314 if (!flow_bb_inside_loop_p (loop, e->dest))
315 continue;
316 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
317 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
322 /* Releases the structures holding the new ssa names. */
324 static void
325 free_new_names (bitmap definitions)
327 unsigned ver;
328 bitmap_iterator bi;
330 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
332 tree def = ssa_name (ver);
334 if (SSA_NAME_AUX (def))
336 free (SSA_NAME_AUX (def));
337 SSA_NAME_AUX (def) = NULL;
343 /* Renames variables in new generated LOOP. */
345 static void
346 rename_variables_in_loop (struct loop *loop)
348 unsigned i;
349 basic_block *bbs;
351 bbs = get_loop_body (loop);
353 for (i = 0; i < loop->num_nodes; i++)
354 rename_variables_in_bb (bbs[i]);
356 free (bbs);
360 /* Update the PHI nodes of NEW_LOOP.
362 NEW_LOOP is a duplicate of ORIG_LOOP.
363 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
364 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
365 executes before it. */
367 static void
368 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
369 struct loop *new_loop, bool after)
371 tree *new_name_ptr, new_ssa_name;
372 tree phi_new, phi_orig;
373 tree def;
374 edge orig_loop_latch = loop_latch_edge (orig_loop);
375 edge orig_entry_e = loop_preheader_edge (orig_loop);
376 edge new_loop_exit_e = new_loop->exit_edges[0];
377 edge new_loop_entry_e = loop_preheader_edge (new_loop);
378 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
381 step 1. For each loop-header-phi:
382 Add the first phi argument for the phi in NEW_LOOP
383 (the one associated with the entry of NEW_LOOP)
385 step 2. For each loop-header-phi:
386 Add the second phi argument for the phi in NEW_LOOP
387 (the one associated with the latch of NEW_LOOP)
389 step 3. Update the phis in the successor block of NEW_LOOP.
391 case 1: NEW_LOOP was placed before ORIG_LOOP:
392 The successor block of NEW_LOOP is the header of ORIG_LOOP.
393 Updating the phis in the successor block can therefore be done
394 along with the scanning of the loop header phis, because the
395 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
396 phi nodes, organized in the same order.
398 case 2: NEW_LOOP was placed after ORIG_LOOP:
399 The successor block of NEW_LOOP is the original exit block of
400 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
401 We postpone updating these phis to a later stage (when
402 loop guards are added).
406 /* Scan the phis in the headers of the old and new loops
407 (they are organized in exactly the same order). */
409 for (phi_new = phi_nodes (new_loop->header),
410 phi_orig = phi_nodes (orig_loop->header);
411 phi_new && phi_orig;
412 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
414 /* step 1. */
415 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
416 add_phi_arg (phi_new, def, new_loop_entry_e);
418 /* step 2. */
419 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
420 if (TREE_CODE (def) != SSA_NAME)
421 continue;
423 new_name_ptr = SSA_NAME_AUX (def);
424 if (!new_name_ptr)
425 /* Something defined outside of the loop. */
426 continue;
428 /* An ordinary ssa name defined in the loop. */
429 new_ssa_name = *new_name_ptr;
430 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
432 /* step 3 (case 1). */
433 if (!after)
435 gcc_assert (new_loop_exit_e == orig_entry_e);
436 SET_PHI_ARG_DEF (phi_orig,
437 new_loop_exit_e->dest_idx,
438 new_ssa_name);
444 /* Update PHI nodes for a guard of the LOOP.
446 Input:
447 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
448 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
449 originates from the guard-bb, skips LOOP and reaches the (unique) exit
450 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
451 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
452 LOOP header) before the guard code was added, and now it became a merge
453 point of two paths - the path that ends with the LOOP exit-edge, and
454 the path that ends with GUARD_EDGE.
456 This function creates and updates the relevant phi nodes to account for
457 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
458 1. Create phi nodes at NEW_MERGE_BB.
459 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
460 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
461 was added:
463 ===> The CFG before the guard-code was added:
464 LOOP_header_bb:
465 if (exit_loop) goto update_bb : LOOP_header_bb
466 update_bb:
468 ==> The CFG after the guard-code was added:
469 guard_bb:
470 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
471 LOOP_header_bb:
472 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
473 new_merge_bb:
474 goto update_bb
475 update_bb:
477 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
478 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
479 organized in the same order.
480 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
481 loop exit phis.
483 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
484 "original" loop). FALSE if LOOP is an original loop (not a newly
485 created copy). The SSA_NAME_AUX fields of the defs in the original
486 loop are the corresponding new ssa-names used in the new duplicated
487 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
488 nodes in UPDATE_BB takes the original ssa-name, and which takes the
489 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
490 the LOOP-exit-edge takes the new-name, and the phi-arg that is
491 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
492 FALSE, it's the other way around.
495 static void
496 slpeel_update_phi_nodes_for_guard (edge guard_edge,
497 struct loop *loop,
498 bool entry_phis,
499 bool is_new_loop)
501 tree orig_phi, new_phi, update_phi;
502 tree guard_arg, loop_arg;
503 basic_block new_merge_bb = guard_edge->dest;
504 edge e = EDGE_SUCC (new_merge_bb, 0);
505 basic_block update_bb = e->dest;
506 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
508 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
509 orig_phi && update_phi;
510 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
512 /* 1. Generate new phi node in NEW_MERGE_BB: */
513 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
514 new_merge_bb);
516 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
517 of LOOP. Set the two phi args in NEW_PHI for these edges: */
518 if (entry_phis)
520 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
521 EDGE_SUCC (loop->latch, 0));
522 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
524 else /* exit phis */
526 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
527 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
528 tree new_name;
530 if (new_name_ptr)
531 new_name = *new_name_ptr;
532 else
533 /* Something defined outside of the loop */
534 new_name = orig_def;
536 if (is_new_loop)
538 guard_arg = orig_def;
539 loop_arg = new_name;
541 else
543 guard_arg = new_name;
544 loop_arg = orig_def;
547 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
548 add_phi_arg (new_phi, guard_arg, guard_edge);
550 /* 3. Update phi in successor block. */
551 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
552 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
553 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
556 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
560 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
561 that starts at zero, increases by one and its limit is NITERS.
563 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
565 void
566 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
568 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
569 tree orig_cond;
570 edge exit_edge = loop->exit_edges[0];
571 block_stmt_iterator loop_cond_bsi;
572 block_stmt_iterator incr_bsi;
573 bool insert_after;
574 tree begin_label = tree_block_label (loop->latch);
575 tree exit_label = tree_block_label (loop->single_exit->dest);
576 tree init = build_int_cst (TREE_TYPE (niters), 0);
577 tree step = build_int_cst (TREE_TYPE (niters), 1);
578 tree then_label;
579 tree else_label;
580 LOC loop_loc;
582 orig_cond = get_loop_exit_condition (loop);
583 #ifdef ENABLE_CHECKING
584 gcc_assert (orig_cond);
585 #endif
586 loop_cond_bsi = bsi_for_stmt (orig_cond);
588 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
589 create_iv (init, step, NULL_TREE, loop,
590 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
592 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
594 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
595 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
596 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
598 else /* 'then' edge loops back. */
600 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
601 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
602 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
605 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
606 then_label, else_label);
607 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
609 /* Remove old loop exit test: */
610 bsi_remove (&loop_cond_bsi);
612 loop_loc = find_loop_location (loop);
613 if (dump_file && (dump_flags & TDF_DETAILS))
615 if (loop_loc != UNKNOWN_LOC)
616 fprintf (dump_file, "\nloop at %s:%d: ",
617 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
618 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
621 loop->nb_iterations = niters;
625 /* Given LOOP this function generates a new copy of it and puts it
626 on E which is either the entry or exit of LOOP. */
628 static struct loop *
629 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
630 edge e)
632 struct loop *new_loop;
633 basic_block *new_bbs, *bbs;
634 bool at_exit;
635 bool was_imm_dom;
636 basic_block exit_dest;
637 tree phi, phi_arg;
639 at_exit = (e == loop->exit_edges[0]);
640 if (!at_exit && e != loop_preheader_edge (loop))
641 return NULL;
643 bbs = get_loop_body (loop);
645 /* Check whether duplication is possible. */
646 if (!can_copy_bbs_p (bbs, loop->num_nodes))
648 free (bbs);
649 return NULL;
652 /* Generate new loop structure. */
653 new_loop = duplicate_loop (loops, loop, loop->outer);
654 if (!new_loop)
656 free (bbs);
657 return NULL;
660 exit_dest = loop->exit_edges[0]->dest;
661 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
662 exit_dest) == loop->header ?
663 true : false);
665 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
667 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
669 /* Duplicating phi args at exit bbs as coming
670 also from exit of duplicated loop. */
671 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
673 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
674 if (phi_arg)
676 edge new_loop_exit_edge;
678 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
679 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
680 else
681 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
683 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
687 if (at_exit) /* Add the loop copy at exit. */
689 redirect_edge_and_branch_force (e, new_loop->header);
690 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
691 if (was_imm_dom)
692 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
694 else /* Add the copy at entry. */
696 edge new_exit_e;
697 edge entry_e = loop_preheader_edge (loop);
698 basic_block preheader = entry_e->src;
700 if (!flow_bb_inside_loop_p (new_loop,
701 EDGE_SUCC (new_loop->header, 0)->dest))
702 new_exit_e = EDGE_SUCC (new_loop->header, 0);
703 else
704 new_exit_e = EDGE_SUCC (new_loop->header, 1);
706 redirect_edge_and_branch_force (new_exit_e, loop->header);
707 set_immediate_dominator (CDI_DOMINATORS, loop->header,
708 new_exit_e->src);
710 /* We have to add phi args to the loop->header here as coming
711 from new_exit_e edge. */
712 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
714 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
715 if (phi_arg)
716 add_phi_arg (phi, phi_arg, new_exit_e);
719 redirect_edge_and_branch_force (entry_e, new_loop->header);
720 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
723 flow_loop_scan (new_loop, LOOP_ALL);
724 flow_loop_scan (loop, LOOP_ALL);
725 free (new_bbs);
726 free (bbs);
728 return new_loop;
732 /* Given the condition statement COND, put it as the last statement
733 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
734 Assumes that this is the single exit of the guarded loop.
735 Returns the skip edge. */
737 static edge
738 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
739 basic_block dom_bb)
741 block_stmt_iterator bsi;
742 edge new_e, enter_e;
743 tree cond_stmt, then_label, else_label;
745 enter_e = EDGE_SUCC (guard_bb, 0);
746 enter_e->flags &= ~EDGE_FALLTHRU;
747 enter_e->flags |= EDGE_FALSE_VALUE;
748 bsi = bsi_last (guard_bb);
750 then_label = build1 (GOTO_EXPR, void_type_node,
751 tree_block_label (exit_bb));
752 else_label = build1 (GOTO_EXPR, void_type_node,
753 tree_block_label (enter_e->dest));
754 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
755 then_label, else_label);
756 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
757 /* Add new edge to connect entry block to the second loop. */
758 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
759 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
760 return new_e;
764 /* This function verifies that the following restrictions apply to LOOP:
765 (1) it is innermost
766 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
767 (3) it is single entry, single exit
768 (4) its exit condition is the last stmt in the header
769 (5) E is the entry/exit edge of LOOP.
772 bool
773 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
775 edge exit_e = loop->exit_edges [0];
776 edge entry_e = loop_preheader_edge (loop);
777 tree orig_cond = get_loop_exit_condition (loop);
778 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
780 if (any_marked_for_rewrite_p ())
781 return false;
783 if (loop->inner
784 /* All loops have an outer scope; the only case loop->outer is NULL is for
785 the function itself. */
786 || !loop->outer
787 || loop->num_nodes != 2
788 || !empty_block_p (loop->latch)
789 || loop->num_exits != 1
790 || loop->num_entries != 1
791 /* Verify that new loop exit condition can be trivially modified. */
792 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
793 || (e != exit_e && e != entry_e))
794 return false;
796 return true;
799 #ifdef ENABLE_CHECKING
800 void
801 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
802 struct loop *second_loop)
804 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
805 basic_block loop2_entry_bb = second_loop->pre_header;
806 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
808 /* A guard that controls whether the second_loop is to be executed or skipped
809 is placed in first_loop->exit. first_loopt->exit therefore has two
810 successors - one is the preheader of second_loop, and the other is a bb
811 after second_loop.
813 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
816 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
817 of second_loop. */
819 /* The preheader of new_loop is expected to have two predessors:
820 first_loop->exit and the block that precedes first_loop. */
822 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
823 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
824 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
825 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
826 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
828 /* Verify that the other successor of first_loopt->exit is after the
829 second_loop. */
830 /* TODO */
832 #endif
834 /* Function slpeel_tree_peel_loop_to_edge.
836 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
837 that is placed on the entry (exit) edge E of LOOP. After this transformation
838 we have two loops one after the other - first-loop iterates FIRST_NITERS
839 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
841 Input:
842 - LOOP: the loop to be peeled.
843 - E: the exit or entry edge of LOOP.
844 If it is the entry edge, we peel the first iterations of LOOP. In this
845 case first-loop is LOOP, and second-loop is the newly created loop.
846 If it is the exit edge, we peel the last iterations of LOOP. In this
847 case, first-loop is the newly created loop, and second-loop is LOOP.
848 - NITERS: the number of iterations that LOOP iterates.
849 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
850 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
851 for updating the loop bound of the first-loop to FIRST_NITERS. If it
852 is false, the caller of this function may want to take care of this
853 (this can be useful if we don't want new stmts added to first-loop).
855 Output:
856 The function returns a pointer to the new loop-copy, or NULL if it failed
857 to perform the transformation.
859 The function generates two if-then-else guards: one before the first loop,
860 and the other before the second loop:
861 The first guard is:
862 if (FIRST_NITERS == 0) then skip the first loop,
863 and go directly to the second loop.
864 The second guard is:
865 if (FIRST_NITERS == NITERS) then skip the second loop.
867 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
868 FORNOW the resulting code will not be in loop-closed-ssa form.
871 struct loop*
872 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
873 edge e, tree first_niters,
874 tree niters, bool update_first_loop_count)
876 struct loop *new_loop = NULL, *first_loop, *second_loop;
877 edge skip_e;
878 tree pre_condition;
879 bitmap definitions;
880 basic_block bb_before_second_loop, bb_after_second_loop;
881 basic_block bb_before_first_loop;
882 basic_block bb_between_loops;
883 edge exit_e = loop->exit_edges [0];
884 LOC loop_loc;
886 if (!slpeel_can_duplicate_loop_p (loop, e))
887 return NULL;
889 /* We have to initialize cfg_hooks. Then, when calling
890 cfg_hooks->split_edge, the function tree_split_edge
891 is actually called and, when calling cfg_hooks->duplicate_block,
892 the function tree_duplicate_bb is called. */
893 tree_register_cfg_hooks ();
896 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
897 Resulting CFG would be:
899 first_loop:
900 do {
901 } while ...
903 second_loop:
904 do {
905 } while ...
907 orig_exit_bb:
910 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
912 loop_loc = find_loop_location (loop);
913 if (dump_file && (dump_flags & TDF_DETAILS))
915 if (loop_loc != UNKNOWN_LOC)
916 fprintf (dump_file, "\n%s:%d: note: ",
917 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
918 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
920 return NULL;
923 if (e == exit_e)
925 /* NEW_LOOP was placed after LOOP. */
926 first_loop = loop;
927 second_loop = new_loop;
929 else
931 /* NEW_LOOP was placed before LOOP. */
932 first_loop = new_loop;
933 second_loop = loop;
936 definitions = marked_ssa_names ();
937 allocate_new_names (definitions);
938 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
939 rename_variables_in_loop (new_loop);
942 /* 2. Add the guard that controls whether the first loop is executed.
943 Resulting CFG would be:
945 bb_before_first_loop:
946 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
947 GOTO first-loop
949 first_loop:
950 do {
951 } while ...
953 bb_before_second_loop:
955 second_loop:
956 do {
957 } while ...
959 orig_exit_bb:
962 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
963 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
964 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
965 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
966 flow_loop_scan (first_loop, LOOP_ALL);
967 flow_loop_scan (second_loop, LOOP_ALL);
969 pre_condition =
970 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
971 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
972 bb_before_second_loop, bb_before_first_loop);
973 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
974 first_loop == new_loop);
977 /* 3. Add the guard that controls whether the second loop is executed.
978 Resulting CFG would be:
980 bb_before_first_loop:
981 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
982 GOTO first-loop
984 first_loop:
985 do {
986 } while ...
988 bb_between_loops:
989 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
990 GOTO bb_before_second_loop
992 bb_before_second_loop:
994 second_loop:
995 do {
996 } while ...
998 bb_after_second_loop:
1000 orig_exit_bb:
1003 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1004 add_bb_to_loop (bb_between_loops, first_loop->outer);
1005 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1006 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1007 flow_loop_scan (first_loop, LOOP_ALL);
1008 flow_loop_scan (second_loop, LOOP_ALL);
1010 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1011 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1012 bb_after_second_loop, bb_before_first_loop);
1013 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1014 second_loop == new_loop);
1016 /* Flow loop scan does not update loop->single_exit field. */
1017 first_loop->single_exit = first_loop->exit_edges[0];
1018 second_loop->single_exit = second_loop->exit_edges[0];
1020 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1022 if (update_first_loop_count)
1023 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1025 free_new_names (definitions);
1026 BITMAP_FREE (definitions);
1027 unmark_all_for_rewrite ();
1029 return new_loop;
1032 /* Function vect_get_loop_location.
1034 Extract the location of the loop in the source code.
1035 If the loop is not well formed for vectorization, an estimated
1036 location is calculated.
1037 Return the loop location if succeed and NULL if not. */
1040 find_loop_location (struct loop *loop)
1042 tree node = NULL_TREE;
1043 basic_block bb;
1044 block_stmt_iterator si;
1046 if (!loop)
1047 return UNKNOWN_LOC;
1049 node = get_loop_exit_condition (loop);
1051 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1052 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1053 return EXPR_LOC (node);
1055 /* If we got here the loop is probably not "well formed",
1056 try to estimate the loop location */
1058 if (!loop->header)
1059 return UNKNOWN_LOC;
1061 bb = loop->header;
1063 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1065 node = bsi_stmt (si);
1066 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1067 return EXPR_LOC (node);
1070 return UNKNOWN_LOC;
1074 /*************************************************************************
1075 Vectorization Debug Information.
1076 *************************************************************************/
1078 /* Function vect_set_verbosity_level.
1080 Called from toplev.c upon detection of the
1081 -ftree-vectorizer-verbose=N option. */
1083 void
1084 vect_set_verbosity_level (const char *val)
1086 unsigned int vl;
1088 vl = atoi (val);
1089 if (vl < MAX_VERBOSITY_LEVEL)
1090 vect_verbosity_level = vl;
1091 else
1092 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1096 /* Function vect_set_dump_settings.
1098 Fix the verbosity level of the vectorizer if the
1099 requested level was not set explicitly using the flag
1100 -ftree-vectorizer-verbose=N.
1101 Decide where to print the debugging information (dump_file/stderr).
1102 If the user defined the verbosity level, but there is no dump file,
1103 print to stderr, otherwise print to the dump file. */
1105 static void
1106 vect_set_dump_settings (void)
1108 vect_dump = dump_file;
1110 /* Check if the verbosity level was defined by the user: */
1111 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1113 /* If there is no dump file, print to stderr. */
1114 if (!dump_file)
1115 vect_dump = stderr;
1116 return;
1119 /* User didn't specify verbosity level: */
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 vect_verbosity_level = REPORT_DETAILS;
1122 else if (dump_file && (dump_flags & TDF_STATS))
1123 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1124 else
1125 vect_verbosity_level = REPORT_NONE;
1127 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1131 /* Function debug_loop_details.
1133 For vectorization debug dumps. */
1135 bool
1136 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1138 if (vl > vect_verbosity_level)
1139 return false;
1141 if (loc == UNKNOWN_LOC)
1142 fprintf (vect_dump, "\n%s:%d: note: ",
1143 DECL_SOURCE_FILE (current_function_decl),
1144 DECL_SOURCE_LINE (current_function_decl));
1145 else
1146 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1149 return true;
1153 /*************************************************************************
1154 Vectorization Utilities.
1155 *************************************************************************/
1157 /* Function new_stmt_vec_info.
1159 Create and initialize a new stmt_vec_info struct for STMT. */
1161 stmt_vec_info
1162 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1164 stmt_vec_info res;
1165 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1167 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1168 STMT_VINFO_STMT (res) = stmt;
1169 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1170 STMT_VINFO_RELEVANT_P (res) = 0;
1171 STMT_VINFO_VECTYPE (res) = NULL;
1172 STMT_VINFO_VEC_STMT (res) = NULL;
1173 STMT_VINFO_DATA_REF (res) = NULL;
1174 STMT_VINFO_MEMTAG (res) = NULL;
1175 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1176 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1177 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1178 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1179 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1181 return res;
1185 /* Function new_loop_vec_info.
1187 Create and initialize a new loop_vec_info struct for LOOP, as well as
1188 stmt_vec_info structs for all the stmts in LOOP. */
1190 loop_vec_info
1191 new_loop_vec_info (struct loop *loop)
1193 loop_vec_info res;
1194 basic_block *bbs;
1195 block_stmt_iterator si;
1196 unsigned int i;
1198 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1200 bbs = get_loop_body (loop);
1202 /* Create stmt_info for all stmts in the loop. */
1203 for (i = 0; i < loop->num_nodes; i++)
1205 basic_block bb = bbs[i];
1206 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1208 tree stmt = bsi_stmt (si);
1209 stmt_ann_t ann;
1211 get_stmt_operands (stmt);
1212 ann = stmt_ann (stmt);
1213 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1217 LOOP_VINFO_LOOP (res) = loop;
1218 LOOP_VINFO_BBS (res) = bbs;
1219 LOOP_VINFO_EXIT_COND (res) = NULL;
1220 LOOP_VINFO_NITERS (res) = NULL;
1221 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1222 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1223 LOOP_VINFO_VECT_FACTOR (res) = 0;
1224 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1225 "loop_write_datarefs");
1226 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1227 "loop_read_datarefs");
1228 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1229 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1231 return res;
1235 /* Function destroy_loop_vec_info.
1237 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1238 stmts in the loop. */
1240 void
1241 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1243 struct loop *loop;
1244 basic_block *bbs;
1245 int nbbs;
1246 block_stmt_iterator si;
1247 int j;
1249 if (!loop_vinfo)
1250 return;
1252 loop = LOOP_VINFO_LOOP (loop_vinfo);
1254 bbs = LOOP_VINFO_BBS (loop_vinfo);
1255 nbbs = loop->num_nodes;
1257 for (j = 0; j < nbbs; j++)
1259 basic_block bb = bbs[j];
1260 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1262 tree stmt = bsi_stmt (si);
1263 stmt_ann_t ann = stmt_ann (stmt);
1264 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1265 free (stmt_info);
1266 set_stmt_info (ann, NULL);
1270 free (LOOP_VINFO_BBS (loop_vinfo));
1271 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1272 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1274 free (loop_vinfo);
1278 /* Function vect_strip_conversions
1280 Strip conversions that don't narrow the mode. */
1282 tree
1283 vect_strip_conversion (tree expr)
1285 tree to, ti, oprnd0;
1287 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1289 to = TREE_TYPE (expr);
1290 oprnd0 = TREE_OPERAND (expr, 0);
1291 ti = TREE_TYPE (oprnd0);
1293 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1294 return NULL_TREE;
1295 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1296 return NULL_TREE;
1298 expr = oprnd0;
1300 return expr;
1304 /* Function vect_force_dr_alignment_p.
1306 Returns whether the alignment of a DECL can be forced to be aligned
1307 on ALIGNMENT bit boundary. */
1309 bool
1310 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1312 if (TREE_CODE (decl) != VAR_DECL)
1313 return false;
1315 if (DECL_EXTERNAL (decl))
1316 return false;
1318 if (TREE_ASM_WRITTEN (decl))
1319 return false;
1321 if (TREE_STATIC (decl))
1322 return (alignment <= MAX_OFILE_ALIGNMENT);
1323 else
1324 /* This is not 100% correct. The absolute correct stack alignment
1325 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1326 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1327 However, until someone implements forced stack alignment, SSE
1328 isn't really usable without this. */
1329 return (alignment <= PREFERRED_STACK_BOUNDARY);
1333 /* Function get_vectype_for_scalar_type.
1335 Returns the vector type corresponding to SCALAR_TYPE as supported
1336 by the target. */
1338 tree
1339 get_vectype_for_scalar_type (tree scalar_type)
1341 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1342 int nbytes = GET_MODE_SIZE (inner_mode);
1343 int nunits;
1344 tree vectype;
1346 if (nbytes == 0)
1347 return NULL_TREE;
1349 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1350 is expected. */
1351 nunits = UNITS_PER_SIMD_WORD / nbytes;
1353 vectype = build_vector_type (scalar_type, nunits);
1354 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1356 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1357 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1360 if (!vectype)
1361 return NULL_TREE;
1363 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1365 fprintf (vect_dump, "vectype: ");
1366 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1369 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1371 /* TODO: tree-complex.c sometimes can parallelize operations
1372 on generic vectors. We can vectorize the loop in that case,
1373 but then we should re-run the lowering pass. */
1374 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1375 fprintf (vect_dump, "mode not supported by target.");
1376 return NULL_TREE;
1379 return vectype;
1383 /* Function vect_supportable_dr_alignment
1385 Return whether the data reference DR is supported with respect to its
1386 alignment. */
1388 enum dr_alignment_support
1389 vect_supportable_dr_alignment (struct data_reference *dr)
1391 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1392 enum machine_mode mode = (int) TYPE_MODE (vectype);
1394 if (aligned_access_p (dr))
1395 return dr_aligned;
1397 /* Possibly unaligned access. */
1399 if (DR_IS_READ (dr))
1401 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1402 && (!targetm.vectorize.builtin_mask_for_load
1403 || targetm.vectorize.builtin_mask_for_load ()))
1404 return dr_unaligned_software_pipeline;
1406 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1407 /* Can't software pipeline the loads, but can at least do them. */
1408 return dr_unaligned_supported;
1411 /* Unsupported. */
1412 return dr_unaligned_unsupported;
1416 /* Function vect_is_simple_use.
1418 Input:
1419 LOOP - the loop that is being vectorized.
1420 OPERAND - operand of a stmt in LOOP.
1421 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1423 Returns whether a stmt with OPERAND can be vectorized.
1424 Supportable operands are constants, loop invariants, and operands that are
1425 defined by the current iteration of the loop. Unsupportable operands are
1426 those that are defined by a previous iteration of the loop (as is the case
1427 in reduction/induction computations). */
1429 bool
1430 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1432 tree def_stmt;
1433 basic_block bb;
1434 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1436 if (def)
1437 *def = NULL_TREE;
1439 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1440 return true;
1442 if (TREE_CODE (operand) != SSA_NAME)
1443 return false;
1445 def_stmt = SSA_NAME_DEF_STMT (operand);
1446 if (def_stmt == NULL_TREE )
1448 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1449 fprintf (vect_dump, "no def_stmt.");
1450 return false;
1453 /* empty stmt is expected only in case of a function argument.
1454 (Otherwise - we expect a phi_node or a modify_expr). */
1455 if (IS_EMPTY_STMT (def_stmt))
1457 tree arg = TREE_OPERAND (def_stmt, 0);
1458 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1459 return true;
1460 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1462 fprintf (vect_dump, "Unexpected empty stmt: ");
1463 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1465 return false;
1468 /* phi_node inside the loop indicates an induction/reduction pattern.
1469 This is not supported yet. */
1470 bb = bb_for_stmt (def_stmt);
1471 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1473 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1474 fprintf (vect_dump, "reduction/induction - unsupported.");
1475 return false; /* FORNOW: not supported yet. */
1478 /* Expecting a modify_expr or a phi_node. */
1479 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1480 || TREE_CODE (def_stmt) == PHI_NODE)
1482 if (def)
1483 *def = def_stmt;
1484 return true;
1487 return false;
1491 /* Function vect_is_simple_iv_evolution.
1493 FORNOW: A simple evolution of an induction variables in the loop is
1494 considered a polynomial evolution with constant step. */
1496 bool
1497 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1498 tree * step)
1500 tree init_expr;
1501 tree step_expr;
1503 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1505 /* When there is no evolution in this loop, the evolution function
1506 is not "simple". */
1507 if (evolution_part == NULL_TREE)
1508 return false;
1510 /* When the evolution is a polynomial of degree >= 2
1511 the evolution function is not "simple". */
1512 if (tree_is_chrec (evolution_part))
1513 return false;
1515 step_expr = evolution_part;
1516 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1517 loop_nb));
1519 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1521 fprintf (vect_dump, "step: ");
1522 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1523 fprintf (vect_dump, ", init: ");
1524 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1527 *init = init_expr;
1528 *step = step_expr;
1530 if (TREE_CODE (step_expr) != INTEGER_CST)
1532 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1533 fprintf (vect_dump, "step unknown.");
1534 return false;
1537 return true;
1541 /* Function need_imm_uses_for.
1543 Return whether we ought to include information for 'var'
1544 when calculating immediate uses. For this pass we only want use
1545 information for non-virtual variables. */
1547 static bool
1548 need_imm_uses_for (tree var)
1550 return is_gimple_reg (var);
1554 /* Function vectorize_loops.
1556 Entry Point to loop vectorization phase. */
1558 void
1559 vectorize_loops (struct loops *loops)
1561 unsigned int i, loops_num;
1562 unsigned int num_vectorized_loops = 0;
1564 /* Fix the verbosity level if not defined explicitly by the user. */
1565 vect_set_dump_settings ();
1567 /* Does the target support SIMD? */
1568 /* FORNOW: until more sophisticated machine modelling is in place. */
1569 if (!UNITS_PER_SIMD_WORD)
1571 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1572 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
1573 return;
1576 #ifdef ENABLE_CHECKING
1577 verify_loop_closed_ssa ();
1578 #endif
1580 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
1582 /* ----------- Analyze loops. ----------- */
1584 /* If some loop was duplicated, it gets bigger number
1585 than all previously defined loops. This fact allows us to run
1586 only over initial loops skipping newly generated ones. */
1587 loops_num = loops->num;
1588 for (i = 1; i < loops_num; i++)
1590 loop_vec_info loop_vinfo;
1591 struct loop *loop = loops->parray[i];
1593 if (!loop)
1594 continue;
1596 loop_vinfo = vect_analyze_loop (loop);
1597 loop->aux = loop_vinfo;
1599 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1600 continue;
1602 vect_transform_loop (loop_vinfo, loops);
1603 num_vectorized_loops++;
1606 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1607 fprintf (vect_dump, "vectorized %u loops in function.\n",
1608 num_vectorized_loops);
1610 /* ----------- Finalize. ----------- */
1612 free_df ();
1613 for (i = 1; i < loops_num; i++)
1615 struct loop *loop = loops->parray[i];
1616 loop_vec_info loop_vinfo;
1618 if (!loop)
1619 continue;
1620 loop_vinfo = loop->aux;
1621 destroy_loop_vec_info (loop_vinfo);
1622 loop->aux = NULL;
1625 rewrite_into_ssa (false);
1626 rewrite_into_loop_closed_ssa (); /* FORNOW */
1627 bitmap_clear (vars_to_rename);