Check in tree-dce enh to trunk
[official-gcc.git] / gcc / tree-vectorizer.c
blob79a7461a1a9aee75d9deba700ab1ae5bf40c2ca1
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Loop Vectorization Pass.
23 This pass tries to vectorize loops. This first implementation focuses on
24 simple inner-most loops, with no conditional control flow, and a set of
25 simple operations which vector form can be expressed using existing
26 tree codes (PLUS, MULT etc).
28 For example, the vectorizer transforms the following simple loop:
30 short a[N]; short b[N]; short c[N]; int i;
32 for (i=0; i<N; i++){
33 a[i] = b[i] + c[i];
36 as if it was manually vectorized by rewriting the source code into:
38 typedef int __attribute__((mode(V8HI))) v8hi;
39 short a[N]; short b[N]; short c[N]; int i;
40 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
41 v8hi va, vb, vc;
43 for (i=0; i<N/8; i++){
44 vb = pb[i];
45 vc = pc[i];
46 va = vb + vc;
47 pa[i] = va;
50 The main entry to this pass is vectorize_loops(), in which
51 the vectorizer applies a set of analyses on a given set of loops,
52 followed by the actual vectorization transformation for the loops that
53 had successfully passed the analysis phase.
55 Throughout this pass we make a distinction between two types of
56 data: scalars (which are represented by SSA_NAMES), and memory references
57 ("data-refs"). These two types of data require different handling both
58 during analysis and transformation. The types of data-refs that the
59 vectorizer currently supports are ARRAY_REFS which base is an array DECL
60 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
61 accesses are required to have a simple (consecutive) access pattern.
63 Analysis phase:
64 ===============
65 The driver for the analysis phase is vect_analyze_loop_nest().
66 It applies a set of analyses, some of which rely on the scalar evolution
67 analyzer (scev) developed by Sebastian Pop.
69 During the analysis phase the vectorizer records some information
70 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
71 loop, as well as general information about the loop as a whole, which is
72 recorded in a "loop_vec_info" struct attached to each loop.
74 Transformation phase:
75 =====================
76 The loop transformation phase scans all the stmts in the loop, and
77 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
78 the loop that needs to be vectorized. It insert the vector code sequence
79 just before the scalar stmt S, and records a pointer to the vector code
80 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
81 attached to S). This pointer will be used for the vectorization of following
82 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
83 otherwise, we rely on dead code elimination for removing it.
85 For example, say stmt S1 was vectorized into stmt VS1:
87 VS1: vb = px[i];
88 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
89 S2: a = b;
91 To vectorize stmt S2, the vectorizer first finds the stmt that defines
92 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
93 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
94 resulting sequence would be:
96 VS1: vb = px[i];
97 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
98 VS2: va = vb;
99 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101 Operands that are not SSA_NAMEs, are data-refs that appear in
102 load/store operations (like 'x[i]' in S1), and are handled differently.
104 Target modeling:
105 =================
106 Currently the only target specific information that is used is the
107 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
108 support different sizes of vectors, for now will need to specify one value
109 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111 Since we only vectorize operations which vector form can be
112 expressed using existing tree codes, to verify that an operation is
113 supported, the vectorizer checks the relevant optab at the relevant
114 machine_mode (e.g, optab_handler (add_optab, V8HImode)->insn_code). If
115 the value found is CODE_FOR_nothing, then there's no target support, and
116 we can't vectorize the stmt.
118 For additional information on this project see:
119 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
122 #include "config.h"
123 #include "system.h"
124 #include "coretypes.h"
125 #include "tm.h"
126 #include "ggc.h"
127 #include "tree.h"
128 #include "target.h"
129 #include "rtl.h"
130 #include "basic-block.h"
131 #include "diagnostic.h"
132 #include "tree-flow.h"
133 #include "tree-dump.h"
134 #include "timevar.h"
135 #include "cfgloop.h"
136 #include "cfglayout.h"
137 #include "expr.h"
138 #include "recog.h"
139 #include "optabs.h"
140 #include "params.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /*************************************************************************
150 General Vectorization Utilities
151 *************************************************************************/
153 /* vect_dump will be set to stderr or dump_file if exist. */
154 FILE *vect_dump;
156 /* vect_verbosity_level set to an invalid value
157 to mark that it's uninitialized. */
158 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
160 /* Loop location. */
161 static LOC vect_loop_location;
163 /* Bitmap of virtual variables to be renamed. */
164 bitmap vect_memsyms_to_rename;
166 /*************************************************************************
167 Simple Loop Peeling Utilities
169 Utilities to support loop peeling for vectorization purposes.
170 *************************************************************************/
173 /* Renames the use *OP_P. */
175 static void
176 rename_use_op (use_operand_p op_p)
178 tree new_name;
180 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
181 return;
183 new_name = get_current_def (USE_FROM_PTR (op_p));
185 /* Something defined outside of the loop. */
186 if (!new_name)
187 return;
189 /* An ordinary ssa name defined in the loop. */
191 SET_USE (op_p, new_name);
195 /* Renames the variables in basic block BB. */
197 static void
198 rename_variables_in_bb (basic_block bb)
200 tree phi;
201 block_stmt_iterator bsi;
202 tree stmt;
203 use_operand_p use_p;
204 ssa_op_iter iter;
205 edge e;
206 edge_iterator ei;
207 struct loop *loop = bb->loop_father;
209 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
211 stmt = bsi_stmt (bsi);
212 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
213 rename_use_op (use_p);
216 FOR_EACH_EDGE (e, ei, bb->succs)
218 if (!flow_bb_inside_loop_p (loop, e->dest))
219 continue;
220 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
221 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
226 /* Renames variables in new generated LOOP. */
228 void
229 rename_variables_in_loop (struct loop *loop)
231 unsigned i;
232 basic_block *bbs;
234 bbs = get_loop_body (loop);
236 for (i = 0; i < loop->num_nodes; i++)
237 rename_variables_in_bb (bbs[i]);
239 free (bbs);
243 /* Update the PHI nodes of NEW_LOOP.
245 NEW_LOOP is a duplicate of ORIG_LOOP.
246 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
247 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
248 executes before it. */
250 static void
251 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
252 struct loop *new_loop, bool after)
254 tree new_ssa_name;
255 tree phi_new, phi_orig;
256 tree def;
257 edge orig_loop_latch = loop_latch_edge (orig_loop);
258 edge orig_entry_e = loop_preheader_edge (orig_loop);
259 edge new_loop_exit_e = single_exit (new_loop);
260 edge new_loop_entry_e = loop_preheader_edge (new_loop);
261 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
264 step 1. For each loop-header-phi:
265 Add the first phi argument for the phi in NEW_LOOP
266 (the one associated with the entry of NEW_LOOP)
268 step 2. For each loop-header-phi:
269 Add the second phi argument for the phi in NEW_LOOP
270 (the one associated with the latch of NEW_LOOP)
272 step 3. Update the phis in the successor block of NEW_LOOP.
274 case 1: NEW_LOOP was placed before ORIG_LOOP:
275 The successor block of NEW_LOOP is the header of ORIG_LOOP.
276 Updating the phis in the successor block can therefore be done
277 along with the scanning of the loop header phis, because the
278 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279 phi nodes, organized in the same order.
281 case 2: NEW_LOOP was placed after ORIG_LOOP:
282 The successor block of NEW_LOOP is the original exit block of
283 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284 We postpone updating these phis to a later stage (when
285 loop guards are added).
289 /* Scan the phis in the headers of the old and new loops
290 (they are organized in exactly the same order). */
292 for (phi_new = phi_nodes (new_loop->header),
293 phi_orig = phi_nodes (orig_loop->header);
294 phi_new && phi_orig;
295 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
297 /* step 1. */
298 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
299 add_phi_arg (phi_new, def, new_loop_entry_e);
301 /* step 2. */
302 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
303 if (TREE_CODE (def) != SSA_NAME)
304 continue;
306 new_ssa_name = get_current_def (def);
307 if (!new_ssa_name)
309 /* This only happens if there are no definitions
310 inside the loop. use the phi_result in this case. */
311 new_ssa_name = PHI_RESULT (phi_new);
314 /* An ordinary ssa name defined in the loop. */
315 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
317 /* step 3 (case 1). */
318 if (!after)
320 gcc_assert (new_loop_exit_e == orig_entry_e);
321 SET_PHI_ARG_DEF (phi_orig,
322 new_loop_exit_e->dest_idx,
323 new_ssa_name);
329 /* Update PHI nodes for a guard of the LOOP.
331 Input:
332 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
333 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
334 originates from the guard-bb, skips LOOP and reaches the (unique) exit
335 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
336 We denote this bb NEW_MERGE_BB because before the guard code was added
337 it had a single predecessor (the LOOP header), and now it became a merge
338 point of two paths - the path that ends with the LOOP exit-edge, and
339 the path that ends with GUARD_EDGE.
340 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
341 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
343 ===> The CFG before the guard-code was added:
344 LOOP_header_bb:
345 loop_body
346 if (exit_loop) goto update_bb
347 else goto LOOP_header_bb
348 update_bb:
350 ==> The CFG after the guard-code was added:
351 guard_bb:
352 if (LOOP_guard_condition) goto new_merge_bb
353 else goto LOOP_header_bb
354 LOOP_header_bb:
355 loop_body
356 if (exit_loop_condition) goto new_merge_bb
357 else goto LOOP_header_bb
358 new_merge_bb:
359 goto update_bb
360 update_bb:
362 ==> The CFG after this function:
363 guard_bb:
364 if (LOOP_guard_condition) goto new_merge_bb
365 else goto LOOP_header_bb
366 LOOP_header_bb:
367 loop_body
368 if (exit_loop_condition) goto new_exit_bb
369 else goto LOOP_header_bb
370 new_exit_bb:
371 new_merge_bb:
372 goto update_bb
373 update_bb:
375 This function:
376 1. creates and updates the relevant phi nodes to account for the new
377 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
378 1.1. Create phi nodes at NEW_MERGE_BB.
379 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
380 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
381 2. preserves loop-closed-ssa-form by creating the required phi nodes
382 at the exit of LOOP (i.e, in NEW_EXIT_BB).
384 There are two flavors to this function:
386 slpeel_update_phi_nodes_for_guard1:
387 Here the guard controls whether we enter or skip LOOP, where LOOP is a
388 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
389 for variables that have phis in the loop header.
391 slpeel_update_phi_nodes_for_guard2:
392 Here the guard controls whether we enter or skip LOOP, where LOOP is an
393 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
394 for variables that have phis in the loop exit.
396 I.E., the overall structure is:
398 loop1_preheader_bb:
399 guard1 (goto loop1/merge1_bb)
400 loop1
401 loop1_exit_bb:
402 guard2 (goto merge1_bb/merge2_bb)
403 merge1_bb
404 loop2
405 loop2_exit_bb
406 merge2_bb
407 next_bb
409 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
410 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
411 that have phis in loop1->header).
413 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
414 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
415 that have phis in next_bb). It also adds some of these phis to
416 loop1_exit_bb.
418 slpeel_update_phi_nodes_for_guard1 is always called before
419 slpeel_update_phi_nodes_for_guard2. They are both needed in order
420 to create correct data-flow and loop-closed-ssa-form.
422 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
423 that change between iterations of a loop (and therefore have a phi-node
424 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
425 phis for variables that are used out of the loop (and therefore have
426 loop-closed exit phis). Some variables may be both updated between
427 iterations and used after the loop. This is why in loop1_exit_bb we
428 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
429 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
431 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
432 an original loop. i.e., we have:
434 orig_loop
435 guard_bb (goto LOOP/new_merge)
436 new_loop <-- LOOP
437 new_exit
438 new_merge
439 next_bb
441 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
442 have:
444 new_loop
445 guard_bb (goto LOOP/new_merge)
446 orig_loop <-- LOOP
447 new_exit
448 new_merge
449 next_bb
451 The SSA names defined in the original loop have a current
452 reaching definition that records the corresponding new
453 ssa-name used in the new duplicated loop copy.  */
456 /* Function slpeel_update_phi_nodes_for_guard1
458 Input:
459 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
460 - DEFS - a bitmap of ssa names to mark new names for which we recorded
461 information.
463 In the context of the overall structure, we have:
465 loop1_preheader_bb:
466 guard1 (goto loop1/merg1_bb)
467 LOOP-> loop1
468 loop1_exit_bb:
469 guard2 (goto merge1_bb/merge2_bb)
470 merge1_bb
471 loop2
472 loop2_exit_bb
473 merge2_bb
474 next_bb
476 For each name updated between loop iterations (i.e - for each name that has
477 an entry (loop-header) phi in LOOP) we create a new phi in:
478 1. merge1_bb (to account for the edge from guard1)
479 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
482 static void
483 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
484 bool is_new_loop, basic_block *new_exit_bb,
485 bitmap *defs)
487 tree orig_phi, new_phi;
488 tree update_phi, update_phi2;
489 tree guard_arg, loop_arg;
490 basic_block new_merge_bb = guard_edge->dest;
491 edge e = EDGE_SUCC (new_merge_bb, 0);
492 basic_block update_bb = e->dest;
493 basic_block orig_bb = loop->header;
494 edge new_exit_e;
495 tree current_new_name;
496 tree name;
498 /* Create new bb between loop and new_merge_bb. */
499 *new_exit_bb = split_edge (single_exit (loop));
501 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
503 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
504 orig_phi && update_phi;
505 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
507 /* Virtual phi; Mark it for renaming. We actually want to call
508 mar_sym_for_renaming, but since all ssa renaming datastructures
509 are going to be freed before we get to call ssa_upate, we just
510 record this name for now in a bitmap, and will mark it for
511 renaming later. */
512 name = PHI_RESULT (orig_phi);
513 if (!is_gimple_reg (SSA_NAME_VAR (name)))
514 bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
516 /** 1. Handle new-merge-point phis **/
518 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
519 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
520 new_merge_bb);
522 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
523 of LOOP. Set the two phi args in NEW_PHI for these edges: */
524 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
525 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
527 add_phi_arg (new_phi, loop_arg, new_exit_e);
528 add_phi_arg (new_phi, guard_arg, guard_edge);
530 /* 1.3. Update phi in successor block. */
531 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
532 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
533 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
534 update_phi2 = new_phi;
537 /** 2. Handle loop-closed-ssa-form phis **/
539 if (!is_gimple_reg (PHI_RESULT (orig_phi)))
540 continue;
542 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
543 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
544 *new_exit_bb);
546 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
547 add_phi_arg (new_phi, loop_arg, single_exit (loop));
549 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
550 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
551 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
553 /* 2.4. Record the newly created name with set_current_def.
554 We want to find a name such that
555 name = get_current_def (orig_loop_name)
556 and to set its current definition as follows:
557 set_current_def (name, new_phi_name)
559 If LOOP is a new loop then loop_arg is already the name we're
560 looking for. If LOOP is the original loop, then loop_arg is
561 the orig_loop_name and the relevant name is recorded in its
562 current reaching definition. */
563 if (is_new_loop)
564 current_new_name = loop_arg;
565 else
567 current_new_name = get_current_def (loop_arg);
568 /* current_def is not available only if the variable does not
569 change inside the loop, in which case we also don't care
570 about recording a current_def for it because we won't be
571 trying to create loop-exit-phis for it. */
572 if (!current_new_name)
573 continue;
575 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
577 set_current_def (current_new_name, PHI_RESULT (new_phi));
578 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
581 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
585 /* Function slpeel_update_phi_nodes_for_guard2
587 Input:
588 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
590 In the context of the overall structure, we have:
592 loop1_preheader_bb:
593 guard1 (goto loop1/merg1_bb)
594 loop1
595 loop1_exit_bb:
596 guard2 (goto merge1_bb/merge2_bb)
597 merge1_bb
598 LOOP-> loop2
599 loop2_exit_bb
600 merge2_bb
601 next_bb
603 For each name used out side the loop (i.e - for each name that has an exit
604 phi in next_bb) we create a new phi in:
605 1. merge2_bb (to account for the edge from guard_bb)
606 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
607 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
608 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
611 static void
612 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
613 bool is_new_loop, basic_block *new_exit_bb)
615 tree orig_phi, new_phi;
616 tree update_phi, update_phi2;
617 tree guard_arg, loop_arg;
618 basic_block new_merge_bb = guard_edge->dest;
619 edge e = EDGE_SUCC (new_merge_bb, 0);
620 basic_block update_bb = e->dest;
621 edge new_exit_e;
622 tree orig_def, orig_def_new_name;
623 tree new_name, new_name2;
624 tree arg;
626 /* Create new bb between loop and new_merge_bb. */
627 *new_exit_bb = split_edge (single_exit (loop));
629 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
631 for (update_phi = phi_nodes (update_bb); update_phi;
632 update_phi = PHI_CHAIN (update_phi))
634 orig_phi = update_phi;
635 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
636 /* This loop-closed-phi actually doesn't represent a use
637 out of the loop - the phi arg is a constant. */
638 if (TREE_CODE (orig_def) != SSA_NAME)
639 continue;
640 orig_def_new_name = get_current_def (orig_def);
641 arg = NULL_TREE;
643 /** 1. Handle new-merge-point phis **/
645 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
646 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
647 new_merge_bb);
649 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
650 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
651 new_name = orig_def;
652 new_name2 = NULL_TREE;
653 if (orig_def_new_name)
655 new_name = orig_def_new_name;
656 /* Some variables have both loop-entry-phis and loop-exit-phis.
657 Such variables were given yet newer names by phis placed in
658 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
659 new_name2 = get_current_def (get_current_def (orig_name)). */
660 new_name2 = get_current_def (new_name);
663 if (is_new_loop)
665 guard_arg = orig_def;
666 loop_arg = new_name;
668 else
670 guard_arg = new_name;
671 loop_arg = orig_def;
673 if (new_name2)
674 guard_arg = new_name2;
676 add_phi_arg (new_phi, loop_arg, new_exit_e);
677 add_phi_arg (new_phi, guard_arg, guard_edge);
679 /* 1.3. Update phi in successor block. */
680 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
681 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
682 update_phi2 = new_phi;
685 /** 2. Handle loop-closed-ssa-form phis **/
687 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
688 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
689 *new_exit_bb);
691 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
692 add_phi_arg (new_phi, loop_arg, single_exit (loop));
694 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
695 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
696 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
699 /** 3. Handle loop-closed-ssa-form phis for first loop **/
701 /* 3.1. Find the relevant names that need an exit-phi in
702 GUARD_BB, i.e. names for which
703 slpeel_update_phi_nodes_for_guard1 had not already created a
704 phi node. This is the case for names that are used outside
705 the loop (and therefore need an exit phi) but are not updated
706 across loop iterations (and therefore don't have a
707 loop-header-phi).
709 slpeel_update_phi_nodes_for_guard1 is responsible for
710 creating loop-exit phis in GUARD_BB for names that have a
711 loop-header-phi. When such a phi is created we also record
712 the new name in its current definition. If this new name
713 exists, then guard_arg was set to this new name (see 1.2
714 above). Therefore, if guard_arg is not this new name, this
715 is an indication that an exit-phi in GUARD_BB was not yet
716 created, so we take care of it here. */
717 if (guard_arg == new_name2)
718 continue;
719 arg = guard_arg;
721 /* 3.2. Generate new phi node in GUARD_BB: */
722 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
723 guard_edge->src);
725 /* 3.3. GUARD_BB has one incoming edge: */
726 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
727 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
729 /* 3.4. Update phi in successor of GUARD_BB: */
730 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
731 == guard_arg);
732 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
735 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
739 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
740 that starts at zero, increases by one and its limit is NITERS.
742 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
744 void
745 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
747 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
748 tree orig_cond;
749 edge exit_edge = single_exit (loop);
750 block_stmt_iterator loop_cond_bsi;
751 block_stmt_iterator incr_bsi;
752 bool insert_after;
753 tree init = build_int_cst (TREE_TYPE (niters), 0);
754 tree step = build_int_cst (TREE_TYPE (niters), 1);
755 LOC loop_loc;
757 orig_cond = get_loop_exit_condition (loop);
758 gcc_assert (orig_cond);
759 loop_cond_bsi = bsi_for_stmt (orig_cond);
761 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
762 create_iv (init, step, NULL_TREE, loop,
763 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
765 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
766 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
767 else /* 'then' edge loops back. */
768 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
770 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
771 NULL_TREE, NULL_TREE);
772 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
774 /* Remove old loop exit test: */
775 bsi_remove (&loop_cond_bsi, true);
777 loop_loc = find_loop_location (loop);
778 if (dump_file && (dump_flags & TDF_DETAILS))
780 if (loop_loc != UNKNOWN_LOC)
781 fprintf (dump_file, "\nloop at %s:%d: ",
782 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
783 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
786 loop->nb_iterations = niters;
790 /* Given LOOP this function generates a new copy of it and puts it
791 on E which is either the entry or exit of LOOP. */
793 struct loop *
794 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
796 struct loop *new_loop;
797 basic_block *new_bbs, *bbs;
798 bool at_exit;
799 bool was_imm_dom;
800 basic_block exit_dest;
801 tree phi, phi_arg;
802 edge exit, new_exit;
804 at_exit = (e == single_exit (loop));
805 if (!at_exit && e != loop_preheader_edge (loop))
806 return NULL;
808 bbs = get_loop_body (loop);
810 /* Check whether duplication is possible. */
811 if (!can_copy_bbs_p (bbs, loop->num_nodes))
813 free (bbs);
814 return NULL;
817 /* Generate new loop structure. */
818 new_loop = duplicate_loop (loop, loop_outer (loop));
819 if (!new_loop)
821 free (bbs);
822 return NULL;
825 exit_dest = single_exit (loop)->dest;
826 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
827 exit_dest) == loop->header ?
828 true : false);
830 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
832 exit = single_exit (loop);
833 copy_bbs (bbs, loop->num_nodes, new_bbs,
834 &exit, 1, &new_exit, NULL,
835 e->src);
837 /* Duplicating phi args at exit bbs as coming
838 also from exit of duplicated loop. */
839 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
841 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
842 if (phi_arg)
844 edge new_loop_exit_edge;
846 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
847 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
848 else
849 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
851 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
855 if (at_exit) /* Add the loop copy at exit. */
857 redirect_edge_and_branch_force (e, new_loop->header);
858 PENDING_STMT (e) = NULL;
859 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
860 if (was_imm_dom)
861 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
863 else /* Add the copy at entry. */
865 edge new_exit_e;
866 edge entry_e = loop_preheader_edge (loop);
867 basic_block preheader = entry_e->src;
869 if (!flow_bb_inside_loop_p (new_loop,
870 EDGE_SUCC (new_loop->header, 0)->dest))
871 new_exit_e = EDGE_SUCC (new_loop->header, 0);
872 else
873 new_exit_e = EDGE_SUCC (new_loop->header, 1);
875 redirect_edge_and_branch_force (new_exit_e, loop->header);
876 PENDING_STMT (new_exit_e) = NULL;
877 set_immediate_dominator (CDI_DOMINATORS, loop->header,
878 new_exit_e->src);
880 /* We have to add phi args to the loop->header here as coming
881 from new_exit_e edge. */
882 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
884 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
885 if (phi_arg)
886 add_phi_arg (phi, phi_arg, new_exit_e);
889 redirect_edge_and_branch_force (entry_e, new_loop->header);
890 PENDING_STMT (entry_e) = NULL;
891 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
894 free (new_bbs);
895 free (bbs);
897 return new_loop;
901 /* Given the condition statement COND, put it as the last statement
902 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
903 Assumes that this is the single exit of the guarded loop.
904 Returns the skip edge. */
906 static edge
907 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
908 basic_block dom_bb)
910 block_stmt_iterator bsi;
911 edge new_e, enter_e;
912 tree cond_stmt;
913 tree gimplify_stmt_list;
915 enter_e = EDGE_SUCC (guard_bb, 0);
916 enter_e->flags &= ~EDGE_FALLTHRU;
917 enter_e->flags |= EDGE_FALSE_VALUE;
918 bsi = bsi_last (guard_bb);
920 cond =
921 force_gimple_operand (cond, &gimplify_stmt_list, true,
922 NULL_TREE);
923 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
924 NULL_TREE, NULL_TREE);
925 if (gimplify_stmt_list)
926 bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
928 bsi = bsi_last (guard_bb);
929 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
931 /* Add new edge to connect guard block to the merge/loop-exit block. */
932 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
933 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
934 return new_e;
938 /* This function verifies that the following restrictions apply to LOOP:
939 (1) it is innermost
940 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
941 (3) it is single entry, single exit
942 (4) its exit condition is the last stmt in the header
943 (5) E is the entry/exit edge of LOOP.
946 bool
947 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
949 edge exit_e = single_exit (loop);
950 edge entry_e = loop_preheader_edge (loop);
951 tree orig_cond = get_loop_exit_condition (loop);
952 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
954 if (need_ssa_update_p ())
955 return false;
957 if (loop->inner
958 /* All loops have an outer scope; the only case loop->outer is NULL is for
959 the function itself. */
960 || !loop_outer (loop)
961 || loop->num_nodes != 2
962 || !empty_block_p (loop->latch)
963 || !single_exit (loop)
964 /* Verify that new loop exit condition can be trivially modified. */
965 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
966 || (e != exit_e && e != entry_e))
967 return false;
969 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP's exit block
   must end in a two-way guard, and SECOND_LOOP's preheader must be
   reached both from that guard and from the block preceding
   FIRST_LOOP.  */

void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether second_loop is executed or skipped is
     placed in first_loop's exit block, so that block has exactly two
     successors: the preheader of second_loop and a bb after
     second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop's exit is the
     preheader of second_loop.  The preheader is expected to have two
     predecessors: first_loop's exit and the block preceding first_loop
     (in either order).  */
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* 2. Verify that the other successor of first_loop's exit is after
     second_loop.  */
  /* TODO */
}
#endif
1006 /* If the run time cost model check determines that vectorization is
1007 not profitable and hence scalar loop should be generated then set
1008 FIRST_NITERS to prologue peeled iterations. This will allow all the
1009 iterations to be executed in the prologue peeled scalar loop. */
/* NOTE(review): FIRST_NITERS is passed by value, so the final
   assignment of the PHI result below rebinds only the local copy and
   is never seen by the caller -- confirm whether this parameter was
   meant to be a 'tree *'.  */
1011 void
1012 set_prologue_iterations (basic_block bb_before_first_loop,
1013 tree first_niters,
1014 struct loop *loop,
1015 unsigned int th)
1017 edge e;
1018 basic_block cond_bb, then_bb;
1019 tree var, prologue_after_cost_adjust_name, stmt;
1020 block_stmt_iterator bsi;
1021 tree newphi;
1022 edge e_true, e_false, e_fallthru;
1023 tree cond_stmt;
1024 tree gimplify_stmt_list;
1025 tree cost_pre_condition = NULL_TREE;
1026 tree scalar_loop_iters =
1027 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
/* Split the single entry edge twice: COND_BB will hold the run-time
   cost check, THEN_BB the "use the scalar iteration count" path.  */
1029 e = single_pred_edge (bb_before_first_loop);
1030 cond_bb = split_edge(e);
1032 e = single_pred_edge (bb_before_first_loop);
1033 then_bb = split_edge(e);
1034 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
/* FALSE path of the new condition bypasses THEN_BB entirely.  */
1036 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1037 EDGE_FALSE_VALUE);
1038 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1040 e_true = EDGE_PRED (then_bb, 0);
1041 e_true->flags &= ~EDGE_FALLTHRU;
1042 e_true->flags |= EDGE_TRUE_VALUE;
1044 e_fallthru = EDGE_SUCC (then_bb, 0);
/* Build and gimplify the check: scalar_loop_iters <= TH, i.e. the
   cost model says vectorization does not pay off.  */
1046 cost_pre_condition =
1047 build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1048 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1049 cost_pre_condition =
1050 force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1051 true, NULL_TREE);
1052 cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
1053 NULL_TREE, NULL_TREE);
1055 bsi = bsi_last (cond_bb);
1056 if (gimplify_stmt_list)
1057 bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
1059 bsi = bsi_last (cond_bb);
1060 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
/* On the TRUE path, materialize the full scalar iteration count into a
   fresh temporary so it can feed the PHI below.  */
1062 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1063 "prologue_after_cost_adjust");
1064 add_referenced_var (var);
1065 prologue_after_cost_adjust_name =
1066 force_gimple_operand (scalar_loop_iters, &stmt, false, var);
1068 bsi = bsi_last (then_bb);
1069 if (stmt)
1070 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
/* Merge the two values: full scalar count when the check fired,
   original FIRST_NITERS otherwise.  */
1072 newphi = create_phi_node (var, bb_before_first_loop);
1073 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
1074 add_phi_arg (newphi, first_niters, e_false);
/* NOTE(review): dead store -- see the note at the top of the
   function.  */
1076 first_niters = PHI_RESULT (newphi);
1080 /* Function slpeel_tree_peel_loop_to_edge.
1082 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1083 that is placed on the entry (exit) edge E of LOOP. After this transformation
1084 we have two loops one after the other - first-loop iterates FIRST_NITERS
1085 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1086 If the cost model indicates that it is profitable to emit a scalar
1087 loop instead of the vector one, then the prolog (epilog) loop will iterate
1088 for the entire unchanged scalar iterations of the loop.
1090 Input:
1091 - LOOP: the loop to be peeled.
1092 - E: the exit or entry edge of LOOP.
1093 If it is the entry edge, we peel the first iterations of LOOP. In this
1094 case first-loop is LOOP, and second-loop is the newly created loop.
1095 If it is the exit edge, we peel the last iterations of LOOP. In this
1096 case, first-loop is the newly created loop, and second-loop is LOOP.
1097 - NITERS: the number of iterations that LOOP iterates.
1098 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1099 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1100 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1101 is false, the caller of this function may want to take care of this
1102 (this can be useful if we don't want new stmts added to first-loop).
1103 - TH: cost model profitability threshold of iterations for vectorization.
1104 - CHECK_PROFITABILITY: specify whether cost model check has not occured
1105 during versioning and hence needs to occur during
1106 prologue generation or whether cost model check
1107 has not occured during prologue generation and hence
1108 needs to occur during epilogue generation.
1111 Output:
1112 The function returns a pointer to the new loop-copy, or NULL if it failed
1113 to perform the transformation.
1115 The function generates two if-then-else guards: one before the first loop,
1116 and the other before the second loop:
1117 The first guard is:
1118 if (FIRST_NITERS == 0) then skip the first loop,
1119 and go directly to the second loop.
1120 The second guard is:
1121 if (FIRST_NITERS == NITERS) then skip the second loop.
1123 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1124 FORNOW the resulting code will not be in loop-closed-ssa form.
1127 struct loop*
1128 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1129 edge e, tree first_niters,
1130 tree niters, bool update_first_loop_count,
1131 unsigned int th, bool check_profitability)
1133 struct loop *new_loop = NULL, *first_loop, *second_loop;
1134 edge skip_e;
1135 tree pre_condition = NULL_TREE;
1136 bitmap definitions;
1137 basic_block bb_before_second_loop, bb_after_second_loop;
1138 basic_block bb_before_first_loop;
1139 basic_block bb_between_loops;
1140 basic_block new_exit_bb;
1141 edge exit_e = single_exit (loop);
1142 LOC loop_loc;
1143 tree cost_pre_condition = NULL_TREE;
1145 if (!slpeel_can_duplicate_loop_p (loop, e))
1146 return NULL;
1148 /* We have to initialize cfg_hooks. Then, when calling
1149 cfg_hooks->split_edge, the function tree_split_edge
1150 is actually called and, when calling cfg_hooks->duplicate_block,
1151 the function tree_duplicate_bb is called. */
1152 tree_register_cfg_hooks ();
1155 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1156 Resulting CFG would be:
1158 first_loop:
1159 do {
1160 } while ...
1162 second_loop:
1163 do {
1164 } while ...
1166 orig_exit_bb:
1169 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1171 loop_loc = find_loop_location (loop);
1172 if (dump_file && (dump_flags & TDF_DETAILS))
1174 if (loop_loc != UNKNOWN_LOC)
1175 fprintf (dump_file, "\n%s:%d: note: ",
1176 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1177 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1179 return NULL;
1182 if (e == exit_e)
1184 /* NEW_LOOP was placed after LOOP. */
1185 first_loop = loop;
1186 second_loop = new_loop;
1188 else
1190 /* NEW_LOOP was placed before LOOP. */
1191 first_loop = new_loop;
1192 second_loop = loop;
/* Record the SSA names that will need replacement, fix up the PHIs of
   the copy, and give the copy's variables fresh names.  */
1195 definitions = ssa_names_to_replace ();
1196 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1197 rename_variables_in_loop (new_loop);
1200 /* 2. Add the guard code in one of the following ways:
1202 2.a Add the guard that controls whether the first loop is executed.
1203 This occurs when this function is invoked for prologue or epilogiue
1204 generation and when the cost model check can be done at compile time.
1206 Resulting CFG would be:
1208 bb_before_first_loop:
1209 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1210 GOTO first-loop
1212 first_loop:
1213 do {
1214 } while ...
1216 bb_before_second_loop:
1218 second_loop:
1219 do {
1220 } while ...
1222 orig_exit_bb:
1224 2.b Add the cost model check that allows the prologue
1225 to iterate for the entire unchanged scalar
1226 iterations of the loop in the event that the cost
1227 model indicates that the scalar loop is more
1228 profitable than the vector one. This occurs when
1229 this function is invoked for prologue generation
1230 and the cost model check needs to be done at run
1231 time.
1233 Resulting CFG after prologue peeling would be:
1235 if (scalar_loop_iterations <= th)
1236 FIRST_NITERS = scalar_loop_iterations
1238 bb_before_first_loop:
1239 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1240 GOTO first-loop
1242 first_loop:
1243 do {
1244 } while ...
1246 bb_before_second_loop:
1248 second_loop:
1249 do {
1250 } while ...
1252 orig_exit_bb:
1254 2.c Add the cost model check that allows the epilogue
1255 to iterate for the entire unchanged scalar
1256 iterations of the loop in the event that the cost
1257 model indicates that the scalar loop is more
1258 profitable than the vector one. This occurs when
1259 this function is invoked for epilogue generation
1260 and the cost model check needs to be done at run
1261 time.
1263 Resulting CFG after prologue peeling would be:
1265 bb_before_first_loop:
1266 if ((scalar_loop_iterations <= th)
1268 FIRST_NITERS == 0) GOTO bb_before_second_loop
1269 GOTO first-loop
1271 first_loop:
1272 do {
1273 } while ...
1275 bb_before_second_loop:
1277 second_loop:
1278 do {
1279 } while ...
1281 orig_exit_bb:
1284 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1285 bb_before_second_loop = split_edge (single_exit (first_loop));
1287 /* Epilogue peeling. */
1288 if (!update_first_loop_count)
1290 pre_condition =
1291 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1292 build_int_cst (TREE_TYPE (first_niters), 0));
1293 if (check_profitability)
1295 tree scalar_loop_iters
1296 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1297 (loop_vec_info_for_loop (loop)));
1298 cost_pre_condition =
1299 build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1300 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
/* Case 2.c above: fold the run-time cost check into the first
   guard condition.  */
1302 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1303 cost_pre_condition, pre_condition);
1307 /* Prologue peeling. */
1308 else
1310 if (check_profitability)
/* NOTE(review): set_prologue_iterations takes FIRST_NITERS by
   value, so the PHI result it creates is not reflected in the
   FIRST_NITERS used below -- confirm this is intended.  */
1311 set_prologue_iterations (bb_before_first_loop, first_niters,
1312 loop, th);
1314 pre_condition =
1315 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1316 build_int_cst (TREE_TYPE (first_niters), 0));
1319 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1320 bb_before_second_loop, bb_before_first_loop);
1321 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1322 first_loop == new_loop,
1323 &new_exit_bb, &definitions);
1326 /* 3. Add the guard that controls whether the second loop is executed.
1327 Resulting CFG would be:
1329 bb_before_first_loop:
1330 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1331 GOTO first-loop
1333 first_loop:
1334 do {
1335 } while ...
1337 bb_between_loops:
1338 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1339 GOTO bb_before_second_loop
1341 bb_before_second_loop:
1343 second_loop:
1344 do {
1345 } while ...
1347 bb_after_second_loop:
1349 orig_exit_bb:
1352 bb_between_loops = new_exit_bb;
1353 bb_after_second_loop = split_edge (single_exit (second_loop));
1355 pre_condition =
1356 fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1357 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1358 bb_after_second_loop, bb_before_first_loop);
1359 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1360 second_loop == new_loop, &new_exit_bb);
1362 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1364 if (update_first_loop_count)
1365 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1367 BITMAP_FREE (definitions);
1368 delete_update_ssa ();
1370 return new_loop;
1373 /* Function vect_get_loop_location.
1375 Extract the location of the loop in the source code.
1376 If the loop is not well formed for vectorization, an estimated
1377 location is calculated.
1378 Return the loop location if succeed and NULL if not. */
1381 find_loop_location (struct loop *loop)
1383 tree node = NULL_TREE;
1384 basic_block bb;
1385 block_stmt_iterator si;
1387 if (!loop)
1388 return UNKNOWN_LOC;
1390 node = get_loop_exit_condition (loop);
1392 if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)
1393 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1394 return EXPR_LOC (node);
1396 /* If we got here the loop is probably not "well formed",
1397 try to estimate the loop location */
1399 if (!loop->header)
1400 return UNKNOWN_LOC;
1402 bb = loop->header;
1404 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1406 node = bsi_stmt (si);
1407 if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node))
1408 return EXPR_LOC (node);
1411 return UNKNOWN_LOC;
1415 /*************************************************************************
1416 Vectorization Debug Information.
1417 *************************************************************************/
1419 /* Function vect_set_verbosity_level.
1421 Called from toplev.c upon detection of the
1422 -ftree-vectorizer-verbose=N option. */
1424 void
1425 vect_set_verbosity_level (const char *val)
1427 unsigned int vl;
1429 vl = atoi (val);
1430 if (vl < MAX_VERBOSITY_LEVEL)
1431 vect_verbosity_level = vl;
1432 else
1433 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1437 /* Function vect_set_dump_settings.
1439 Fix the verbosity level of the vectorizer if the
1440 requested level was not set explicitly using the flag
1441 -ftree-vectorizer-verbose=N.
1442 Decide where to print the debugging information (dump_file/stderr).
1443 If the user defined the verbosity level, but there is no dump file,
1444 print to stderr, otherwise print to the dump file. */
1446 static void
1447 vect_set_dump_settings (void)
1449 vect_dump = dump_file;
1451 /* Check if the verbosity level was defined by the user: */
1452 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1454 /* If there is no dump file, print to stderr. */
1455 if (!dump_file)
1456 vect_dump = stderr;
1457 return;
1460 /* User didn't specify verbosity level: */
1461 if (dump_file && (dump_flags & TDF_DETAILS))
1462 vect_verbosity_level = REPORT_DETAILS;
1463 else if (dump_file && (dump_flags & TDF_STATS))
1464 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1465 else
1466 vect_verbosity_level = REPORT_NONE;
1468 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1472 /* Function debug_loop_details.
1474 For vectorization debug dumps. */
1476 bool
1477 vect_print_dump_info (enum verbosity_levels vl)
1479 if (vl > vect_verbosity_level)
1480 return false;
1482 if (!current_function_decl || !vect_dump)
1483 return false;
1485 if (vect_loop_location == UNKNOWN_LOC)
1486 fprintf (vect_dump, "\n%s:%d: note: ",
1487 DECL_SOURCE_FILE (current_function_decl),
1488 DECL_SOURCE_LINE (current_function_decl));
1489 else
1490 fprintf (vect_dump, "\n%s:%d: note: ",
1491 LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1493 return true;
1497 /*************************************************************************
1498 Vectorization Utilities.
1499 *************************************************************************/
1501 /* Function new_stmt_vec_info.
1503 Create and initialize a new stmt_vec_info struct for STMT. */
1505 stmt_vec_info
1506 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1508 stmt_vec_info res;
1509 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1511 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1512 STMT_VINFO_STMT (res) = stmt;
1513 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1514 STMT_VINFO_RELEVANT (res) = 0;
1515 STMT_VINFO_LIVE_P (res) = false;
1516 STMT_VINFO_VECTYPE (res) = NULL;
1517 STMT_VINFO_VEC_STMT (res) = NULL;
1518 STMT_VINFO_IN_PATTERN_P (res) = false;
1519 STMT_VINFO_RELATED_STMT (res) = NULL;
1520 STMT_VINFO_DATA_REF (res) = NULL;
1522 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
1523 STMT_VINFO_DR_OFFSET (res) = NULL;
1524 STMT_VINFO_DR_INIT (res) = NULL;
1525 STMT_VINFO_DR_STEP (res) = NULL;
1526 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
1528 if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))
1529 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1530 else
1531 STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1532 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1533 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
1534 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
1535 STMT_SLP_TYPE (res) = 0;
1536 DR_GROUP_FIRST_DR (res) = NULL_TREE;
1537 DR_GROUP_NEXT_DR (res) = NULL_TREE;
1538 DR_GROUP_SIZE (res) = 0;
1539 DR_GROUP_STORE_COUNT (res) = 0;
1540 DR_GROUP_GAP (res) = 0;
1541 DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
1542 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
1544 return res;
1548 /* Free stmt vectorization related info. */
1550 void
1551 free_stmt_vec_info (tree stmt)
1553 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1555 if (!stmt_info)
1556 return;
1558 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1559 free (stmt_info);
1560 set_stmt_info (stmt_ann (stmt), NULL);
1564 /* Function bb_in_loop_p
1566 Used as predicate for dfs order traversal of the loop bbs. */
1568 static bool
1569 bb_in_loop_p (const_basic_block bb, const void *data)
1571 const struct loop *const loop = (const struct loop *)data;
1572 if (flow_bb_inside_loop_p (loop, bb))
1573 return true;
1574 return false;
1578 /* Function new_loop_vec_info.
1580 Create and initialize a new loop_vec_info struct for LOOP, as well as
1581 stmt_vec_info structs for all the stmts in LOOP. */
1583 loop_vec_info
1584 new_loop_vec_info (struct loop *loop)
1586 loop_vec_info res;
1587 basic_block *bbs;
1588 block_stmt_iterator si;
1589 unsigned int i, nbbs;
1591 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1592 LOOP_VINFO_LOOP (res) = loop;
1594 bbs = get_loop_body (loop);
1596 /* Create/Update stmt_info for all stmts in the loop. */
1597 for (i = 0; i < loop->num_nodes; i++)
1599 basic_block bb = bbs[i];
1600 tree phi;
1602 /* BBs in a nested inner-loop will have been already processed (because
1603 we will have called vect_analyze_loop_form for any nested inner-loop).
1604 Therefore, for stmts in an inner-loop we just want to update the
1605 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
1606 loop_info of the outer-loop we are currently considering to vectorize
1607 (instead of the loop_info of the inner-loop).
1608 For stmts in other BBs we need to create a stmt_info from scratch. */
1609 if (bb->loop_father != loop)
1611 /* Inner-loop bb. */
1612 gcc_assert (loop->inner && bb->loop_father == loop->inner);
1613 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1615 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
1616 loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1617 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
1618 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
1620 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1622 tree stmt = bsi_stmt (si);
1623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1624 loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1625 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
1626 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
1629 else
1631 /* bb in current nest. */
1632 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1634 stmt_ann_t ann = get_stmt_ann (phi);
1635 set_stmt_info (ann, new_stmt_vec_info (phi, res));
1638 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1640 tree stmt = bsi_stmt (si);
1641 stmt_ann_t ann = stmt_ann (stmt);
1642 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1647 /* CHECKME: We want to visit all BBs before their successors (except for
1648 latch blocks, for which this assertion wouldn't hold). In the simple
1649 case of the loop forms we allow, a dfs order of the BBs would the same
1650 as reversed postorder traversal, so we are safe. */
1652 free (bbs);
1653 bbs = XCNEWVEC (basic_block, loop->num_nodes);
1654 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1655 bbs, loop->num_nodes, loop);
1656 gcc_assert (nbbs == loop->num_nodes);
1658 LOOP_VINFO_BBS (res) = bbs;
1659 LOOP_VINFO_NITERS (res) = NULL;
1660 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1661 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
1662 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1663 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1664 LOOP_VINFO_VECT_FACTOR (res) = 0;
1665 LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1666 LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1667 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1668 LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
1669 VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
1670 LOOP_VINFO_MAY_ALIAS_DDRS (res) =
1671 VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1672 LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
1673 LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
1674 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1676 return res;
1680 /* Function destroy_loop_vec_info.
1682 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1683 stmts in the loop. */
/* CLEAN_STMTS false means the per-stmt infos are left alone and only
   the loop-level containers are released.  */
1685 void
1686 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1688 struct loop *loop;
1689 basic_block *bbs;
1690 int nbbs;
1691 block_stmt_iterator si;
1692 int j;
1693 VEC (slp_instance, heap) *slp_instances;
1694 slp_instance instance;
1696 if (!loop_vinfo)
1697 return;
1699 loop = LOOP_VINFO_LOOP (loop_vinfo);
1701 bbs = LOOP_VINFO_BBS (loop_vinfo);
1702 nbbs = loop->num_nodes;
/* Shallow teardown: free loop-level containers only.  */
1704 if (!clean_stmts)
1706 free (LOOP_VINFO_BBS (loop_vinfo));
1707 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1708 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1709 VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1711 free (loop_vinfo);
1712 loop->aux = NULL;
1713 return;
/* Deep teardown: free every stmt_vec_info, removing stmts that only
   existed as vectorizer-introduced "pattern stmts".  */
1716 for (j = 0; j < nbbs; j++)
1718 basic_block bb = bbs[j];
1719 tree phi;
1721 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1722 free_stmt_vec_info (phi);
1724 for (si = bsi_start (bb); !bsi_end_p (si); )
1726 tree stmt = bsi_stmt (si);
1727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1729 if (stmt_info)
1731 /* Check if this is a "pattern stmt" (introduced by the
1732 vectorizer during the pattern recognition pass). */
1733 bool remove_stmt_p = false;
1734 tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1735 if (orig_stmt)
1737 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1738 if (orig_stmt_info
1739 && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1740 remove_stmt_p = true;
1743 /* Free stmt_vec_info. */
1744 free_stmt_vec_info (stmt);
1746 /* Remove dead "pattern stmts". */
1747 if (remove_stmt_p)
1748 bsi_remove (&si, true);
/* NOTE(review): bsi_remove already advances SI; verify the
   subsequent bsi_next does not skip the statement following a
   removed pattern stmt.  */
1750 bsi_next (&si);
1754 free (LOOP_VINFO_BBS (loop_vinfo));
1755 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1756 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1757 VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1758 VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
1759 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1760 for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
1761 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
1762 VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
1763 VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
1765 free (loop_vinfo);
1766 loop->aux = NULL;
1770 /* Function vect_force_dr_alignment_p.
1772 Returns whether the alignment of a DECL can be forced to be aligned
1773 on ALIGNMENT bit boundary. */
1775 bool
1776 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
1778 if (TREE_CODE (decl) != VAR_DECL)
1779 return false;
1781 if (DECL_EXTERNAL (decl))
1782 return false;
1784 if (TREE_ASM_WRITTEN (decl))
1785 return false;
1787 if (TREE_STATIC (decl))
1788 return (alignment <= MAX_OFILE_ALIGNMENT);
1789 else
1790 /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
1791 correct until someone implements forced stack alignment. */
1792 return (alignment <= STACK_BOUNDARY);
1796 /* Function get_vectype_for_scalar_type.
1798 Returns the vector type corresponding to SCALAR_TYPE as supported
1799 by the target. */
1801 tree
1802 get_vectype_for_scalar_type (tree scalar_type)
1804 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1805 int nbytes = GET_MODE_SIZE (inner_mode);
1806 int nunits;
1807 tree vectype;
1809 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1810 return NULL_TREE;
1812 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1813 is expected. */
1814 nunits = UNITS_PER_SIMD_WORD / nbytes;
1816 vectype = build_vector_type (scalar_type, nunits);
1817 if (vect_print_dump_info (REPORT_DETAILS))
1819 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1820 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1823 if (!vectype)
1824 return NULL_TREE;
1826 if (vect_print_dump_info (REPORT_DETAILS))
1828 fprintf (vect_dump, "vectype: ");
1829 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1832 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1833 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1835 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "mode not supported by target.");
1837 return NULL_TREE;
1840 return vectype;
1844 /* Function vect_supportable_dr_alignment
1846 Return whether the data reference DR is supported with respect to its
1847 alignment. */
1849 enum dr_alignment_support
1850 vect_supportable_dr_alignment (struct data_reference *dr)
1852 tree stmt = DR_STMT (dr);
1853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 enum machine_mode mode = (int) TYPE_MODE (vectype);
1856 struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
1857 bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
1858 bool invariant_in_outerloop = false;
1860 if (aligned_access_p (dr))
1861 return dr_aligned;
1863 if (nested_in_vect_loop)
1865 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
1866 invariant_in_outerloop =
1867 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
1870 /* Possibly unaligned access. */
1872 /* We can choose between using the implicit realignment scheme (generating
1873 a misaligned_move stmt) and the explicit realignment scheme (generating
1874 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
1875 realignment scheme: optimized, and unoptimized.
1876 We can optimize the realignment only if the step between consecutive
1877 vector loads is equal to the vector size. Since the vector memory
1878 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
1879 is guaranteed that the misalignment amount remains the same throughout the
1880 execution of the vectorized loop. Therefore, we can create the
1881 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
1882 at the loop preheader.
1884 However, in the case of outer-loop vectorization, when vectorizing a
1885 memory access in the inner-loop nested within the LOOP that is now being
1886 vectorized, while it is guaranteed that the misalignment of the
1887 vectorized memory access will remain the same in different outer-loop
1888 iterations, it is *not* guaranteed that is will remain the same throughout
1889 the execution of the inner-loop. This is because the inner-loop advances
1890 with the original scalar step (and not in steps of VS). If the inner-loop
1891 step happens to be a multiple of VS, then the misalignment remains fixed
1892 and we can use the optimized realignment scheme. For example:
1894 for (i=0; i<N; i++)
1895 for (j=0; j<M; j++)
1896 s += a[i+j];
1898 When vectorizing the i-loop in the above example, the step between
1899 consecutive vector loads is 1, and so the misalignment does not remain
1900 fixed across the execution of the inner-loop, and the realignment cannot
1901 be optimized (as illustrated in the following pseudo vectorized loop):
1903 for (i=0; i<N; i+=4)
1904 for (j=0; j<M; j++){
1905 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
1906 // when j is {0,1,2,3,4,5,6,7,...} respectively.
1907 // (assuming that we start from an aligned address).
1910 We therefore have to use the unoptimized realignment scheme:
1912 for (i=0; i<N; i+=4)
1913 for (j=k; j<M; j+=4)
1914 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
1915 // that the misalignment of the initial address is
1916 // 0).
1918 The loop can then be vectorized as follows:
1920 for (k=0; k<4; k++){
1921 rt = get_realignment_token (&vp[k]);
1922 for (i=0; i<N; i+=4){
1923 v1 = vp[i+k];
1924 for (j=k; j<M; j+=4){
1925 v2 = vp[i+j+VS-1];
1926 va = REALIGN_LOAD <v1,v2,rt>;
1927 vs += va;
1928 v1 = v2;
1931 } */
1933 if (DR_IS_READ (dr))
1935 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
1936 CODE_FOR_nothing
1937 && (!targetm.vectorize.builtin_mask_for_load
1938 || targetm.vectorize.builtin_mask_for_load ()))
1940 if (nested_in_vect_loop
1941 && TREE_INT_CST_LOW (DR_STEP (dr)) != UNITS_PER_SIMD_WORD)
1942 return dr_explicit_realign;
1943 else
1944 return dr_explicit_realign_optimized;
1947 if (optab_handler (movmisalign_optab, mode)->insn_code !=
1948 CODE_FOR_nothing)
1949 /* Can't software pipeline the loads, but can at least do them. */
1950 return dr_unaligned_supported;
1953 /* Unsupported. */
1954 return dr_unaligned_unsupported;
1958 /* Function vect_is_simple_use.
1960 Input:
1961 LOOP - the loop that is being vectorized.
1962 OPERAND - operand of a stmt in LOOP.
1963 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1965 Returns whether a stmt with OPERAND can be vectorized.
1966 Supportable operands are constants, loop invariants, and operands that are
1967 defined by the current iteration of the loop. Unsupportable operands are
1968 those that are defined by a previous iteration of the loop (as is the case
1969 in reduction/induction computations). */
1971 bool
1972 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1973 tree *def, enum vect_def_type *dt)
1975 basic_block bb;
1976 stmt_vec_info stmt_vinfo;
1977 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1979 *def_stmt = NULL_TREE;
1980 *def = NULL_TREE;
1982 if (vect_print_dump_info (REPORT_DETAILS))
1984 fprintf (vect_dump, "vect_is_simple_use: operand ");
1985 print_generic_expr (vect_dump, operand, TDF_SLIM);
1988 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1990 *dt = vect_constant_def;
1991 return true;
1993 if (is_gimple_min_invariant (operand))
1995 *def = operand;
1996 *dt = vect_invariant_def;
1997 return true;
2000 if (TREE_CODE (operand) == PAREN_EXPR)
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "non-associatable copy.");
2004 operand = TREE_OPERAND (operand, 0);
2006 if (TREE_CODE (operand) != SSA_NAME)
2008 if (vect_print_dump_info (REPORT_DETAILS))
2009 fprintf (vect_dump, "not ssa-name.");
2010 return false;
2013 *def_stmt = SSA_NAME_DEF_STMT (operand);
2014 if (*def_stmt == NULL_TREE )
2016 if (vect_print_dump_info (REPORT_DETAILS))
2017 fprintf (vect_dump, "no def_stmt.");
2018 return false;
2021 if (vect_print_dump_info (REPORT_DETAILS))
2023 fprintf (vect_dump, "def_stmt: ");
2024 print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
2027 /* empty stmt is expected only in case of a function argument.
2028 (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT). */
2029 if (IS_EMPTY_STMT (*def_stmt))
2031 tree arg = TREE_OPERAND (*def_stmt, 0);
2032 if (is_gimple_min_invariant (arg))
2034 *def = operand;
2035 *dt = vect_invariant_def;
2036 return true;
2039 if (vect_print_dump_info (REPORT_DETAILS))
2040 fprintf (vect_dump, "Unexpected empty stmt.");
2041 return false;
2044 bb = bb_for_stmt (*def_stmt);
2045 if (!flow_bb_inside_loop_p (loop, bb))
2046 *dt = vect_invariant_def;
2047 else
2049 stmt_vinfo = vinfo_for_stmt (*def_stmt);
2050 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
2053 if (*dt == vect_unknown_def_type)
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "Unsupported pattern.");
2057 return false;
2060 if (vect_print_dump_info (REPORT_DETAILS))
2061 fprintf (vect_dump, "type of def: %d.",*dt);
2063 switch (TREE_CODE (*def_stmt))
2065 case PHI_NODE:
2066 *def = PHI_RESULT (*def_stmt);
2067 break;
2069 case GIMPLE_MODIFY_STMT:
2070 *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
2071 break;
2073 default:
2074 if (vect_print_dump_info (REPORT_DETAILS))
2075 fprintf (vect_dump, "unsupported defining stmt: ");
2076 return false;
2079 return true;
2083 /* Function supportable_widening_operation
2085 Check whether an operation represented by the code CODE is a
2086 widening operation that is supported by the target platform in
2087 vector form (i.e., when operating on arguments of type VECTYPE).
2089 Widening operations we currently support are NOP (CONVERT), FLOAT
2090 and WIDEN_MULT. This function checks if these operations are supported
2091 by the target platform either directly (via vector tree-codes), or via
2092 target builtins.
2094 Output:
2095 - CODE1 and CODE2 are codes of vector operations to be used when
2096 vectorizing the operation, if available.
2097 - DECL1 and DECL2 are decls of target builtin functions to be used
2098 when vectorizing the operation, if available. In this case,
2099 CODE1 and CODE2 are CALL_EXPR. */
2101 bool
2102 supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
2103 tree *decl1, tree *decl2,
2104 enum tree_code *code1, enum tree_code *code2)
2106 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2107 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
2108 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2109 bool ordered_p;
2110 enum machine_mode vec_mode;
2111 enum insn_code icode1, icode2;
2112 optab optab1, optab2;
2113 tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
2114 tree type = TREE_TYPE (expr);
2115 tree wide_vectype = get_vectype_for_scalar_type (type);
2116 enum tree_code c1, c2;
2118 /* The result of a vectorized widening operation usually requires two vectors
2119 (because the widened results do not fit int one vector). The generated
2120 vector results would normally be expected to be generated in the same
2121 order as in the original scalar computation. i.e. if 8 results are
2122 generated in each vector iteration, they are to be organized as follows:
2123 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
2125 However, in the special case that the result of the widening operation is
2126 used in a reduction computation only, the order doesn't matter (because
2127 when vectorizing a reduction we change the order of the computation).
2128 Some targets can take advantage of this and generate more efficient code.
2129 For example, targets like Altivec, that support widen_mult using a sequence
2130 of {mult_even,mult_odd} generate the following vectors:
2131 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
2133 When vectorizaing outer-loops, we execute the inner-loop sequentially
2134 (each vectorized inner-loop iteration contributes to VF outer-loop
2135 iterations in parallel). We therefore don't allow to change the order
2136 of the computation in the inner-loop during outer-loop vectorization. */
2138 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
2139 && !nested_in_vect_loop_p (vect_loop, stmt))
2140 ordered_p = false;
2141 else
2142 ordered_p = true;
2144 if (!ordered_p
2145 && code == WIDEN_MULT_EXPR
2146 && targetm.vectorize.builtin_mul_widen_even
2147 && targetm.vectorize.builtin_mul_widen_even (vectype)
2148 && targetm.vectorize.builtin_mul_widen_odd
2149 && targetm.vectorize.builtin_mul_widen_odd (vectype))
2151 if (vect_print_dump_info (REPORT_DETAILS))
2152 fprintf (vect_dump, "Unordered widening operation detected.");
2154 *code1 = *code2 = CALL_EXPR;
2155 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
2156 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
2157 return true;
2160 switch (code)
2162 case WIDEN_MULT_EXPR:
2163 if (BYTES_BIG_ENDIAN)
2165 c1 = VEC_WIDEN_MULT_HI_EXPR;
2166 c2 = VEC_WIDEN_MULT_LO_EXPR;
2168 else
2170 c2 = VEC_WIDEN_MULT_HI_EXPR;
2171 c1 = VEC_WIDEN_MULT_LO_EXPR;
2173 break;
2175 CASE_CONVERT:
2176 if (BYTES_BIG_ENDIAN)
2178 c1 = VEC_UNPACK_HI_EXPR;
2179 c2 = VEC_UNPACK_LO_EXPR;
2181 else
2183 c2 = VEC_UNPACK_HI_EXPR;
2184 c1 = VEC_UNPACK_LO_EXPR;
2186 break;
2188 case FLOAT_EXPR:
2189 if (BYTES_BIG_ENDIAN)
2191 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
2192 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
2194 else
2196 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
2197 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
2199 break;
2201 case FIX_TRUNC_EXPR:
2202 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
2203 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
2204 computing the operation. */
2205 return false;
2207 default:
2208 gcc_unreachable ();
2211 if (code == FIX_TRUNC_EXPR)
2213 /* The signedness is determined from output operand. */
2214 optab1 = optab_for_tree_code (c1, type, optab_default);
2215 optab2 = optab_for_tree_code (c2, type, optab_default);
2217 else
2219 optab1 = optab_for_tree_code (c1, vectype, optab_default);
2220 optab2 = optab_for_tree_code (c2, vectype, optab_default);
2223 if (!optab1 || !optab2)
2224 return false;
2226 vec_mode = TYPE_MODE (vectype);
2227 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
2228 || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
2229 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
2230 == CODE_FOR_nothing
2231 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
2232 return false;
2234 *code1 = c1;
2235 *code2 = c2;
2236 return true;
2240 /* Function supportable_narrowing_operation
2242 Check whether an operation represented by the code CODE is a
2243 narrowing operation that is supported by the target platform in
2244 vector form (i.e., when operating on arguments of type VECTYPE).
2246 Narrowing operations we currently support are NOP (CONVERT) and
2247 FIX_TRUNC. This function checks if these operations are supported by
2248 the target platform directly via vector tree-codes.
2250 Output:
2251 - CODE1 is the code of a vector operation to be used when
2252 vectorizing the operation, if available. */
2254 bool
2255 supportable_narrowing_operation (enum tree_code code,
2256 const_tree stmt, const_tree vectype,
2257 enum tree_code *code1)
2259 enum machine_mode vec_mode;
2260 enum insn_code icode1;
2261 optab optab1;
2262 tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
2263 tree type = TREE_TYPE (expr);
2264 tree narrow_vectype = get_vectype_for_scalar_type (type);
2265 enum tree_code c1;
2267 switch (code)
2269 CASE_CONVERT:
2270 c1 = VEC_PACK_TRUNC_EXPR;
2271 break;
2273 case FIX_TRUNC_EXPR:
2274 c1 = VEC_PACK_FIX_TRUNC_EXPR;
2275 break;
2277 case FLOAT_EXPR:
2278 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
2279 tree code and optabs used for computing the operation. */
2280 return false;
2282 default:
2283 gcc_unreachable ();
2286 if (code == FIX_TRUNC_EXPR)
2287 /* The signedness is determined from output operand. */
2288 optab1 = optab_for_tree_code (c1, type, optab_default);
2289 else
2290 optab1 = optab_for_tree_code (c1, vectype, optab_default);
2292 if (!optab1)
2293 return false;
2295 vec_mode = TYPE_MODE (vectype);
2296 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
2297 || insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
2298 return false;
2300 *code1 = c1;
2301 return true;
2305 /* Function reduction_code_for_scalar_code
2307 Input:
2308 CODE - tree_code of a reduction operations.
2310 Output:
2311 REDUC_CODE - the corresponding tree-code to be used to reduce the
2312 vector of partial results into a single scalar result (which
2313 will also reside in a vector).
2315 Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
2317 bool
2318 reduction_code_for_scalar_code (enum tree_code code,
2319 enum tree_code *reduc_code)
2321 switch (code)
2323 case MAX_EXPR:
2324 *reduc_code = REDUC_MAX_EXPR;
2325 return true;
2327 case MIN_EXPR:
2328 *reduc_code = REDUC_MIN_EXPR;
2329 return true;
2331 case PLUS_EXPR:
2332 *reduc_code = REDUC_PLUS_EXPR;
2333 return true;
2335 default:
2336 return false;
2341 /* Function vect_is_simple_reduction
2343 Detect a cross-iteration def-use cycle that represents a simple
2344 reduction computation. We look for the following pattern:
2346 loop_header:
2347 a1 = phi < a0, a2 >
2348 a3 = ...
2349 a2 = operation (a3, a1)
2351 such that:
2352 1. operation is commutative and associative and it is safe to
2353 change the order of the computation.
2354 2. no uses for a2 in the loop (a2 is used out of the loop)
2355 3. no uses of a1 in the loop besides the reduction operation.
2357 Condition 1 is tested here.
2358 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
2360 tree
2361 vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
2363 struct loop *loop = (bb_for_stmt (phi))->loop_father;
2364 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2365 edge latch_e = loop_latch_edge (loop);
2366 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2367 tree def_stmt, def1, def2;
2368 enum tree_code code;
2369 int op_type;
2370 tree operation, op1, op2;
2371 tree type;
2372 int nloop_uses;
2373 tree name;
2374 imm_use_iterator imm_iter;
2375 use_operand_p use_p;
2377 gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
2379 name = PHI_RESULT (phi);
2380 nloop_uses = 0;
2381 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2383 tree use_stmt = USE_STMT (use_p);
2384 if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
2385 && vinfo_for_stmt (use_stmt)
2386 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2387 nloop_uses++;
2388 if (nloop_uses > 1)
2390 if (vect_print_dump_info (REPORT_DETAILS))
2391 fprintf (vect_dump, "reduction used in loop.");
2392 return NULL_TREE;
2396 if (TREE_CODE (loop_arg) != SSA_NAME)
2398 if (vect_print_dump_info (REPORT_DETAILS))
2400 fprintf (vect_dump, "reduction: not ssa_name: ");
2401 print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2403 return NULL_TREE;
2406 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2407 if (!def_stmt)
2409 if (vect_print_dump_info (REPORT_DETAILS))
2410 fprintf (vect_dump, "reduction: no def_stmt.");
2411 return NULL_TREE;
2414 if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
2416 if (vect_print_dump_info (REPORT_DETAILS))
2417 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2418 return NULL_TREE;
2421 name = GIMPLE_STMT_OPERAND (def_stmt, 0);
2422 nloop_uses = 0;
2423 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2425 tree use_stmt = USE_STMT (use_p);
2426 if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
2427 && vinfo_for_stmt (use_stmt)
2428 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2429 nloop_uses++;
2430 if (nloop_uses > 1)
2432 if (vect_print_dump_info (REPORT_DETAILS))
2433 fprintf (vect_dump, "reduction used in loop.");
2434 return NULL_TREE;
2438 operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
2439 code = TREE_CODE (operation);
2440 if (!commutative_tree_code (code) || !associative_tree_code (code))
2442 if (vect_print_dump_info (REPORT_DETAILS))
2444 fprintf (vect_dump, "reduction: not commutative/associative: ");
2445 print_generic_expr (vect_dump, operation, TDF_SLIM);
2447 return NULL_TREE;
2450 op_type = TREE_OPERAND_LENGTH (operation);
2451 if (op_type != binary_op)
2453 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "reduction: not binary operation: ");
2456 print_generic_expr (vect_dump, operation, TDF_SLIM);
2458 return NULL_TREE;
2461 op1 = TREE_OPERAND (operation, 0);
2462 op2 = TREE_OPERAND (operation, 1);
2463 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
2465 if (vect_print_dump_info (REPORT_DETAILS))
2467 fprintf (vect_dump, "reduction: uses not ssa_names: ");
2468 print_generic_expr (vect_dump, operation, TDF_SLIM);
2470 return NULL_TREE;
2473 /* Check that it's ok to change the order of the computation. */
2474 type = TREE_TYPE (operation);
2475 if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
2476 || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
2478 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "reduction: multiple types: operation type: ");
2481 print_generic_expr (vect_dump, type, TDF_SLIM);
2482 fprintf (vect_dump, ", operands types: ");
2483 print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2484 fprintf (vect_dump, ",");
2485 print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2487 return NULL_TREE;
2490 /* Generally, when vectorizing a reduction we change the order of the
2491 computation. This may change the behavior of the program in some
2492 cases, so we need to check that this is ok. One exception is when
2493 vectorizing an outer-loop: the inner-loop is executed sequentially,
2494 and therefore vectorizing reductions in the inner-loop durint
2495 outer-loop vectorization is safe. */
2497 /* CHECKME: check for !flag_finite_math_only too? */
2498 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2499 && !nested_in_vect_loop_p (vect_loop, def_stmt))
2501 /* Changing the order of operations changes the semantics. */
2502 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
2505 print_generic_expr (vect_dump, operation, TDF_SLIM);
2507 return NULL_TREE;
2509 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2510 && !nested_in_vect_loop_p (vect_loop, def_stmt))
2512 /* Changing the order of operations changes the semantics. */
2513 if (vect_print_dump_info (REPORT_DETAILS))
2515 fprintf (vect_dump, "reduction: unsafe int math optimization: ");
2516 print_generic_expr (vect_dump, operation, TDF_SLIM);
2518 return NULL_TREE;
2520 else if (SAT_FIXED_POINT_TYPE_P (type))
2522 /* Changing the order of operations changes the semantics. */
2523 if (vect_print_dump_info (REPORT_DETAILS))
2525 fprintf (vect_dump, "reduction: unsafe fixed-point math optimization: ");
2526 print_generic_expr (vect_dump, operation, TDF_SLIM);
2528 return NULL_TREE;
2531 /* reduction is safe. we're dealing with one of the following:
2532 1) integer arithmetic and no trapv
2533 2) floating point arithmetic, and special flags permit this optimization.
2535 def1 = SSA_NAME_DEF_STMT (op1);
2536 def2 = SSA_NAME_DEF_STMT (op2);
2537 if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
2539 if (vect_print_dump_info (REPORT_DETAILS))
2541 fprintf (vect_dump, "reduction: no defs for operands: ");
2542 print_generic_expr (vect_dump, operation, TDF_SLIM);
2544 return NULL_TREE;
2548 /* Check that one def is the reduction def, defined by PHI,
2549 the other def is either defined in the loop ("vect_loop_def"),
2550 or it's an induction (defined by a loop-header phi-node). */
2552 if (def2 == phi
2553 && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
2554 && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
2555 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
2556 || (TREE_CODE (def1) == PHI_NODE
2557 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
2558 && !is_loop_header_bb_p (bb_for_stmt (def1)))))
2560 if (vect_print_dump_info (REPORT_DETAILS))
2562 fprintf (vect_dump, "detected reduction:");
2563 print_generic_expr (vect_dump, operation, TDF_SLIM);
2565 return def_stmt;
2567 else if (def1 == phi
2568 && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
2569 && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
2570 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
2571 || (TREE_CODE (def2) == PHI_NODE
2572 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
2573 && !is_loop_header_bb_p (bb_for_stmt (def2)))))
2575 /* Swap operands (just for simplicity - so that the rest of the code
2576 can assume that the reduction variable is always the last (second)
2577 argument). */
2578 if (vect_print_dump_info (REPORT_DETAILS))
2580 fprintf (vect_dump, "detected reduction: need to swap operands:");
2581 print_generic_expr (vect_dump, operation, TDF_SLIM);
2583 swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
2584 &TREE_OPERAND (operation, 1));
2585 return def_stmt;
2587 else
2589 if (vect_print_dump_info (REPORT_DETAILS))
2591 fprintf (vect_dump, "reduction: unknown pattern.");
2592 print_generic_expr (vect_dump, operation, TDF_SLIM);
2594 return NULL_TREE;
2599 /* Function vect_is_simple_iv_evolution.
2601 FORNOW: A simple evolution of an induction variables in the loop is
2602 considered a polynomial evolution with constant step. */
2604 bool
2605 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2606 tree * step)
2608 tree init_expr;
2609 tree step_expr;
2610 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2612 /* When there is no evolution in this loop, the evolution function
2613 is not "simple". */
2614 if (evolution_part == NULL_TREE)
2615 return false;
2617 /* When the evolution is a polynomial of degree >= 2
2618 the evolution function is not "simple". */
2619 if (tree_is_chrec (evolution_part))
2620 return false;
2622 step_expr = evolution_part;
2623 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
2625 if (vect_print_dump_info (REPORT_DETAILS))
2627 fprintf (vect_dump, "step: ");
2628 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2629 fprintf (vect_dump, ", init: ");
2630 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2633 *init = init_expr;
2634 *step = step_expr;
2636 if (TREE_CODE (step_expr) != INTEGER_CST)
2638 if (vect_print_dump_info (REPORT_DETAILS))
2639 fprintf (vect_dump, "step unknown.");
2640 return false;
2643 return true;
2647 /* Function vectorize_loops.
2649 Entry Point to loop vectorization phase. */
2651 unsigned
2652 vectorize_loops (void)
2654 unsigned int i;
2655 unsigned int num_vectorized_loops = 0;
2656 unsigned int vect_loops_num;
2657 loop_iterator li;
2658 struct loop *loop;
2660 vect_loops_num = number_of_loops ();
2662 /* Bail out if there are no loops. */
2663 if (vect_loops_num <= 1)
2664 return 0;
2666 /* Fix the verbosity level if not defined explicitly by the user. */
2667 vect_set_dump_settings ();
2669 /* Allocate the bitmap that records which virtual variables that
2670 need to be renamed. */
2671 vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
2673 /* ----------- Analyze loops. ----------- */
2675 /* If some loop was duplicated, it gets bigger number
2676 than all previously defined loops. This fact allows us to run
2677 only over initial loops skipping newly generated ones. */
2678 FOR_EACH_LOOP (li, loop, 0)
2680 loop_vec_info loop_vinfo;
2682 vect_loop_location = find_loop_location (loop);
2683 loop_vinfo = vect_analyze_loop (loop);
2684 loop->aux = loop_vinfo;
2686 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2687 continue;
2689 vect_transform_loop (loop_vinfo);
2690 num_vectorized_loops++;
2692 vect_loop_location = UNKNOWN_LOC;
2694 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
2695 || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
2696 && num_vectorized_loops > 0))
2697 fprintf (vect_dump, "vectorized %u loops in function.\n",
2698 num_vectorized_loops);
2700 /* ----------- Finalize. ----------- */
2702 BITMAP_FREE (vect_memsyms_to_rename);
2704 for (i = 1; i < vect_loops_num; i++)
2706 loop_vec_info loop_vinfo;
2708 loop = get_loop (i);
2709 if (!loop)
2710 continue;
2711 loop_vinfo = loop->aux;
2712 destroy_loop_vec_info (loop_vinfo, true);
2713 loop->aux = NULL;
2716 return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
2719 /* Increase alignment of global arrays to improve vectorization potential.
2720 TODO:
2721 - Consider also structs that have an array field.
2722 - Use ipa analysis to prune arrays that can't be vectorized?
2723 This should involve global alignment analysis and in the future also
2724 array padding. */
2726 static unsigned int
2727 increase_alignment (void)
2729 struct varpool_node *vnode;
2731 /* Increase the alignment of all global arrays for vectorization. */
2732 for (vnode = varpool_nodes_queue;
2733 vnode;
2734 vnode = vnode->next_needed)
2736 tree vectype, decl = vnode->decl;
2737 unsigned int alignment;
2739 if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
2740 continue;
2741 vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
2742 if (!vectype)
2743 continue;
2744 alignment = TYPE_ALIGN (vectype);
2745 if (DECL_ALIGN (decl) >= alignment)
2746 continue;
2748 if (vect_can_force_dr_alignment_p (decl, alignment))
2750 DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
2751 DECL_USER_ALIGN (decl) = 1;
2752 if (dump_file)
2754 fprintf (dump_file, "Increasing alignment of decl: ");
2755 print_generic_expr (dump_file, decl, TDF_SLIM);
2759 return 0;
2762 static bool
2763 gate_increase_alignment (void)
2765 return flag_section_anchors && flag_tree_vectorize;
2768 struct simple_ipa_opt_pass pass_ipa_increase_alignment =
2771 SIMPLE_IPA_PASS,
2772 "increase_alignment", /* name */
2773 gate_increase_alignment, /* gate */
2774 increase_alignment, /* execute */
2775 NULL, /* sub */
2776 NULL, /* next */
2777 0, /* static_pass_number */
2778 0, /* tv_id */
2779 0, /* properties_required */
2780 0, /* properties_provided */
2781 0, /* properties_destroyed */
2782 0, /* todo_flags_start */
2783 0 /* todo_flags_finish */