Update concepts branch to revision 131834
[official-gcc.git] / gcc / tree-vectorizer.c
blobbe93e02457691de5966123e978b4f30fb50c3af5
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, optab_handler (add_optab, V8HImode)->insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "ggc.h"
128 #include "tree.h"
129 #include "target.h"
130 #include "rtl.h"
131 #include "basic-block.h"
132 #include "diagnostic.h"
133 #include "tree-flow.h"
134 #include "tree-dump.h"
135 #include "timevar.h"
136 #include "cfgloop.h"
137 #include "cfglayout.h"
138 #include "expr.h"
139 #include "recog.h"
140 #include "optabs.h"
141 #include "params.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "input.h"
147 #include "tree-vectorizer.h"
148 #include "tree-pass.h"
150 /*************************************************************************
151 General Vectorization Utilities
152 *************************************************************************/
154 /* vect_dump will be set to stderr or dump_file if exist. */
155 FILE *vect_dump;
157 /* vect_verbosity_level set to an invalid value
158 to mark that it's uninitialized. */
159 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
161 /* Loop location. */
162 static LOC vect_loop_location;
164 /* Bitmap of virtual variables to be renamed. */
165 bitmap vect_memsyms_to_rename;
167 /*************************************************************************
168 Simple Loop Peeling Utilities
170 Utilities to support loop peeling for vectorization purposes.
171 *************************************************************************/
174 /* Renames the use *OP_P. */
176 static void
177 rename_use_op (use_operand_p op_p)
179 tree new_name;
181 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
182 return;
184 new_name = get_current_def (USE_FROM_PTR (op_p));
186 /* Something defined outside of the loop. */
187 if (!new_name)
188 return;
190 /* An ordinary ssa name defined in the loop. */
192 SET_USE (op_p, new_name);
196 /* Renames the variables in basic block BB. */
198 static void
199 rename_variables_in_bb (basic_block bb)
201 tree phi;
202 block_stmt_iterator bsi;
203 tree stmt;
204 use_operand_p use_p;
205 ssa_op_iter iter;
206 edge e;
207 edge_iterator ei;
208 struct loop *loop = bb->loop_father;
210 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
212 stmt = bsi_stmt (bsi);
213 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
214 rename_use_op (use_p);
217 FOR_EACH_EDGE (e, ei, bb->succs)
219 if (!flow_bb_inside_loop_p (loop, e->dest))
220 continue;
221 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
222 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
227 /* Renames variables in new generated LOOP. */
229 void
230 rename_variables_in_loop (struct loop *loop)
232 unsigned i;
233 basic_block *bbs;
235 bbs = get_loop_body (loop);
237 for (i = 0; i < loop->num_nodes; i++)
238 rename_variables_in_bb (bbs[i]);
240 free (bbs);
244 /* Update the PHI nodes of NEW_LOOP.
246 NEW_LOOP is a duplicate of ORIG_LOOP.
247 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
248 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
249 executes before it. */
251 static void
252 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
253 struct loop *new_loop, bool after)
255 tree new_ssa_name;
256 tree phi_new, phi_orig;
257 tree def;
258 edge orig_loop_latch = loop_latch_edge (orig_loop);
259 edge orig_entry_e = loop_preheader_edge (orig_loop);
260 edge new_loop_exit_e = single_exit (new_loop);
261 edge new_loop_entry_e = loop_preheader_edge (new_loop);
/* The value that flows into NEW_LOOP's header phis on its entry edge:
   when NEW_LOOP executes after ORIG_LOOP its entry value is the value
   leaving ORIG_LOOP (the latch definition); otherwise it is
   ORIG_LOOP's preheader value.  */
262 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
265 step 1. For each loop-header-phi:
266 Add the first phi argument for the phi in NEW_LOOP
267 (the one associated with the entry of NEW_LOOP)
269 step 2. For each loop-header-phi:
270 Add the second phi argument for the phi in NEW_LOOP
271 (the one associated with the latch of NEW_LOOP)
273 step 3. Update the phis in the successor block of NEW_LOOP.
275 case 1: NEW_LOOP was placed before ORIG_LOOP:
276 The successor block of NEW_LOOP is the header of ORIG_LOOP.
277 Updating the phis in the successor block can therefore be done
278 along with the scanning of the loop header phis, because the
279 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
280 phi nodes, organized in the same order.
282 case 2: NEW_LOOP was placed after ORIG_LOOP:
283 The successor block of NEW_LOOP is the original exit block of
284 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
285 We postpone updating these phis to a later stage (when
286 loop guards are added).
290 /* Scan the phis in the headers of the old and new loops
291 (they are organized in exactly the same order). */
293 for (phi_new = phi_nodes (new_loop->header),
294 phi_orig = phi_nodes (orig_loop->header);
295 phi_new && phi_orig;
296 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
298 /* step 1. */
299 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
300 add_phi_arg (phi_new, def, new_loop_entry_e);
302 /* step 2. */
303 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
/* Constant latch arguments need no remapping; only SSA names have a
   corresponding copy in the duplicated loop.  */
304 if (TREE_CODE (def) != SSA_NAME)
305 continue;
/* The current def of an original-loop name is the corresponding name
   in the duplicated loop copy (recorded during loop duplication).  */
307 new_ssa_name = get_current_def (def);
308 if (!new_ssa_name)
310 /* This only happens if there are no definitions
311 inside the loop. use the phi_result in this case. */
312 new_ssa_name = PHI_RESULT (phi_new);
315 /* An ordinary ssa name defined in the loop. */
316 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
318 /* step 3 (case 1). */
/* When NEW_LOOP runs first, its exit edge is ORIG_LOOP's entry edge,
   so the original header phi's entry argument must become the value
   computed by the new loop.  */
319 if (!after)
321 gcc_assert (new_loop_exit_e == orig_entry_e);
322 SET_PHI_ARG_DEF (phi_orig,
323 new_loop_exit_e->dest_idx,
324 new_ssa_name);
330 /* Update PHI nodes for a guard of the LOOP.
332 Input:
333 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
334 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
335 originates from the guard-bb, skips LOOP and reaches the (unique) exit
336 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
337 We denote this bb NEW_MERGE_BB because before the guard code was added
338 it had a single predecessor (the LOOP header), and now it became a merge
339 point of two paths - the path that ends with the LOOP exit-edge, and
340 the path that ends with GUARD_EDGE.
341 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
342 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
344 ===> The CFG before the guard-code was added:
345 LOOP_header_bb:
346 loop_body
347 if (exit_loop) goto update_bb
348 else goto LOOP_header_bb
349 update_bb:
351 ==> The CFG after the guard-code was added:
352 guard_bb:
353 if (LOOP_guard_condition) goto new_merge_bb
354 else goto LOOP_header_bb
355 LOOP_header_bb:
356 loop_body
357 if (exit_loop_condition) goto new_merge_bb
358 else goto LOOP_header_bb
359 new_merge_bb:
360 goto update_bb
361 update_bb:
363 ==> The CFG after this function:
364 guard_bb:
365 if (LOOP_guard_condition) goto new_merge_bb
366 else goto LOOP_header_bb
367 LOOP_header_bb:
368 loop_body
369 if (exit_loop_condition) goto new_exit_bb
370 else goto LOOP_header_bb
371 new_exit_bb:
372 new_merge_bb:
373 goto update_bb
374 update_bb:
376 This function:
377 1. creates and updates the relevant phi nodes to account for the new
378 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
379 1.1. Create phi nodes at NEW_MERGE_BB.
380 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
381 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
382 2. preserves loop-closed-ssa-form by creating the required phi nodes
383 at the exit of LOOP (i.e, in NEW_EXIT_BB).
385 There are two flavors to this function:
387 slpeel_update_phi_nodes_for_guard1:
388 Here the guard controls whether we enter or skip LOOP, where LOOP is a
389 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
390 for variables that have phis in the loop header.
392 slpeel_update_phi_nodes_for_guard2:
393 Here the guard controls whether we enter or skip LOOP, where LOOP is an
394 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
395 for variables that have phis in the loop exit.
397 I.E., the overall structure is:
399 loop1_preheader_bb:
400 guard1 (goto loop1/merge1_bb)
401 loop1
402 loop1_exit_bb:
403 guard2 (goto merge1_bb/merge2_bb)
404 merge1_bb
405 loop2
406 loop2_exit_bb
407 merge2_bb
408 next_bb
410 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
411 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
412 that have phis in loop1->header).
414 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
415 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
416 that have phis in next_bb). It also adds some of these phis to
417 loop1_exit_bb.
419 slpeel_update_phi_nodes_for_guard1 is always called before
420 slpeel_update_phi_nodes_for_guard2. They are both needed in order
421 to create correct data-flow and loop-closed-ssa-form.
423 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
424 that change between iterations of a loop (and therefore have a phi-node
425 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
426 phis for variables that are used out of the loop (and therefore have
427 loop-closed exit phis). Some variables may be both updated between
428 iterations and used after the loop. This is why in loop1_exit_bb we
429 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
430 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
432 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
433 an original loop. i.e., we have:
435 orig_loop
436 guard_bb (goto LOOP/new_merge)
437 new_loop <-- LOOP
438 new_exit
439 new_merge
440 next_bb
442 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
443 have:
445 new_loop
446 guard_bb (goto LOOP/new_merge)
447 orig_loop <-- LOOP
448 new_exit
449 new_merge
450 next_bb
452 The SSA names defined in the original loop have a current
453 reaching definition that records the corresponding new
454 ssa-name used in the new duplicated loop copy.
457 /* Function slpeel_update_phi_nodes_for_guard1
459 Input:
460 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
461 - DEFS - a bitmap of ssa names to mark new names for which we recorded
462 information.
464 In the context of the overall structure, we have:
466 loop1_preheader_bb:
467 guard1 (goto loop1/merge1_bb)
468 LOOP-> loop1
469 loop1_exit_bb:
470 guard2 (goto merge1_bb/merge2_bb)
471 merge1_bb
472 loop2
473 loop2_exit_bb
474 merge2_bb
475 next_bb
477 For each name updated between loop iterations (i.e - for each name that has
478 an entry (loop-header) phi in LOOP) we create a new phi in:
479 1. merge1_bb (to account for the edge from guard1)
480 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
483 static void
484 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
485 bool is_new_loop, basic_block *new_exit_bb,
486 bitmap *defs)
488 tree orig_phi, new_phi;
489 tree update_phi, update_phi2;
490 tree guard_arg, loop_arg;
491 basic_block new_merge_bb = guard_edge->dest;
492 edge e = EDGE_SUCC (new_merge_bb, 0);
493 basic_block update_bb = e->dest;
494 basic_block orig_bb = loop->header;
495 edge new_exit_e;
496 tree current_new_name;
497 tree name;
499 /* Create new bb between loop and new_merge_bb. */
500 *new_exit_bb = split_edge (single_exit (loop));
502 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
/* Walk the loop-header phis and the phis of UPDATE_BB in lockstep;
   they are organized in the same order.  */
504 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
505 orig_phi && update_phi;
506 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
508 /* Virtual phi; Mark it for renaming. We actually want to call
509 mark_sym_for_renaming, but since all ssa renaming datastructures
510 are going to be freed before we get to call ssa_update, we just
511 record this name for now in a bitmap, and will mark it for
512 renaming later. */
513 name = PHI_RESULT (orig_phi);
514 if (!is_gimple_reg (SSA_NAME_VAR (name)))
515 bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
517 /** 1. Handle new-merge-point phis **/
519 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
520 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
521 new_merge_bb);
523 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
524 of LOOP. Set the two phi args in NEW_PHI for these edges: */
525 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
526 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
528 add_phi_arg (new_phi, loop_arg, new_exit_e);
529 add_phi_arg (new_phi, guard_arg, guard_edge);
531 /* 1.3. Update phi in successor block. */
/* UPDATE_BB previously received the value directly from the loop (or
   preheader); it must now receive it through the new merge phi.  */
532 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
533 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
534 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
535 update_phi2 = new_phi;
538 /** 2. Handle loop-closed-ssa-form phis **/
/* Only real (gimple register) names need loop-closed exit phis.  */
540 if (!is_gimple_reg (PHI_RESULT (orig_phi)))
541 continue;
543 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
544 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
545 *new_exit_bb);
547 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
548 add_phi_arg (new_phi, loop_arg, single_exit (loop));
550 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
551 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
552 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
554 /* 2.4. Record the newly created name with set_current_def.
555 We want to find a name such that
556 name = get_current_def (orig_loop_name)
557 and to set its current definition as follows:
558 set_current_def (name, new_phi_name)
560 If LOOP is a new loop then loop_arg is already the name we're
561 looking for. If LOOP is the original loop, then loop_arg is
562 the orig_loop_name and the relevant name is recorded in its
563 current reaching definition. */
564 if (is_new_loop)
565 current_new_name = loop_arg;
566 else
568 current_new_name = get_current_def (loop_arg);
569 /* current_def is not available only if the variable does not
570 change inside the loop, in which case we also don't care
571 about recording a current_def for it because we won't be
572 trying to create loop-exit-phis for it. */
573 if (!current_new_name)
574 continue;
576 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
578 set_current_def (current_new_name, PHI_RESULT (new_phi));
579 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
/* NOTE(review): the phis were accumulated in reverse relative to the
   loop-header order; this restores the original order — presumably
   create_phi_node prepends to the block's phi list, confirm.  */
582 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
586 /* Function slpeel_update_phi_nodes_for_guard2
588 Input:
589 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
591 In the context of the overall structure, we have:
593 loop1_preheader_bb:
594 guard1 (goto loop1/merge1_bb)
595 loop1
596 loop1_exit_bb:
597 guard2 (goto merge1_bb/merge2_bb)
598 merge1_bb
599 LOOP-> loop2
600 loop2_exit_bb
601 merge2_bb
602 next_bb
604 For each name used outside the loop (i.e - for each name that has an exit
605 phi in next_bb) we create a new phi in:
606 1. merge2_bb (to account for the edge from guard_bb)
607 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
608 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
609 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
612 static void
613 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
614 bool is_new_loop, basic_block *new_exit_bb)
616 tree orig_phi, new_phi;
617 tree update_phi, update_phi2;
618 tree guard_arg, loop_arg;
619 basic_block new_merge_bb = guard_edge->dest;
620 edge e = EDGE_SUCC (new_merge_bb, 0);
621 basic_block update_bb = e->dest;
622 edge new_exit_e;
623 tree orig_def, orig_def_new_name;
624 tree new_name, new_name2;
625 tree arg;
627 /* Create new bb between loop and new_merge_bb. */
628 *new_exit_bb = split_edge (single_exit (loop));
630 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
/* Unlike guard1, iterate over the loop-closed exit phis in UPDATE_BB
   (names used after the loop), not the loop-header phis.  */
632 for (update_phi = phi_nodes (update_bb); update_phi;
633 update_phi = PHI_CHAIN (update_phi))
635 orig_phi = update_phi;
636 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
637 /* This loop-closed-phi actually doesn't represent a use
638 out of the loop - the phi arg is a constant. */
639 if (TREE_CODE (orig_def) != SSA_NAME)
640 continue;
641 orig_def_new_name = get_current_def (orig_def);
642 arg = NULL_TREE;
644 /** 1. Handle new-merge-point phis **/
646 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
647 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
648 new_merge_bb);
650 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
651 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
652 new_name = orig_def;
653 new_name2 = NULL_TREE;
654 if (orig_def_new_name)
656 new_name = orig_def_new_name;
657 /* Some variables have both loop-entry-phis and loop-exit-phis.
658 Such variables were given yet newer names by phis placed in
659 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
660 new_name2 = get_current_def (get_current_def (orig_name)). */
661 new_name2 = get_current_def (new_name);
/* Decide which name reaches the merge point along each path: the
   duplicated-loop name flows along the loop path, the original name
   along the guard (skip) path — or vice versa when LOOP is the
   original loop.  */
664 if (is_new_loop)
666 guard_arg = orig_def;
667 loop_arg = new_name;
669 else
671 guard_arg = new_name;
672 loop_arg = orig_def;
674 if (new_name2)
675 guard_arg = new_name2;
677 add_phi_arg (new_phi, loop_arg, new_exit_e);
678 add_phi_arg (new_phi, guard_arg, guard_edge);
680 /* 1.3. Update phi in successor block. */
681 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
682 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
683 update_phi2 = new_phi;
686 /** 2. Handle loop-closed-ssa-form phis **/
688 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
689 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
690 *new_exit_bb);
692 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
693 add_phi_arg (new_phi, loop_arg, single_exit (loop));
695 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
696 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
697 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
700 /** 3. Handle loop-closed-ssa-form phis for first loop **/
702 /* 3.1. Find the relevant names that need an exit-phi in
703 GUARD_BB, i.e. names for which
704 slpeel_update_phi_nodes_for_guard1 had not already created a
705 phi node. This is the case for names that are used outside
706 the loop (and therefore need an exit phi) but are not updated
707 across loop iterations (and therefore don't have a
708 loop-header-phi).
710 slpeel_update_phi_nodes_for_guard1 is responsible for
711 creating loop-exit phis in GUARD_BB for names that have a
712 loop-header-phi. When such a phi is created we also record
713 the new name in its current definition. If this new name
714 exists, then guard_arg was set to this new name (see 1.2
715 above). Therefore, if guard_arg is not this new name, this
716 is an indication that an exit-phi in GUARD_BB was not yet
717 created, so we take care of it here. */
718 if (guard_arg == new_name2)
719 continue;
720 arg = guard_arg;
722 /* 3.2. Generate new phi node in GUARD_BB: */
723 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
724 guard_edge->src);
726 /* 3.3. GUARD_BB has one incoming edge: */
727 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
728 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
730 /* 3.4. Update phi in successor of GUARD_BB: */
731 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
732 == guard_arg);
733 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
/* NOTE(review): as in guard1, restore the original phi order in the
   merge block — presumably create_phi_node prepends, confirm.  */
736 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
740 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
741 that starts at zero, increases by one and its limit is NITERS.
743 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
745 void
746 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
748 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
749 tree orig_cond;
750 edge exit_edge = single_exit (loop);
751 block_stmt_iterator loop_cond_bsi;
752 block_stmt_iterator incr_bsi;
753 bool insert_after;
754 tree init = build_int_cst (TREE_TYPE (niters), 0);
755 tree step = build_int_cst (TREE_TYPE (niters), 1);
756 LOC loop_loc;
758 orig_cond = get_loop_exit_condition (loop);
759 gcc_assert (orig_cond);
760 loop_cond_bsi = bsi_for_stmt (orig_cond);
762 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
763 create_iv (init, step, NULL_TREE, loop,
764 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
766 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
767 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
768 else /* 'then' edge loops back. */
769 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
771 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
772 NULL_TREE, NULL_TREE);
773 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
775 /* Remove old loop exit test: */
776 bsi_remove (&loop_cond_bsi, true);
778 loop_loc = find_loop_location (loop);
779 if (dump_file && (dump_flags & TDF_DETAILS))
781 if (loop_loc != UNKNOWN_LOC)
782 fprintf (dump_file, "\nloop at %s:%d: ",
783 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
784 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
787 loop->nb_iterations = niters;
791 /* Given LOOP this function generates a new copy of it and puts it
792 on E which is either the entry or exit of LOOP.  Returns the new
loop, or NULL when duplication is not possible (E is neither the
entry nor the single exit of LOOP, the blocks cannot be copied, or
the loop structure cannot be duplicated). */
794 struct loop *
795 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
797 struct loop *new_loop;
798 basic_block *new_bbs, *bbs;
799 bool at_exit;
800 bool was_imm_dom;
801 basic_block exit_dest;
802 tree phi, phi_arg;
803 edge exit, new_exit;
/* Only the single exit edge or the preheader edge are legal
   placement points for the copy.  */
805 at_exit = (e == single_exit (loop));
806 if (!at_exit && e != loop_preheader_edge (loop))
807 return NULL;
809 bbs = get_loop_body (loop);
811 /* Check whether duplication is possible. */
812 if (!can_copy_bbs_p (bbs, loop->num_nodes))
814 free (bbs);
815 return NULL;
818 /* Generate new loop structure. */
819 new_loop = duplicate_loop (loop, loop_outer (loop));
820 if (!new_loop)
822 free (bbs);
823 return NULL;
/* Remember whether the exit destination was dominated by the loop
   header, so dominators can be fixed up after redirection below.  */
826 exit_dest = single_exit (loop)->dest;
827 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
828 exit_dest) == loop->header ?
829 true : false);
831 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
833 exit = single_exit (loop);
834 copy_bbs (bbs, loop->num_nodes, new_bbs,
835 &exit, 1, &new_exit, NULL,
836 e->src);
838 /* Duplicating phi args at exit bbs as coming
839 also from exit of duplicated loop. */
840 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
842 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
843 if (phi_arg)
845 edge new_loop_exit_edge;
/* The new loop's exit edge is whichever header successor does not
   lead to the latch.  */
847 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
848 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
849 else
850 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
852 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
856 if (at_exit) /* Add the loop copy at exit. */
858 redirect_edge_and_branch_force (e, new_loop->header);
859 PENDING_STMT (e) = NULL;
860 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
861 if (was_imm_dom)
862 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
864 else /* Add the copy at entry. */
866 edge new_exit_e;
867 edge entry_e = loop_preheader_edge (loop);
868 basic_block preheader = entry_e->src;
/* Find the copy's exit edge: the header successor whose destination
   lies outside the new loop.  */
870 if (!flow_bb_inside_loop_p (new_loop,
871 EDGE_SUCC (new_loop->header, 0)->dest))
872 new_exit_e = EDGE_SUCC (new_loop->header, 0);
873 else
874 new_exit_e = EDGE_SUCC (new_loop->header, 1);
876 redirect_edge_and_branch_force (new_exit_e, loop->header);
877 PENDING_STMT (new_exit_e) = NULL;
878 set_immediate_dominator (CDI_DOMINATORS, loop->header,
879 new_exit_e->src);
881 /* We have to add phi args to the loop->header here as coming
882 from new_exit_e edge. */
883 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
885 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
886 if (phi_arg)
887 add_phi_arg (phi, phi_arg, new_exit_e);
890 redirect_edge_and_branch_force (entry_e, new_loop->header);
891 PENDING_STMT (entry_e) = NULL;
892 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
895 free (new_bbs);
896 free (bbs);
898 return new_loop;
902 /* Given the condition statement COND, put it as the last statement
903 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
904 Assumes that this is the single exit of the guarded loop.
905 Returns the skip edge. */
907 static edge
908 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
909 basic_block dom_bb)
911 block_stmt_iterator bsi;
912 edge new_e, enter_e;
913 tree cond_stmt;
914 tree gimplify_stmt_list;
916 enter_e = EDGE_SUCC (guard_bb, 0);
917 enter_e->flags &= ~EDGE_FALLTHRU;
918 enter_e->flags |= EDGE_FALSE_VALUE;
919 bsi = bsi_last (guard_bb);
921 cond =
922 force_gimple_operand (cond, &gimplify_stmt_list, true,
923 NULL_TREE);
924 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
925 NULL_TREE, NULL_TREE);
926 if (gimplify_stmt_list)
927 bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
929 bsi = bsi_last (guard_bb);
930 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
932 /* Add new edge to connect guard block to the merge/loop-exit block. */
933 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
934 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
935 return new_e;
/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.  */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  tree orig_cond = get_loop_exit_condition (loop);
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);

  /* Peeling manipulates the SSA web; refuse if an update is already
     pending.  */
  if (need_ssa_update_p ())
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
	 the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
#ifdef ENABLE_CHECKING
/* Consistency check (ENABLE_CHECKING builds only): verify the CFG shape
   produced by peeling FIRST_LOOP in front of SECOND_LOOP.  */
void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations.  This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree first_niters,
			 struct loop *loop,
			 unsigned int th)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name, stmt;
  block_stmt_iterator bsi;
  tree newphi;
  edge e_true, e_false, e_fallthru;
  tree cond_stmt;
  tree gimplify_stmt_list;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the single predecessor edge twice, creating COND_BB (which will
     hold the cost-model test) followed by THEN_BB (which will compute the
     adjusted iteration count).  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge(e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge(e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* False edge of the test bypasses THEN_BB: keep FIRST_NITERS as is.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  /* True edge goes through THEN_BB: use the full scalar iteration count.  */
  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_fallthru = EDGE_SUCC (then_bb, 0);

  /* Build and insert the test "scalar_loop_iters <= TH", i.e. the run time
     cost model says the scalar loop is at least as good.  */
  cost_pre_condition =
    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
	    build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
			  true, NULL_TREE);
  cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
		      NULL_TREE, NULL_TREE);

  bsi = bsi_last (cond_bb);
  if (gimplify_stmt_list)
    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);

  /* Re-fetch the iterator after the possible insertion above.  */
  bsi = bsi_last (cond_bb);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);

  /* In THEN_BB, materialize the scalar iteration count into a temporary.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  add_referenced_var (var);
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmt, false, var);

  bsi = bsi_last (then_bb);
  if (stmt)
    bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);

  /* Merge the two incoming values with a PHI in BB_BEFORE_FIRST_LOOP.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
  add_phi_arg (newphi, first_niters, e_false);

  /* NOTE(review): FIRST_NITERS is a by-value parameter, so this assignment
     is invisible to the caller; presumably the PHI result was meant to be
     propagated back (later revisions take TREE *FIRST_NITERS) — verify.  */
  first_niters = PHI_RESULT (newphi);
}
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
	If it is the entry edge, we peel the first iterations of LOOP.  In this
	case first-loop is LOOP, and second-loop is the newly created loop.
	If it is the exit edge, we peel the last iterations of LOOP.  In this
	case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
	for updating the loop bound of the first-loop to FIRST_NITERS.  If it
	is false, the caller of this function may want to take care of this
	(this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
			  during versioning and hence needs to occur during
			  prologue generation or whether cost model check
			  has not occurred during prologue generation and hence
			  needs to occur during epilogue generation.

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.  */

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
			       edge e, tree first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
	Resulting CFG would be:

	first_loop:
	do {
	} while ...

	second_loop:
	do {
	} while ...

	orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
    {
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\n%s:%d: note: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	  fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
	}
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = ssa_names_to_replace ();
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
	 This occurs when this function is invoked for prologue or epilogue
	 generation and when the cost model check can be done at compile time.

	 Resulting CFG would be:

	 bb_before_first_loop:
	 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
			       GOTO first-loop

	 first_loop:
	 do {
	 } while ...

	 bb_before_second_loop:

	 second_loop:
	 do {
	 } while ...

	 orig_exit_bb:

     2.b For prologue peeling with a run time cost model check, the check
	 "if (scalar_loop_iterations <= th) FIRST_NITERS = scalar_loop_iterations"
	 is emitted before bb_before_first_loop (by set_prologue_iterations),
	 so that the prologue executes all scalar iterations when the vector
	 loop is unprofitable.

     2.c For epilogue peeling with a run time cost model check, the guard
	 before the first loop becomes
	 "if (scalar_loop_iterations <= th || FIRST_NITERS == 0)
	    GOTO bb_before_second_loop", again letting the scalar (epilogue)
	 loop run all iterations when vectorization is unprofitable.  */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
		     build_int_cst (TREE_TYPE (first_niters), 0));
      if (check_profitability)
	{
	  tree scalar_loop_iters
	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
			    (loop_vec_info_for_loop (loop)));
	  cost_pre_condition =
	    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
		    build_int_cst (TREE_TYPE (scalar_loop_iters), th));

	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				       cost_pre_condition, pre_condition);
	}
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th);

      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
		     build_int_cst (TREE_TYPE (first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
				  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb, &definitions);


  /* 3. Add the guard that controls whether the second loop is executed.
	Resulting CFG would be:

	bb_before_first_loop:
	if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
			       GOTO first-loop

	first_loop:
	do {
	} while ...

	bb_between_loops:
	if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
				    GOTO bb_before_second_loop

	bb_before_second_loop:

	second_loop:
	do {
	} while ...

	bb_after_second_loop:

	orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
				  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
				      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  BITMAP_FREE (definitions);
  delete_update_ssa ();

  return new_loop;
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and UNKNOWN_LOC if not.  */

LOC
find_loop_location (struct loop *loop)
{
  tree node = NULL_TREE;
  basic_block bb;
  block_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOC;

  /* Preferred source: the loop's exit condition.  */
  node = get_loop_exit_condition (loop);

  if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    return EXPR_LOC (node);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location: use the first located
     statement in the header.  */

  if (!loop->header)
    return UNKNOWN_LOC;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node))
	return EXPR_LOC (node);
    }

  return UNKNOWN_LOC;
}
1416 /*************************************************************************
1417 Vectorization Debug Information.
1418 *************************************************************************/
1420 /* Function vect_set_verbosity_level.
1422 Called from toplev.c upon detection of the
1423 -ftree-vectorizer-verbose=N option. */
1425 void
1426 vect_set_verbosity_level (const char *val)
1428 unsigned int vl;
1430 vl = atoi (val);
1431 if (vl < MAX_VERBOSITY_LEVEL)
1432 vect_verbosity_level = vl;
1433 else
1434 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
/* Function vect_set_dump_settings.

   Fix the verbosity level of the vectorizer if the
   requested level was not set explicitly using the flag
   -ftree-vectorizer-verbose=N.
   Decide where to print the debugging information (dump_file/stderr).
   If the user defined the verbosity level, but there is no dump file,
   print to stderr, otherwise print to the dump file.  */

static void
vect_set_dump_settings (void)
{
  vect_dump = dump_file;

  /* Check if the verbosity level was defined by the user:  */
  if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
    {
      /* If there is no dump file, print to stderr.  */
      if (!dump_file)
	vect_dump = stderr;
      return;
    }

  /* User didn't specify verbosity level: derive one from the dump flags.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    vect_verbosity_level = REPORT_DETAILS;
  else if (dump_file && (dump_flags & TDF_STATS))
    vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
  else
    vect_verbosity_level = REPORT_NONE;

  /* Without a dump file we must not have been asked to report anything.  */
  gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
}
/* Function vect_print_dump_info.

   For vectorization debug dumps: return true iff a message at verbosity
   level VL should be emitted, and if so print the "file:line: note: "
   prefix for the current loop (or for the current function when the loop
   location is unknown) to VECT_DUMP.  */

bool
vect_print_dump_info (enum verbosity_levels vl)
{
  if (vl > vect_verbosity_level)
    return false;

  if (!current_function_decl || !vect_dump)
    return false;

  if (vect_loop_location == UNKNOWN_LOC)
    fprintf (vect_dump, "\n%s:%d: note: ",
	     DECL_SOURCE_FILE (current_function_decl),
	     DECL_SOURCE_LINE (current_function_decl));
  else
    fprintf (vect_dump, "\n%s:%d: note: ",
	     LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));

  return true;
}
1498 /*************************************************************************
1499 Vectorization Utilities.
1500 *************************************************************************/
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT, belonging
   to LOOP_VINFO.  All fields start out in their "unanalyzed" state.  */

stmt_vec_info
new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_RELEVANT (res) = 0;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  /* Data-reference address decomposition, filled in by the data-ref
     analysis.  */
  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  /* PHIs in a loop header may be inductions/reductions; classify them
     later.  Everything else is a plain loop def until proven otherwise.  */
  if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = 0;

  /* Interleaving (strided access) group bookkeeping.  */
  DR_GROUP_FIRST_DR (res) = NULL_TREE;
  DR_GROUP_NEXT_DR (res) = NULL_TREE;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
1549 /* Free stmt vectorization related info. */
1551 void
1552 free_stmt_vec_info (tree stmt)
1554 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1556 if (!stmt_info)
1557 return;
1559 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1560 free (stmt_info);
1561 set_stmt_info (stmt_ann (stmt), NULL);
1565 /* Function bb_in_loop_p
1567 Used as predicate for dfs order traversal of the loop bbs. */
1569 static bool
1570 bb_in_loop_p (const_basic_block bb, const void *data)
1572 const struct loop *const loop = (const struct loop *)data;
1573 if (flow_bb_inside_loop_p (loop, bb))
1574 return true;
1575 return false;
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      tree phi;

      /* BBs in a nested inner-loop will have been already processed (because
	 we will have called vect_analyze_loop_form for any nested inner-loop).
	 Therefore, for stmts in an inner-loop we just want to update the
	 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
	 loop_info of the outer-loop we are currently considering to vectorize
	 (instead of the loop_info of the inner-loop).
	 For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
	{
	  /* Inner-loop bb.  */
	  gcc_assert (loop->inner && bb->loop_father == loop->inner);
	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	    {
	      stmt_vec_info stmt_info = vinfo_for_stmt (phi);
	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
	      STMT_VINFO_LOOP_VINFO (stmt_info) = res;
	    }
	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	    {
	      tree stmt = bsi_stmt (si);
	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
	      STMT_VINFO_LOOP_VINFO (stmt_info) = res;
	    }
	}
      else
	{
	  /* bb in current nest.  */
	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	    {
	      stmt_ann_t ann = get_stmt_ann (phi);
	      set_stmt_info (ann, new_stmt_vec_info (phi, res));
	    }

	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	    {
	      tree stmt = bsi_stmt (si);
	      stmt_ann_t ann = stmt_ann (stmt);
	      set_stmt_info (ann, new_stmt_vec_info (stmt, res));
	    }
	}
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would the same
     as reversed postorder traversal, so we are safe.  */

  /* Replace the unordered body array with one in dfs order.  */
  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
			     bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;

  return res;
}
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  When CLEAN_STMTS is false, only the loop-level data is
   released and per-stmt info is left in place.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  block_stmt_iterator si;
  int j;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  if (!clean_stmts)
    {
      /* Free only the loop-level data, keeping stmt_vec_infos alive.
	 NOTE(review): this path does not free MAY_ALIAS_DDRS,
	 STRIDED_STORES or the SLP instances, unlike the path below —
	 verify this is intentional and not a leak.  */
      free (LOOP_VINFO_BBS (loop_vinfo));
      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
      VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));

      free (loop_vinfo);
      loop->aux = NULL;
      return;
    }

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      tree phi;

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	free_stmt_vec_info (phi);

      for (si = bsi_start (bb); !bsi_end_p (si); )
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

	  if (stmt_info)
	    {
	      /* Check if this is a "pattern stmt" (introduced by the
		 vectorizer during the pattern recognition pass).  */
	      bool remove_stmt_p = false;
	      tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
	      if (orig_stmt)
		{
		  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
		  if (orig_stmt_info
		      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
		    remove_stmt_p = true;
		}

	      /* Free stmt_vec_info.  */
	      free_stmt_vec_info (stmt);

	      /* Remove dead "pattern stmts".  */
	      if (remove_stmt_p)
		bsi_remove (&si, true);
	    }
	  /* bsi_remove already advanced the iterator; otherwise step.  */
	  bsi_next (&si);
	}
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
    vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}
1771 /* Function vect_force_dr_alignment_p.
1773 Returns whether the alignment of a DECL can be forced to be aligned
1774 on ALIGNMENT bit boundary. */
1776 bool
1777 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
1779 if (TREE_CODE (decl) != VAR_DECL)
1780 return false;
1782 if (DECL_EXTERNAL (decl))
1783 return false;
1785 if (TREE_ASM_WRITTEN (decl))
1786 return false;
1788 if (TREE_STATIC (decl))
1789 return (alignment <= MAX_OFILE_ALIGNMENT);
1790 else
1791 /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
1792 correct until someone implements forced stack alignment. */
1793 return (alignment <= STACK_BOUNDARY);
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target, or NULL_TREE if none.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  /* The element must fit (more than once) inside a SIMD word.  */
  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  /* NOTE(review): checked only after the dump above — presumably
     build_vector_type may return NULL_TREE here; verify.  */
  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  /* Reject types whose mode the target cannot actually handle as a
     vector (or as a wide integer).  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
/* Function vect_supportable_dr_alignment

   Return whether the data reference DR is supported with respect to its
   alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = (int) TYPE_MODE (vectype);
  struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
  bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
  bool invariant_in_outerloop = false;

  if (aligned_access_p (dr))
    return dr_aligned;

  /* NOTE(review): INVARIANT_IN_OUTERLOOP is computed below but never read
     in this function — presumably leftover from or for a related change;
     verify.  */
  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      invariant_in_outerloop =
	(tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the explicit
     realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that is will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
          for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
    } */

  if (DR_IS_READ (dr))
    {
      if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
	  CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
	  /* In an inner loop whose step differs from the vector size the
	     misalignment varies, so only the unoptimized explicit scheme
	     applies (see the discussion above).  */
	  if (nested_in_vect_loop
	      && (TREE_INT_CST_LOW (DR_STEP (dr))
		  != GET_MODE_SIZE (TYPE_MODE (vectype))))
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}

      if (optab_handler (movmisalign_optab, mode)->insn_code !=
	  CODE_FOR_nothing)
	/* Can't software pipeline the loads, but can at least do them.  */
	return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
/* Function vect_is_simple_use.

   Input:
   LOOP - the loop that is being vectorized.
   OPERAND - operand of a stmt in LOOP.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   Supportable operands are constants, loop invariants, and operands that are
   defined by the current iteration of the loop.  Unsupportable operands are
   those that are defined by a previous iteration of the loop (as is the case
   in reduction/induction computations).

   On success, *DEF_STMT, *DEF and *DT describe the definition found.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL_TREE;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  /* Literal constants are trivially fine.  */
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }
  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_invariant_def;
      return true;
    }

  /* Look through PAREN_EXPR (a non-associatable copy wrapper).  */
  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }
  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL_TREE )
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
    }

  /* empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT).  */
  if (IS_EMPTY_STMT (*def_stmt))
    {
      tree arg = TREE_OPERAND (*def_stmt, 0);
      if (is_gimple_min_invariant (arg))
	{
	  *def = operand;
	  *dt = vect_invariant_def;
	  return true;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unexpected empty stmt.");
      return false;
    }

  /* Defs outside the loop are invariants; inside the loop, the def type
     was classified during loop-form analysis.  */
  bb = bb_for_stmt (*def_stmt);
  if (!flow_bb_inside_loop_p (loop, bb))
    *dt = vect_invariant_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.",*dt);

  switch (TREE_CODE (*def_stmt))
    {
    case PHI_NODE:
      *def = PHI_RESULT (*def_stmt);
      break;

    case GIMPLE_MODIFY_STMT:
      *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
      break;

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
2086 /* Function supportable_widening_operation
2088 Check whether an operation represented by the code CODE is a
2089 widening operation that is supported by the target platform in
2090 vector form (i.e., when operating on arguments of type VECTYPE).
2092 Widening operations we currently support are NOP (CONVERT), FLOAT
2093 and WIDEN_MULT. This function checks if these operations are supported
2094 by the target platform either directly (via vector tree-codes), or via
2095 target builtins.
2097 Output:
2098 - CODE1 and CODE2 are codes of vector operations to be used when
2099 vectorizing the operation, if available.
2100 - DECL1 and DECL2 are decls of target builtin functions to be used
2101 when vectorizing the operation, if available. In this case,
2102 CODE1 and CODE2 are CALL_EXPR. */
2104 bool
2105 supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
2106 tree *decl1, tree *decl2,
2107 enum tree_code *code1, enum tree_code *code2)
2109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2110 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
2111 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2112 bool ordered_p;
2113 enum machine_mode vec_mode;
2114 enum insn_code icode1, icode2;
2115 optab optab1, optab2;
2116 tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
2117 tree type = TREE_TYPE (expr);
2118 tree wide_vectype = get_vectype_for_scalar_type (type);
2119 enum tree_code c1, c2;
2121 /* The result of a vectorized widening operation usually requires two vectors
2122 (because the widened results do not fit int one vector). The generated
2123 vector results would normally be expected to be generated in the same
2124 order as in the original scalar computation, i.e. if 8 results are
2125 generated in each vector iteration, they are to be organized as follows:
2126 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
2128 However, in the special case that the result of the widening operation is
2129 used in a reduction computation only, the order doesn't matter (because
2130 when vectorizing a reduction we change the order of the computation).
2131 Some targets can take advantage of this and generate more efficient code.
2132 For example, targets like Altivec, that support widen_mult using a sequence
2133 of {mult_even,mult_odd} generate the following vectors:
2134 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
2136 When vectorizing outer-loops, we execute the inner-loop sequentially
2137 (each vectorized inner-loop iteration contributes to VF outer-loop
2138 iterations in parallel). We therefore don't allow to change the order
2139 of the computation in the inner-loop during outer-loop vectorization. */
2141 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
2142 && !nested_in_vect_loop_p (vect_loop, stmt))
2143 ordered_p = false;
2144 else
2145 ordered_p = true;
2147 if (!ordered_p
2148 && code == WIDEN_MULT_EXPR
2149 && targetm.vectorize.builtin_mul_widen_even
2150 && targetm.vectorize.builtin_mul_widen_even (vectype)
2151 && targetm.vectorize.builtin_mul_widen_odd
2152 && targetm.vectorize.builtin_mul_widen_odd (vectype))
2154 if (vect_print_dump_info (REPORT_DETAILS))
2155 fprintf (vect_dump, "Unordered widening operation detected.");
2157 *code1 = *code2 = CALL_EXPR;
2158 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
2159 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
2160 return true;
2163 switch (code)
2165 case WIDEN_MULT_EXPR:
2166 if (BYTES_BIG_ENDIAN)
2168 c1 = VEC_WIDEN_MULT_HI_EXPR;
2169 c2 = VEC_WIDEN_MULT_LO_EXPR;
2171 else
2173 c2 = VEC_WIDEN_MULT_HI_EXPR;
2174 c1 = VEC_WIDEN_MULT_LO_EXPR;
2176 break;
2178 CASE_CONVERT:
2179 if (BYTES_BIG_ENDIAN)
2181 c1 = VEC_UNPACK_HI_EXPR;
2182 c2 = VEC_UNPACK_LO_EXPR;
2184 else
2186 c2 = VEC_UNPACK_HI_EXPR;
2187 c1 = VEC_UNPACK_LO_EXPR;
2189 break;
2191 case FLOAT_EXPR:
2192 if (BYTES_BIG_ENDIAN)
2194 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
2195 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
2197 else
2199 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
2200 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
2202 break;
2204 case FIX_TRUNC_EXPR:
2205 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
2206 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
2207 computing the operation. */
2208 return false;
2210 default:
2211 gcc_unreachable ();
2214 if (code == FIX_TRUNC_EXPR)
2216 /* The signedness is determined from output operand. */
2217 optab1 = optab_for_tree_code (c1, type, optab_default);
2218 optab2 = optab_for_tree_code (c2, type, optab_default);
2220 else
2222 optab1 = optab_for_tree_code (c1, vectype, optab_default);
2223 optab2 = optab_for_tree_code (c2, vectype, optab_default);
2226 if (!optab1 || !optab2)
2227 return false;
2229 vec_mode = TYPE_MODE (vectype);
2230 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
2231 || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
2232 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
2233 == CODE_FOR_nothing
2234 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
2235 return false;
2237 *code1 = c1;
2238 *code2 = c2;
2239 return true;
2243 /* Function supportable_narrowing_operation
2245 Check whether an operation represented by the code CODE is a
2246 narrowing operation that is supported by the target platform in
2247 vector form (i.e., when operating on arguments of type VECTYPE).
2249 Narrowing operations we currently support are NOP (CONVERT) and
2250 FIX_TRUNC. This function checks if these operations are supported by
2251 the target platform directly via vector tree-codes.
2253 Output:
2254 - CODE1 is the code of a vector operation to be used when
2255 vectorizing the operation, if available. */
2257 bool
2258 supportable_narrowing_operation (enum tree_code code,
2259 const_tree stmt, const_tree vectype,
2260 enum tree_code *code1)
2262 enum machine_mode vec_mode;
2263 enum insn_code icode1;
2264 optab optab1;
2265 tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
2266 tree type = TREE_TYPE (expr);
2267 tree narrow_vectype = get_vectype_for_scalar_type (type);
2268 enum tree_code c1;
2270 switch (code)
2272 CASE_CONVERT:
2273 c1 = VEC_PACK_TRUNC_EXPR;
2274 break;
2276 case FIX_TRUNC_EXPR:
2277 c1 = VEC_PACK_FIX_TRUNC_EXPR;
2278 break;
2280 case FLOAT_EXPR:
2281 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
2282 tree code and optabs used for computing the operation. */
2283 return false;
2285 default:
2286 gcc_unreachable ();
2289 if (code == FIX_TRUNC_EXPR)
2290 /* The signedness is determined from output operand. */
2291 optab1 = optab_for_tree_code (c1, type, optab_default);
2292 else
2293 optab1 = optab_for_tree_code (c1, vectype, optab_default);
2295 if (!optab1)
2296 return false;
2298 vec_mode = TYPE_MODE (vectype);
2299 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
2300 || insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
2301 return false;
2303 *code1 = c1;
2304 return true;
2308 /* Function reduction_code_for_scalar_code
2310 Input:
2311 CODE - tree_code of a reduction operations.
2313 Output:
2314 REDUC_CODE - the corresponding tree-code to be used to reduce the
2315 vector of partial results into a single scalar result (which
2316 will also reside in a vector).
2318 Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
2320 bool
2321 reduction_code_for_scalar_code (enum tree_code code,
2322 enum tree_code *reduc_code)
2324 switch (code)
2326 case MAX_EXPR:
2327 *reduc_code = REDUC_MAX_EXPR;
2328 return true;
2330 case MIN_EXPR:
2331 *reduc_code = REDUC_MIN_EXPR;
2332 return true;
2334 case PLUS_EXPR:
2335 *reduc_code = REDUC_PLUS_EXPR;
2336 return true;
2338 default:
2339 return false;
2344 /* Function vect_is_simple_reduction
2346 Detect a cross-iteration def-use cycle that represents a simple
2347 reduction computation. We look for the following pattern:
2349 loop_header:
2350 a1 = phi < a0, a2 >
2351 a3 = ...
2352 a2 = operation (a3, a1)
2354 such that:
2355 1. operation is commutative and associative and it is safe to
2356 change the order of the computation.
2357 2. no uses for a2 in the loop (a2 is used out of the loop)
2358 3. no uses of a1 in the loop besides the reduction operation.
2360 Condition 1 is tested here.
2361 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
2363 tree
2364 vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
2366 struct loop *loop = (bb_for_stmt (phi))->loop_father;
2367 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2368 edge latch_e = loop_latch_edge (loop);
2369 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2370 tree def_stmt, def1, def2;
2371 enum tree_code code;
2372 int op_type;
2373 tree operation, op1, op2;
2374 tree type;
2375 int nloop_uses;
2376 tree name;
2377 imm_use_iterator imm_iter;
2378 use_operand_p use_p;
2380 gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
2382 name = PHI_RESULT (phi);
2383 nloop_uses = 0;
2384 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2386 tree use_stmt = USE_STMT (use_p);
2387 if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
2388 && vinfo_for_stmt (use_stmt)
2389 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2390 nloop_uses++;
2391 if (nloop_uses > 1)
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "reduction used in loop.");
2395 return NULL_TREE;
2399 if (TREE_CODE (loop_arg) != SSA_NAME)
2401 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "reduction: not ssa_name: ");
2404 print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2406 return NULL_TREE;
2409 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2410 if (!def_stmt)
2412 if (vect_print_dump_info (REPORT_DETAILS))
2413 fprintf (vect_dump, "reduction: no def_stmt.");
2414 return NULL_TREE;
2417 if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
2419 if (vect_print_dump_info (REPORT_DETAILS))
2420 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2421 return NULL_TREE;
2424 name = GIMPLE_STMT_OPERAND (def_stmt, 0);
2425 nloop_uses = 0;
2426 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2428 tree use_stmt = USE_STMT (use_p);
2429 if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
2430 && vinfo_for_stmt (use_stmt)
2431 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2432 nloop_uses++;
2433 if (nloop_uses > 1)
2435 if (vect_print_dump_info (REPORT_DETAILS))
2436 fprintf (vect_dump, "reduction used in loop.");
2437 return NULL_TREE;
2441 operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
2442 code = TREE_CODE (operation);
2443 if (!commutative_tree_code (code) || !associative_tree_code (code))
2445 if (vect_print_dump_info (REPORT_DETAILS))
2447 fprintf (vect_dump, "reduction: not commutative/associative: ");
2448 print_generic_expr (vect_dump, operation, TDF_SLIM);
2450 return NULL_TREE;
2453 op_type = TREE_OPERAND_LENGTH (operation);
2454 if (op_type != binary_op)
2456 if (vect_print_dump_info (REPORT_DETAILS))
2458 fprintf (vect_dump, "reduction: not binary operation: ");
2459 print_generic_expr (vect_dump, operation, TDF_SLIM);
2461 return NULL_TREE;
2464 op1 = TREE_OPERAND (operation, 0);
2465 op2 = TREE_OPERAND (operation, 1);
2466 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
2468 if (vect_print_dump_info (REPORT_DETAILS))
2470 fprintf (vect_dump, "reduction: uses not ssa_names: ");
2471 print_generic_expr (vect_dump, operation, TDF_SLIM);
2473 return NULL_TREE;
2476 /* Check that it's ok to change the order of the computation. */
2477 type = TREE_TYPE (operation);
2478 if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
2479 || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
2481 if (vect_print_dump_info (REPORT_DETAILS))
2483 fprintf (vect_dump, "reduction: multiple types: operation type: ");
2484 print_generic_expr (vect_dump, type, TDF_SLIM);
2485 fprintf (vect_dump, ", operands types: ");
2486 print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2487 fprintf (vect_dump, ",");
2488 print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2490 return NULL_TREE;
2493 /* Generally, when vectorizing a reduction we change the order of the
2494 computation. This may change the behavior of the program in some
2495 cases, so we need to check that this is ok. One exception is when
2496 vectorizing an outer-loop: the inner-loop is executed sequentially,
2497 and therefore vectorizing reductions in the inner-loop during
2498 outer-loop vectorization is safe. */
2500 /* CHECKME: check for !flag_finite_math_only too? */
2501 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2502 && !nested_in_vect_loop_p (vect_loop, def_stmt))
2504 /* Changing the order of operations changes the semantics. */
2505 if (vect_print_dump_info (REPORT_DETAILS))
2507 fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
2508 print_generic_expr (vect_dump, operation, TDF_SLIM);
2510 return NULL_TREE;
2512 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2513 && !nested_in_vect_loop_p (vect_loop, def_stmt))
2515 /* Changing the order of operations changes the semantics. */
2516 if (vect_print_dump_info (REPORT_DETAILS))
2518 fprintf (vect_dump, "reduction: unsafe int math optimization: ");
2519 print_generic_expr (vect_dump, operation, TDF_SLIM);
2521 return NULL_TREE;
2523 else if (SAT_FIXED_POINT_TYPE_P (type))
2525 /* Changing the order of operations changes the semantics. */
2526 if (vect_print_dump_info (REPORT_DETAILS))
2528 fprintf (vect_dump, "reduction: unsafe fixed-point math optimization: ");
2529 print_generic_expr (vect_dump, operation, TDF_SLIM);
2531 return NULL_TREE;
2534 /* reduction is safe. we're dealing with one of the following:
2535 1) integer arithmetic and no trapv
2536 2) floating point arithmetic, and special flags permit this optimization.
2538 def1 = SSA_NAME_DEF_STMT (op1);
2539 def2 = SSA_NAME_DEF_STMT (op2);
2540 if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
2542 if (vect_print_dump_info (REPORT_DETAILS))
2544 fprintf (vect_dump, "reduction: no defs for operands: ");
2545 print_generic_expr (vect_dump, operation, TDF_SLIM);
2547 return NULL_TREE;
2551 /* Check that one def is the reduction def, defined by PHI,
2552 the other def is either defined in the loop ("vect_loop_def"),
2553 or it's an induction (defined by a loop-header phi-node). */
2555 if (def2 == phi
2556 && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
2557 && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
2558 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
2559 || (TREE_CODE (def1) == PHI_NODE
2560 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
2561 && !is_loop_header_bb_p (bb_for_stmt (def1)))))
2563 if (vect_print_dump_info (REPORT_DETAILS))
2565 fprintf (vect_dump, "detected reduction:");
2566 print_generic_expr (vect_dump, operation, TDF_SLIM);
2568 return def_stmt;
2570 else if (def1 == phi
2571 && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
2572 && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
2573 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
2574 || (TREE_CODE (def2) == PHI_NODE
2575 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
2576 && !is_loop_header_bb_p (bb_for_stmt (def2)))))
2578 /* Swap operands (just for simplicity - so that the rest of the code
2579 can assume that the reduction variable is always the last (second)
2580 argument). */
2581 if (vect_print_dump_info (REPORT_DETAILS))
2583 fprintf (vect_dump, "detected reduction: need to swap operands:");
2584 print_generic_expr (vect_dump, operation, TDF_SLIM);
2586 swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
2587 &TREE_OPERAND (operation, 1));
2588 return def_stmt;
2590 else
2592 if (vect_print_dump_info (REPORT_DETAILS))
2594 fprintf (vect_dump, "reduction: unknown pattern.");
2595 print_generic_expr (vect_dump, operation, TDF_SLIM);
2597 return NULL_TREE;
2602 /* Function vect_is_simple_iv_evolution.
2604 FORNOW: A simple evolution of an induction variables in the loop is
2605 considered a polynomial evolution with constant step. */
2607 bool
2608 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2609 tree * step)
2611 tree init_expr;
2612 tree step_expr;
2613 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2615 /* When there is no evolution in this loop, the evolution function
2616 is not "simple". */
2617 if (evolution_part == NULL_TREE)
2618 return false;
2620 /* When the evolution is a polynomial of degree >= 2
2621 the evolution function is not "simple". */
2622 if (tree_is_chrec (evolution_part))
2623 return false;
2625 step_expr = evolution_part;
2626 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
2628 if (vect_print_dump_info (REPORT_DETAILS))
2630 fprintf (vect_dump, "step: ");
2631 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2632 fprintf (vect_dump, ", init: ");
2633 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2636 *init = init_expr;
2637 *step = step_expr;
2639 if (TREE_CODE (step_expr) != INTEGER_CST)
2641 if (vect_print_dump_info (REPORT_DETAILS))
2642 fprintf (vect_dump, "step unknown.");
2643 return false;
2646 return true;
2650 /* Function vectorize_loops.
2652 Entry Point to loop vectorization phase. */
2654 unsigned
2655 vectorize_loops (void)
2657 unsigned int i;
2658 unsigned int num_vectorized_loops = 0;
2659 unsigned int vect_loops_num;
2660 loop_iterator li;
2661 struct loop *loop;
2663 vect_loops_num = number_of_loops ();
2665 /* Bail out if there are no loops. */
2666 if (vect_loops_num <= 1)
2667 return 0;
2669 /* Fix the verbosity level if not defined explicitly by the user. */
2670 vect_set_dump_settings ();
2672 /* Allocate the bitmap that records which virtual variables that
2673 need to be renamed. */
2674 vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
2676 /* ----------- Analyze loops. ----------- */
2678 /* If some loop was duplicated, it gets bigger number
2679 than all previously defined loops. This fact allows us to run
2680 only over initial loops skipping newly generated ones. */
2681 FOR_EACH_LOOP (li, loop, 0)
2683 loop_vec_info loop_vinfo;
2685 vect_loop_location = find_loop_location (loop);
2686 loop_vinfo = vect_analyze_loop (loop);
2687 loop->aux = loop_vinfo;
2689 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2690 continue;
2692 vect_transform_loop (loop_vinfo);
2693 num_vectorized_loops++;
2695 vect_loop_location = UNKNOWN_LOC;
2697 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
2698 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
2699 || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
2700 && num_vectorized_loops > 0))
2701 fprintf (vect_dump, "vectorized %u loops in function.\n",
2702 num_vectorized_loops);
2704 /* ----------- Finalize. ----------- */
2706 BITMAP_FREE (vect_memsyms_to_rename);
2708 for (i = 1; i < vect_loops_num; i++)
2710 loop_vec_info loop_vinfo;
2712 loop = get_loop (i);
2713 if (!loop)
2714 continue;
2715 loop_vinfo = loop->aux;
2716 destroy_loop_vec_info (loop_vinfo, true);
2717 loop->aux = NULL;
2720 return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
2723 /* Increase alignment of global arrays to improve vectorization potential.
2724 TODO:
2725 - Consider also structs that have an array field.
2726 - Use ipa analysis to prune arrays that can't be vectorized?
2727 This should involve global alignment analysis and in the future also
2728 array padding. */
2730 static unsigned int
2731 increase_alignment (void)
2733 struct varpool_node *vnode;
2735 /* Increase the alignment of all global arrays for vectorization. */
2736 for (vnode = varpool_nodes_queue;
2737 vnode;
2738 vnode = vnode->next_needed)
2740 tree vectype, decl = vnode->decl;
2741 unsigned int alignment;
2743 if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
2744 continue;
2745 vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
2746 if (!vectype)
2747 continue;
2748 alignment = TYPE_ALIGN (vectype);
2749 if (DECL_ALIGN (decl) >= alignment)
2750 continue;
2752 if (vect_can_force_dr_alignment_p (decl, alignment))
2754 DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
2755 DECL_USER_ALIGN (decl) = 1;
2756 if (dump_file)
2758 fprintf (dump_file, "Increasing alignment of decl: ");
2759 print_generic_expr (dump_file, decl, TDF_SLIM);
2763 return 0;
2766 static bool
2767 gate_increase_alignment (void)
2769 return flag_section_anchors && flag_tree_vectorize;
2772 struct simple_ipa_opt_pass pass_ipa_increase_alignment =
2775 SIMPLE_IPA_PASS,
2776 "increase_alignment", /* name */
2777 gate_increase_alignment, /* gate */
2778 increase_alignment, /* execute */
2779 NULL, /* sub */
2780 NULL, /* next */
2781 0, /* static_pass_number */
2782 0, /* tv_id */
2783 0, /* properties_required */
2784 0, /* properties_provided */
2785 0, /* properties_destroyed */
2786 0, /* todo_flags_start */
2787 0 /* todo_flags_finish */