gcc/tree-vect-loop.c
/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "params.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "target.h"
/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of
   following stmts which use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
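/* As a minimal sketch of the support check described above (assuming a
   vectype whose mode is V8HImode; optab_for_tree_code maps a tree code to
   the corresponding optab), the query amounts to:

     optab op = optab_for_tree_code (PLUS_EXPR, vectype, optab_default);
     if (!op || optab_handler (op, V8HImode) == CODE_FOR_nothing)
       ...   no target support - the stmt cannot be vectorized

   The same kind of query is made for every operation the loop contains.  */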
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data
   elements that are operated upon in parallel in a single iteration of the
   vectorized loop.  For example, when vectorizing a loop that operates on
   4-byte elements, on a target with vector size (VS) of 16 bytes, the VF is
   set to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
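/* A worked example (assuming a 16-byte vector size): if the smallest scalar
   type in the loop is "short" (2 bytes), its vectype is V8HI and nunits is
   8, so VF becomes 8.  A stmt operating on 4-byte "int"s in the same loop
   gets vectype V4SI (nunits 4); both vectypes are 16 bytes, so the
   same-vector-size check below passes, and VF stays at the maximum nunits
   seen, i.e. 8.  */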
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          tree vf_vectype;
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (is_pattern_stmt_p (stmt_info))
                {
                  /* We are not going to vectorize this pattern statement,
                     therefore, remove it.  */
                  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (stmt);
                  STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
                  gsi_remove (&tmp_gsi, true);
                  free_stmt_vec_info (stmt);
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vf_vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: different sized vector "
                           "types in statement, ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                  fprintf (vect_dump, " and ");
                  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution with constant step.  */
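/* For instance, for the counter of "for (i = 0; i < N; i++)" scev computes
   the access function {0, +, 1}_loop: a simple evolution with INIT 0 and
   constant STEP 1.  An IV whose step itself evolves across iterations (a
   chrec of degree >= 2) is rejected by the tree_is_chrec check below.  */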
static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "step: ");
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
      fprintf (vect_dump, ", init: ");
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "step unknown.");
      return false;
    }

  return true;
}
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dumy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;
  bool double_reduc;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  Reduction detection assumes that all
     the inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        STRIP_NOPS (access_fn);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Detected double reduction.");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected vectorizable nested cycle.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected reduction.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  VEC_safe_push (gimple, heap,
                                 LOOP_VINFO_REDUCTIONS (loop_vinfo),
                                 reduc_stmt);
                }
            }
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */

static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}
/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (gimple, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_PEELING_HTAB (res) = NULL;
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;

  return res;
}
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all
   the stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  if (!clean_stmts)
    {
      free (LOOP_VINFO_BBS (loop_vinfo));
      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
      VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
      VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
      VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));

      free (loop_vinfo);
      loop->aux = NULL;
      return;
    }

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (stmt_info)
            {
              /* Check if this is a "pattern stmt" (introduced by the
                 vectorizer during the pattern recognition pass).  */
              bool remove_stmt_p = false;
              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
              if (orig_stmt)
                {
                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
                  if (orig_stmt_info
                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
                    remove_stmt_p = true;
                }

              /* Free stmt_vec_info.  */
              free_stmt_vec_info (stmt);

              /* Remove dead "pattern stmts".  */
              if (remove_stmt_p)
                gsi_remove (&si, true);
            }
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
    vect_free_slp_instance (instance);

  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));

  if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
    htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}
/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}
/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
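/* For instance, "for (i = 0; i < n; i++) ..." with no other exit is
   countable: scev can express the iteration count as "n".  By contrast, a
   loop such as "while (*p++ != 0) ..." has no computable iteration count
   and is rejected by the !number_of_iterations check below.  */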
loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}
/* Get cost by calling cost target builtin.  */

static inline int
vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
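/* Typical use (a sketch; scalar_stmt and cond_branch_taken are members of
   enum vect_cost_for_stmt):

     int scalar_cost = vect_get_cost (scalar_stmt);
     int branch_cost = vect_get_cost (cond_branch_taken);

   The answer comes from the target's builtin_vectorization_cost hook, so
   different targets can weight the same statement kind differently.  */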
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true, ok;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (slp)
    {
      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
         the vectorization factor of the loop is the unrolling factor
         required by the SLP instances.  If that unrolling factor is 1, we
         say that we perform pure SLP on the loop; cross-iteration
         parallelism is not exploited.  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              gcc_assert (stmt_info);
              if ((STMT_VINFO_RELEVANT_P (stmt_info)
                   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
                  && !PURE_SLP_STMT (stmt_info))
                /* STMT needs both SLP and loop-based vectorization.  */
                only_slp_in_loop = false;
            }
        }

      if (only_slp_in_loop)
        vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
      else
        vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Updating vectorization factor to %d ",
                 vectorization_factor);
    }

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
            {
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), because this case
                 requires us to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                     != vect_double_reduction_def)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                {
                  tree phi_op;
                  gimple op_def_stmt;

                  if (gimple_phi_num_args (phi) != 1)
                    return false;

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)
                    return false;

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
                  if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
                    return false;

                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)
                    return false;
                }

              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,"not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
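  /* A worked example of the two thresholds: with --param
     min-vect-loop-bound=2 (an illustrative value, not the default) and
     VF = 4, min_scalar_loop_bound is 2 * 4 - 1 = 7; if the cost model
     computed min_profitable_iters = 10, TH becomes 10, so a loop known to
     run at most 10 iterations is rejected by the check below.  */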
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function vect_analyze_loop_2.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
  bool ok, dummy, slp = false;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  int min_vf = 2;

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.  Also adjust the minimal
     vectorization factor according to the loads and stores.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      return false;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed
     separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be
     vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      return false;
    }

  /* Analyze data dependences between the data-refs in the loop
     and adjust the maximum vectorization factor according to
     the dependences.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
  if (!ok
      || max_vf < min_vf)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      return false;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      return false;
    }
  if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      return false;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      return false;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      return false;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "too long list of versioning for alias "
                 "run-time tests.");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      return false;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, NULL);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      slp = vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }
  else
    return false;

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_loop_operations (loop_vinfo, slp);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      return false;
    }

  return true;
}
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  unsigned int vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  while (1)
    {
      /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
      loop_vinfo = vect_analyze_loop_form (loop);
      if (!loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "bad loop form.");
          return NULL;
        }

      if (vect_analyze_loop_2 (loop_vinfo))
        {
          LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

          return loop_vinfo;
        }

      destroy_loop_vec_info (loop_vinfo, true);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}
/* Function reduction_code_for_scalar_code

   Input:
   CODE - tree_code of a reduction operation.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
      vector of partial results into a single scalar result (which
      will also reside in a vector), or ERROR_MARK if the operation is
      a supported reduction operation but does not have such a tree-code.

   Return FALSE if CODE currently cannot be vectorized as a reduction.  */
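/* For instance, the loop "for (i = 0; i < N; i++) sum += a[i];" reduces
   with CODE PLUS_EXPR; the vector loop accumulates partial sums in the
   lanes of a vector, and REDUC_PLUS_EXPR folds those lanes into the final
   scalar sum in the loop epilogue.  For MULT_EXPR and the bitwise ops below
   there is no dedicated reduction tree-code, so ERROR_MARK is returned and
   the epilogue is typically open-coded instead (e.g. with whole-vector
   shifts or element extracts).  */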
static bool
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
{
  switch (code)
    {
      case MAX_EXPR:
        *reduc_code = REDUC_MAX_EXPR;
        return true;

      case MIN_EXPR:
        *reduc_code = REDUC_MIN_EXPR;
        return true;

      case PLUS_EXPR:
        *reduc_code = REDUC_PLUS_EXPR;
        return true;

      case MULT_EXPR:
      case MINUS_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        *reduc_code = ERROR_MARK;
        return true;

      default:
        return false;
    }
}
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

static void
report_vect_op (gimple stmt, const char *msg)
{
  fprintf (vect_dump, "%s", msg);
  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */
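/* Such chains typically come from a manually unrolled reduction, e.g.
   (a sketch):

     for (i = 0; i < N; i += 4)
       {
         sum += a[i];
         sum += a[i+1];
         sum += a[i+2];
         sum += a[i+3];
       }

   Each "sum += ..." is one link of the chain; GROUP_FIRST_ELEMENT /
   GROUP_NEXT_ELEMENT below record the links for loop-aware SLP.  */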
static bool
vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
{
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple current_stmt = NULL, use_stmt = NULL, first, next_stmt;
  stmt_vec_info use_stmt_info, current_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, nuses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          use_stmt = USE_STMT (use_p);
          nuses++;
          if (is_gimple_debug (use_stmt))
            continue;

          /* Check if we got back to the reduction phi.  */
          if (gimple_code (use_stmt) == GIMPLE_PHI
              && use_stmt == phi)
            {
              found = true;
              break;
            }

          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
              && vinfo_for_stmt (use_stmt)
              && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))
              && use_stmt != first_stmt)
            nloop_uses++;

          if (nloop_uses > 1)
            return false;
        }

      /* We reached a statement with no uses.  */
      if (nuses == 0)
        return false;

      if (found)
        break;

      /* This is a loop exit phi, and we haven't reached the reduction
         phi.  */
      if (gimple_code (use_stmt) == GIMPLE_PHI)
        return false;

      if (!is_gimple_assign (use_stmt)
          || code != gimple_assign_rhs_code (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = vinfo_for_stmt (use_stmt);
      if (current_stmt)
        {
          current_stmt_info = vinfo_for_stmt (current_stmt);
          GROUP_NEXT_ELEMENT (current_stmt_info) = use_stmt;
          GROUP_FIRST_ELEMENT (use_stmt_info)
            = GROUP_FIRST_ELEMENT (current_stmt_info);
        }
      else
        GROUP_FIRST_ELEMENT (use_stmt_info) = use_stmt;

      lhs = gimple_assign_lhs (use_stmt);
      current_stmt = use_stmt;
      size++;
    }

  if (!found || use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand be the
     second operand.  */
  lhs = PHI_RESULT (phi);
  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  while (next_stmt)
    {
      if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
        {
          if (gimple_assign_rhs2 (next_stmt) == lhs)
            {
              tree op = gimple_assign_rhs1 (next_stmt);
              gimple def_stmt = NULL;

              if (TREE_CODE (op) == SSA_NAME)
                def_stmt = SSA_NAME_DEF_STMT (op);

              /* Check that the other def is either defined in the loop
                 ("vect_internal_def"), or it's an induction (defined by a
                 loop-header phi-node).  */
              if (code == COND_EXPR
                  || (def_stmt
                      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
                      && (is_gimple_assign (def_stmt)
                          || is_gimple_call (def_stmt)
                          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                               == vect_induction_def
                          || (gimple_code (def_stmt) == GIMPLE_PHI
                              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                                   == vect_internal_def
                              && !is_loop_header_bb_p (gimple_bb (def_stmt))))))
                {
                  lhs = gimple_assign_lhs (next_stmt);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                  continue;
                }

              return false;
            }
          else
            {
              tree op = gimple_assign_rhs2 (next_stmt);
              gimple def_stmt = NULL;

              if (TREE_CODE (op) == SSA_NAME)
                def_stmt = SSA_NAME_DEF_STMT (op);

              /* Check that the other def is either defined in the loop
                 ("vect_internal_def"), or it's an induction (defined by a
                 loop-header phi-node).  */
              if (code == COND_EXPR
                  || (def_stmt
                      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
                      && (is_gimple_assign (def_stmt)
                          || is_gimple_call (def_stmt)
                          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                               == vect_induction_def
                          || (gimple_code (def_stmt) == GIMPLE_PHI
                              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                                   == vect_internal_def
                              && !is_loop_header_bb_p (gimple_bb (def_stmt))))))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "swapping oprnds: ");
                      print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
                    }

                  swap_tree_operands (next_stmt,
                                      gimple_assign_rhs1_ptr (next_stmt),
                                      gimple_assign_rhs2_ptr (next_stmt));
                  mark_symbols_for_renaming (next_stmt);
                }
              else
                return false;
            }
        }

      lhs = gimple_assign_lhs (next_stmt);
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  /* Save the chain for further analysis in SLP detection.  */
  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
  GROUP_SIZE (vinfo_for_stmt (first)) = size;

  return true;
}
1881 /* Function vect_is_simple_reduction_1
1883 (1) Detect a cross-iteration def-use cycle that represents a simple
1884 reduction computation. We look for the following pattern:
1886 loop_header:
1887 a1 = phi < a0, a2 >
1888 a3 = ...
1889 a2 = operation (a3, a1)
1891 such that:
1892 1. operation is commutative and associative and it is safe to
1893 change the order of the computation (if CHECK_REDUCTION is true)
1894 2. no uses for a2 in the loop (a2 is used out of the loop)
1895 3. no uses of a1 in the loop besides the reduction operation
1896 4. no uses of a1 outside the loop.
1898 Conditions 1,4 are tested here.
1899 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1901 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1902 nested cycles, if CHECK_REDUCTION is false.
1904 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1905 reductions:
1907 a1 = phi < a0, a2 >
1908 inner loop (def of a3)
1909 a2 = phi < a3 >
1911 If MODIFY is true it also tries to rework the code in-place to enable
1912 detection of more reduction patterns. For the time being we rewrite
1913 "res -= RHS" into "res += -RHS" when it seems worthwhile.
1916 static gimple
1917 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1918 bool check_reduction, bool *double_reduc,
1919 bool modify)
1921 struct loop *loop = (gimple_bb (phi))->loop_father;
1922 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1923 edge latch_e = loop_latch_edge (loop);
1924 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1925 gimple def_stmt, def1 = NULL, def2 = NULL;
1926 enum tree_code orig_code, code;
1927 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1928 tree type;
1929 int nloop_uses;
1930 tree name;
1931 imm_use_iterator imm_iter;
1932 use_operand_p use_p;
1933 bool phi_def;
1935 *double_reduc = false;
1937 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
1938 otherwise, we assume outer loop vectorization. */
1939 gcc_assert ((check_reduction && loop == vect_loop)
1940 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
1942 name = PHI_RESULT (phi);
1943 nloop_uses = 0;
1944 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
1946 gimple use_stmt = USE_STMT (use_p);
1947 if (is_gimple_debug (use_stmt))
1948 continue;
1950 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1952 if (vect_print_dump_info (REPORT_DETAILS))
1953 fprintf (vect_dump, "intermediate value used outside loop.");
1955 return NULL;
1958 if (vinfo_for_stmt (use_stmt)
1959 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
1960 nloop_uses++;
1961 if (nloop_uses > 1)
1963 if (vect_print_dump_info (REPORT_DETAILS))
1964 fprintf (vect_dump, "reduction used in loop.");
1965 return NULL;
1969 if (TREE_CODE (loop_arg) != SSA_NAME)
1971 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "reduction: not ssa_name: ");
1974 print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1976 return NULL;
1979 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1980 if (!def_stmt)
1982 if (vect_print_dump_info (REPORT_DETAILS))
1983 fprintf (vect_dump, "reduction: no def_stmt.");
1984 return NULL;
1987 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
1989 if (vect_print_dump_info (REPORT_DETAILS))
1990 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1991 return NULL;
1994 if (is_gimple_assign (def_stmt))
1996 name = gimple_assign_lhs (def_stmt);
1997 phi_def = false;
1999 else
2001 name = PHI_RESULT (def_stmt);
2002 phi_def = true;
2005 nloop_uses = 0;
2006 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2008 gimple use_stmt = USE_STMT (use_p);
2009 if (is_gimple_debug (use_stmt))
2010 continue;
2011 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2012 && vinfo_for_stmt (use_stmt)
2013 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2014 nloop_uses++;
2015 if (nloop_uses > 1)
2017 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "reduction used in loop.");
2019 return NULL;
2023 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2024 defined in the inner loop. */
2025 if (phi_def)
2027 op1 = PHI_ARG_DEF (def_stmt, 0);
2029 if (gimple_phi_num_args (def_stmt) != 1
2030 || TREE_CODE (op1) != SSA_NAME)
2032 if (vect_print_dump_info (REPORT_DETAILS))
2033 fprintf (vect_dump, "unsupported phi node definition.");
2035 return NULL;
2038 def1 = SSA_NAME_DEF_STMT (op1);
2039 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2040 && loop->inner
2041 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2042 && is_gimple_assign (def1))
2044 if (vect_print_dump_info (REPORT_DETAILS))
2045 report_vect_op (def_stmt, "detected double reduction: ");
2047 *double_reduc = true;
2048 return def_stmt;
2051 return NULL;
2054 code = orig_code = gimple_assign_rhs_code (def_stmt);
2056 /* We can handle "res -= x[i]", which is non-associative, by
2057 simply rewriting it into "res += -x[i]". Avoid changing the
2058 gimple instruction during the first simple tests and only do this
2059 if we're allowed to change code at all. */
2060 if (code == MINUS_EXPR
2061 && modify
2062 && (op1 = gimple_assign_rhs1 (def_stmt))
2063 && TREE_CODE (op1) == SSA_NAME
2064 && SSA_NAME_DEF_STMT (op1) == phi)
2065 code = PLUS_EXPR;
2067 if (check_reduction
2068 && (!commutative_tree_code (code) || !associative_tree_code (code)))
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2072 return NULL;
2075 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2077 if (code != COND_EXPR)
2079 if (vect_print_dump_info (REPORT_DETAILS))
2080 report_vect_op (def_stmt, "reduction: not binary operation: ");
2082 return NULL;
2085 op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
2086 if (COMPARISON_CLASS_P (op3))
2088 op4 = TREE_OPERAND (op3, 1);
2089 op3 = TREE_OPERAND (op3, 0);
2092 op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
2093 op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
2095 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2097 if (vect_print_dump_info (REPORT_DETAILS))
2098 report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2100 return NULL;
2103 else
2105 op1 = gimple_assign_rhs1 (def_stmt);
2106 op2 = gimple_assign_rhs2 (def_stmt);
2108 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
2110 if (vect_print_dump_info (REPORT_DETAILS))
2111 report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2113 return NULL;
2117 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2118 if ((TREE_CODE (op1) == SSA_NAME
2119 && !types_compatible_p (type, TREE_TYPE (op1)))
2120 || (TREE_CODE (op2) == SSA_NAME
2121 && !types_compatible_p (type, TREE_TYPE (op2)))
2122 || (op3 && TREE_CODE (op3) == SSA_NAME
2123 && !types_compatible_p (type, TREE_TYPE (op3)))
2124 || (op4 && TREE_CODE (op4) == SSA_NAME
2125 && !types_compatible_p (type, TREE_TYPE (op4))))
2127 if (vect_print_dump_info (REPORT_DETAILS))
2129 fprintf (vect_dump, "reduction: multiple types: operation type: ");
2130 print_generic_expr (vect_dump, type, TDF_SLIM);
2131 fprintf (vect_dump, ", operands types: ");
2132 print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2133 fprintf (vect_dump, ",");
2134 print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2135 if (op3)
2137 fprintf (vect_dump, ",");
2138 print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2141 if (op4)
2143 fprintf (vect_dump, ",");
2144 print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2148 return NULL;
2151 /* Check that it's ok to change the order of the computation.
2152 Generally, when vectorizing a reduction we change the order of the
2153 computation. This may change the behavior of the program in some
2154 cases, so we need to check that this is ok. One exception is when
2155 vectorizing an outer-loop: the inner-loop is executed sequentially,
2156 and therefore vectorizing reductions in the inner-loop during
2157 outer-loop vectorization is safe. */
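/* Illustrative instance (values chosen for the example): with IEEE
   single precision and a = 1e30f, b = -1e30f, c = 1.0f, the sum
   (a + b) + c is 1.0f while a + (b + c) is 0.0f.  Vectorization
   accumulates partial sums in a different order than the scalar loop,
   so without -fassociative-math such a change must be rejected.  */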
2159 /* CHECKME: check for !flag_finite_math_only too? */
2160 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2161 && check_reduction)
2163 /* Changing the order of operations changes the semantics. */
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2166 return NULL;
2168 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2169 && check_reduction)
2171 /* Changing the order of operations changes the semantics. */
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2174 return NULL;
2176 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2178 /* Changing the order of operations changes the semantics. */
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 report_vect_op (def_stmt,
2181 "reduction: unsafe fixed-point math optimization: ");
2182 return NULL;
2185 /* If we detected "res -= x[i]" earlier, rewrite it into
2186 "res += -x[i]" now. If this turns out to be useless reassoc
2187 will clean it up again. */
2188 if (orig_code == MINUS_EXPR)
2190 tree rhs = gimple_assign_rhs2 (def_stmt);
2191 tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
2192 gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2193 rhs, NULL);
2194 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2195 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2196 loop_info, NULL));
2197 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2198 gimple_assign_set_rhs2 (def_stmt, negrhs);
2199 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2200 update_stmt (def_stmt);
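/* Sketch of the rewrite performed above (SSA names illustrative):

     before:  res_2 = res_1 - x_3;
     after:   neg_4 = -x_3;
              res_2 = res_1 + neg_4;

   The negate stmt is inserted right before the definition, and the
   resulting PLUS chain is a valid commutative/associative reduction.  */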
2203 /* Reduction is safe. We're dealing with one of the following:
2204 1) integer arithmetic and no trapv
2205 2) floating point arithmetic, and special flags permit this optimization
2206 3) nested cycle (i.e., outer loop vectorization). */
2207 if (TREE_CODE (op1) == SSA_NAME)
2208 def1 = SSA_NAME_DEF_STMT (op1);
2210 if (TREE_CODE (op2) == SSA_NAME)
2211 def2 = SSA_NAME_DEF_STMT (op2);
2213 if (code != COND_EXPR
2214 && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
2216 if (vect_print_dump_info (REPORT_DETAILS))
2217 report_vect_op (def_stmt, "reduction: no defs for operands: ");
2218 return NULL;
2221 /* Check that one def is the reduction def, defined by PHI,
2222 the other def is either defined in the loop ("vect_internal_def"),
2223 or it's an induction (defined by a loop-header phi-node). */
2225 if (def2 && def2 == phi
2226 && (code == COND_EXPR
2227 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2228 && (is_gimple_assign (def1)
2229 || is_gimple_call (def1)
2230 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2231 == vect_induction_def
2232 || (gimple_code (def1) == GIMPLE_PHI
2233 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2234 == vect_internal_def
2235 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2237 if (vect_print_dump_info (REPORT_DETAILS))
2238 report_vect_op (def_stmt, "detected reduction: ");
2239 return def_stmt;
2242 if (def1 && def1 == phi
2243 && (code == COND_EXPR
2244 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2245 && (is_gimple_assign (def2)
2246 || is_gimple_call (def2)
2247 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2248 == vect_induction_def
2249 || (gimple_code (def2) == GIMPLE_PHI
2250 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2251 == vect_internal_def
2252 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2254 if (check_reduction)
2256 /* Swap operands (just for simplicity - so that the rest of the code
2257 can assume that the reduction variable is always the last (second)
2258 argument). */
2259 if (vect_print_dump_info (REPORT_DETAILS))
2260 report_vect_op (def_stmt,
2261 "detected reduction: need to swap operands: ");
2263 swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2264 gimple_assign_rhs2_ptr (def_stmt));
2266 else
2268 if (vect_print_dump_info (REPORT_DETAILS))
2269 report_vect_op (def_stmt, "detected reduction: ");
2272 return def_stmt;
2275 /* Try to find SLP reduction chain. */
2276 if (vect_is_slp_reduction (loop_info, phi, def_stmt))
2278 if (vect_print_dump_info (REPORT_DETAILS))
2279 report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2281 return def_stmt;
2284 if (vect_print_dump_info (REPORT_DETAILS))
2285 report_vect_op (def_stmt, "reduction: unknown pattern: ");
2287 return NULL;
2290 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2291 in-place. Arguments are as for that function. */
2293 static gimple
2294 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2295 bool check_reduction, bool *double_reduc)
2297 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2298 double_reduc, false);
2301 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2302 in-place if it enables detection of more reductions. Arguments
2303 are as for that function. */
2305 gimple
2306 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2307 bool check_reduction, bool *double_reduc)
2309 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2310 double_reduc, true);
2313 /* Calculate the cost of one scalar iteration of the loop. */
2314 int
2315 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2318 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2319 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2320 int innerloop_iters, i, stmt_cost;
2322 /* Count statements in scalar loop. Using this as scalar cost for a single
2323 iteration for now.
2325 TODO: Add outer loop support.
2327 TODO: Consider assigning different costs to different scalar
2328 statements. */
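/* Illustrative count, assuming the default target cost of 1 for
   scalar_load, scalar_store and scalar_stmt: a body performing
   "a[i] = b[i] + c[i]" contributes two loads, one arithmetic stmt and
   one store, i.e. a scalar single-iteration cost of 4.  */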
2330 /* FORNOW. */
2331 innerloop_iters = 1;
2332 if (loop->inner)
2333 innerloop_iters = 50; /* FIXME */
2335 for (i = 0; i < nbbs; i++)
2337 gimple_stmt_iterator si;
2338 basic_block bb = bbs[i];
2340 if (bb->loop_father == loop->inner)
2341 factor = innerloop_iters;
2342 else
2343 factor = 1;
2345 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2347 gimple stmt = gsi_stmt (si);
2348 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2350 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2351 continue;
2353 /* Skip stmts that are not vectorized inside the loop. */
2354 if (stmt_info
2355 && !STMT_VINFO_RELEVANT_P (stmt_info)
2356 && (!STMT_VINFO_LIVE_P (stmt_info)
2357 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
2358 continue;
2360 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2362 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2363 stmt_cost = vect_get_cost (scalar_load);
2364 else
2365 stmt_cost = vect_get_cost (scalar_store);
2367 else
2368 stmt_cost = vect_get_cost (scalar_stmt);
2370 scalar_single_iter_cost += stmt_cost * factor;
2373 return scalar_single_iter_cost;
2376 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2377 int
2378 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2379 int *peel_iters_epilogue,
2380 int scalar_single_iter_cost)
2382 int peel_guard_costs = 0;
2383 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2385 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2387 *peel_iters_epilogue = vf/2;
2388 if (vect_print_dump_info (REPORT_COST))
2389 fprintf (vect_dump, "cost model: "
2390 "epilogue peel iters set to vf/2 because "
2391 "loop iterations are unknown .");
2393 /* If peeled iterations are known but the number of scalar loop
2394 iterations is unknown, count a taken branch per peeled loop. */
2395 peel_guard_costs = 2 * vect_get_cost (cond_branch_taken);
2397 else
2399 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2400 peel_iters_prologue = niters < peel_iters_prologue ?
2401 niters : peel_iters_prologue;
2402 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2403 /* If we need to peel for gaps, but no peeling is required, we have to
2404 peel VF iterations. */
2405 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2406 *peel_iters_epilogue = vf;
2409 return (peel_iters_prologue * scalar_single_iter_cost)
2410 + (*peel_iters_epilogue * scalar_single_iter_cost)
2411 + peel_guard_costs;
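/* Worked example (illustrative numbers): with vf = 4, a known niters
   of 10, peel_iters_prologue = 2 and no peeling for gaps, the epilogue
   peels (10 - 2) % 4 = 0 iterations, peel_guard_costs stays 0, and the
   returned cost is 2 * scalar_single_iter_cost.  */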
2414 /* Function vect_estimate_min_profitable_iters
2416 Return the number of iterations required for the vector version of the
2417 loop to be profitable relative to the cost of the scalar version of the
2418 loop.
2420 TODO: Take profile info into account before making vectorization
2421 decisions, if available. */
2423 int
2424 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2426 int i;
2427 int min_profitable_iters;
2428 int peel_iters_prologue;
2429 int peel_iters_epilogue;
2430 int vec_inside_cost = 0;
2431 int vec_outside_cost = 0;
2432 int scalar_single_iter_cost = 0;
2433 int scalar_outside_cost = 0;
2434 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2435 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2436 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2437 int nbbs = loop->num_nodes;
2438 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2439 int peel_guard_costs = 0;
2440 int innerloop_iters = 0, factor;
2441 VEC (slp_instance, heap) *slp_instances;
2442 slp_instance instance;
2444 /* Cost model disabled. */
2445 if (!flag_vect_cost_model)
2447 if (vect_print_dump_info (REPORT_COST))
2448 fprintf (vect_dump, "cost model disabled.");
2449 return 0;
2452 /* Requires loop versioning tests to handle misalignment. */
2453 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2455 /* FIXME: Make cost depend on complexity of individual check. */
2456 vec_outside_cost +=
2457 VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2458 if (vect_print_dump_info (REPORT_COST))
2459 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2460 "versioning to treat misalignment.\n");
2463 /* Requires loop versioning with alias checks. */
2464 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2466 /* FIXME: Make cost depend on complexity of individual check. */
2467 vec_outside_cost +=
2468 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2469 if (vect_print_dump_info (REPORT_COST))
2470 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2471 "versioning aliasing.\n");
2474 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2475 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2476 vec_outside_cost += vect_get_cost (cond_branch_taken);
2478 /* Count statements in scalar loop. Using this as scalar cost for a single
2479 iteration for now.
2481 TODO: Add outer loop support.
2483 TODO: Consider assigning different costs to different scalar
2484 statements. */
2486 /* FORNOW. */
2487 if (loop->inner)
2488 innerloop_iters = 50; /* FIXME */
2490 for (i = 0; i < nbbs; i++)
2492 gimple_stmt_iterator si;
2493 basic_block bb = bbs[i];
2495 if (bb->loop_father == loop->inner)
2496 factor = innerloop_iters;
2497 else
2498 factor = 1;
2500 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2502 gimple stmt = gsi_stmt (si);
2503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2504 /* Skip stmts that are not vectorized inside the loop. */
2505 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2506 && (!STMT_VINFO_LIVE_P (stmt_info)
2507 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
2508 continue;
2509 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2510 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2511 some of the "outside" costs are generated inside the outer-loop. */
2512 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2516 scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2518 /* Add additional cost for the peeled instructions in prologue and epilogue
2519 loop.
2521 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2522 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2524 TODO: Build an expression that represents peel_iters for prologue and
2525 epilogue to be used in a run-time test. */
2527 if (npeel < 0)
2529 peel_iters_prologue = vf/2;
2530 if (vect_print_dump_info (REPORT_COST))
2531 fprintf (vect_dump, "cost model: "
2532 "prologue peel iters set to vf/2.");
2534 /* If peeling for alignment is unknown, the loop bound of the main
2535 loop becomes unknown. */
2536 peel_iters_epilogue = vf/2;
2537 if (vect_print_dump_info (REPORT_COST))
2538 fprintf (vect_dump, "cost model: "
2539 "epilogue peel iters set to vf/2 because "
2540 "peeling for alignment is unknown .");
2542 /* If peeled iterations are unknown, count a taken branch and a not taken
2543 branch per peeled loop. Even if scalar loop iterations are known,
2544 vector iterations are not known since peeled prologue iterations are
2545 not known. Hence guards remain the same. */
2546 peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
2547 + vect_get_cost (cond_branch_not_taken));
2548 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2549 + (peel_iters_epilogue * scalar_single_iter_cost)
2550 + peel_guard_costs;
2552 else
2554 peel_iters_prologue = npeel;
2555 vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2556 peel_iters_prologue, &peel_iters_epilogue,
2557 scalar_single_iter_cost);
2560 /* FORNOW: The scalar outside cost is incremented in one of the
2561 following ways:
2563 1. The vectorizer checks for alignment and aliasing and generates
2564 a condition that allows dynamic vectorization. A cost model
2565 check is ANDED with the versioning condition. Hence scalar code
2566 path now has the added cost of the versioning check.
2568 if (cost > th & versioning_check)
2569 jmp to vector code
2571 Hence run-time scalar is incremented by not-taken branch cost.
2573 2. The vectorizer then checks if a prologue is required. If the
2574 cost model check was not done before during versioning, it has to
2575 be done before the prologue check.
2577 if (cost <= th)
2578 prologue = scalar_iters
2579 if (prologue == 0)
2580 jmp to vector code
2581 else
2582 execute prologue
2583 if (prologue == num_iters)
2584 go to exit
2586 Hence the run-time scalar cost is incremented by a taken branch,
2587 plus a not-taken branch, plus a taken branch cost.
2589 3. The vectorizer then checks if an epilogue is required. If the
2590 cost model check was not done before during prologue check, it
2591 has to be done with the epilogue check.
2593 if (prologue == 0)
2594 jmp to vector code
2595 else
2596 execute prologue
2597 if (prologue == num_iters)
2598 go to exit
2599 vector code:
2600 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2601 jmp to epilogue
2603 Hence the run-time scalar cost should be incremented by 2 taken
2604 branches.
2606 TODO: The back end may reorder the BBS's differently and reverse
2607 conditions/branch directions. Change the estimates below to
2608 something more reasonable. */
2610 /* If the number of iterations is known and we do not do versioning, we can
2611 decide whether to vectorize at compile time. Hence the scalar version
2612 does not carry cost model guard costs. */
2613 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2614 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2615 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2617 /* Cost model check occurs at versioning. */
2618 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2619 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2620 scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2621 else
2623 /* Cost model check occurs at prologue generation. */
2624 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2625 scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2626 + vect_get_cost (cond_branch_not_taken);
2627 /* Cost model check occurs at epilogue generation. */
2628 else
2629 scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2633 /* Add SLP costs. */
2634 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2635 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2637 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2638 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2641 /* Calculate number of iterations required to make the vector version
2642 profitable, relative to the loop bodies only. The following condition
2643 must hold true:
2644 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2645 where
2646 SIC = scalar iteration cost, VIC = vector iteration cost,
2647 VOC = vector outside cost, VF = vectorization factor,
2648 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2649 SOC = scalar outside cost for run time cost model check. */
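/* Worked example (illustrative numbers): with SIC = 4, VIC = 6,
   VF = 4, VOC = 20, SOC = 0 and no peeling, the division below gives
   (20 * 4) / (4 * 4 - 6) = 8; since 4 * 4 * 8 <= 6 * 8 + 20 * 4
   (128 <= 128), it is bumped to 9.  After the clamp to VF and the
   final decrement further down, the emitted threshold is 8, i.e. the
   vector loop is entered only for niters > 8.  */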
2651 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2653 if (vec_outside_cost <= 0)
2654 min_profitable_iters = 1;
2655 else
2657 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2658 - vec_inside_cost * peel_iters_prologue
2659 - vec_inside_cost * peel_iters_epilogue)
2660 / ((scalar_single_iter_cost * vf)
2661 - vec_inside_cost);
2663 if ((scalar_single_iter_cost * vf * min_profitable_iters)
2664 <= ((vec_inside_cost * min_profitable_iters)
2665 + ((vec_outside_cost - scalar_outside_cost) * vf)))
2666 min_profitable_iters++;
2669 /* Vector version will never be profitable. */
2670 else
2672 if (vect_print_dump_info (REPORT_COST))
2673 fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2674 "divided by the scalar iteration cost = %d "
2675 "is greater or equal to the vectorization factor = %d.",
2676 vec_inside_cost, scalar_single_iter_cost, vf);
2677 return -1;
2680 if (vect_print_dump_info (REPORT_COST))
2682 fprintf (vect_dump, "Cost model analysis: \n");
2683 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
2684 vec_inside_cost);
2685 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
2686 vec_outside_cost);
2687 fprintf (vect_dump, " Scalar iteration cost: %d\n",
2688 scalar_single_iter_cost);
2689 fprintf (vect_dump, " Scalar outside cost: %d\n", scalar_outside_cost);
2690 fprintf (vect_dump, " prologue iterations: %d\n",
2691 peel_iters_prologue);
2692 fprintf (vect_dump, " epilogue iterations: %d\n",
2693 peel_iters_epilogue);
2694 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
2695 min_profitable_iters);
2698 min_profitable_iters =
2699 min_profitable_iters < vf ? vf : min_profitable_iters;
2701 /* Because the condition we create is:
2702 if (niters <= min_profitable_iters)
2703 then skip the vectorized loop. */
2704 min_profitable_iters--;
2706 if (vect_print_dump_info (REPORT_COST))
2707 fprintf (vect_dump, " Profitability threshold = %d\n",
2708 min_profitable_iters);
2710 return min_profitable_iters;
2714 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2715 functions. Design better to avoid maintenance issues. */
2717 /* Function vect_model_reduction_cost.
2719 Models cost for a reduction operation, including the vector ops
2720 generated within the strip-mine loop, the initial definition before
2721 the loop, and the epilogue code that must be generated. */
2723 static bool
2724 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2725 int ncopies)
2727 int outer_cost = 0;
2728 enum tree_code code;
2729 optab optab;
2730 tree vectype;
2731 gimple stmt, orig_stmt;
2732 tree reduction_op;
2733 enum machine_mode mode;
2734 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2735 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2738 /* Cost of reduction op inside loop. */
2739 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2740 += ncopies * vect_get_cost (vector_stmt);
2742 stmt = STMT_VINFO_STMT (stmt_info);
2744 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2746 case GIMPLE_SINGLE_RHS:
2747 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2748 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2749 break;
2750 case GIMPLE_UNARY_RHS:
2751 reduction_op = gimple_assign_rhs1 (stmt);
2752 break;
2753 case GIMPLE_BINARY_RHS:
2754 reduction_op = gimple_assign_rhs2 (stmt);
2755 break;
2756 case GIMPLE_TERNARY_RHS:
2757 reduction_op = gimple_assign_rhs3 (stmt);
2758 break;
2759 default:
2760 gcc_unreachable ();
2763 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2764 if (!vectype)
2766 if (vect_print_dump_info (REPORT_COST))
2768 fprintf (vect_dump, "unsupported data-type ");
2769 print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2771 return false;
2774 mode = TYPE_MODE (vectype);
2775 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2777 if (!orig_stmt)
2778 orig_stmt = STMT_VINFO_STMT (stmt_info);
2780 code = gimple_assign_rhs_code (orig_stmt);
2782 /* Add in cost for initial definition. */
2783 outer_cost += vect_get_cost (scalar_to_vec);
2785 /* Determine cost of epilogue code.
2787 We have a reduction operator that will reduce the vector in one statement.
2788 Also requires scalar extract. */
2790 if (!nested_in_vect_loop_p (loop, orig_stmt))
2792 if (reduc_code != ERROR_MARK)
2793 outer_cost += vect_get_cost (vector_stmt)
2794 + vect_get_cost (vec_to_scalar);
2795 else
2797 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2798 tree bitsize =
2799 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2800 int element_bitsize = tree_low_cst (bitsize, 1);
2801 int nelements = vec_size_in_bits / element_bitsize;
2803 optab = optab_for_tree_code (code, vectype, optab_default);
2805 /* We have a whole vector shift available. */
2806 if (VECTOR_MODE_P (mode)
2807 && optab_handler (optab, mode) != CODE_FOR_nothing
2808 && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2809 /* Final reduction via vector shifts and the reduction operator. Also
2810 requires scalar extract. */
2811 outer_cost += ((exact_log2(nelements) * 2)
2812 * vect_get_cost (vector_stmt)
2813 + vect_get_cost (vec_to_scalar));
2814 else
2815 /* Use extracts and reduction op for final reduction. For N elements,
2816 we have N extracts and N-1 reduction ops. */
2817 outer_cost += ((nelements + nelements - 1)
2818 * vect_get_cost (vector_stmt));
2822 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2824 if (vect_print_dump_info (REPORT_COST))
2825 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2826 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2827 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2829 return true;
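/* Illustrative numbers, assuming unit costs from the default target
   hooks: for a V4SI sum with ncopies = 1, the inside cost is 1; if a
   reduc_code is available the outside cost is scalar_to_vec +
   vector_stmt + vec_to_scalar = 3, while the whole-vector-shift
   fallback costs scalar_to_vec + (log2(4) * 2) * vector_stmt +
   vec_to_scalar = 6.  */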
2833 /* Function vect_model_induction_cost.
2835 Models cost for induction operations. */
2837 static void
2838 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2840 /* loop cost for vec_loop. */
2841 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2842 = ncopies * vect_get_cost (vector_stmt);
2843 /* prologue cost for vec_init and vec_step. */
2844 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2845 = 2 * vect_get_cost (scalar_to_vec);
2847 if (vect_print_dump_info (REPORT_COST))
2848 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2849 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2850 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2854 /* Function get_initial_def_for_induction
2856 Input:
2857 STMT - a stmt that performs an induction operation in the loop.
2858 IV_PHI - the loop-header phi node of the induction variable
2860 Output:
2861 Return a vector variable, initialized with the first VF values of
2862 the induction variable. E.g., for an iv with IV_PHI='X' and
2863 evolution S, for a vector of 4 units, we want to return:
2864 [X, X + S, X + 2*S, X + 3*S]. */
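/* Illustrative instance: for an iv with initial value X = 5, step
   S = 3 and VF = nunits = 4, the returned vector is [5, 8, 11, 14]
   and the per-iteration vector step built below is
   [VF*S, VF*S, VF*S, VF*S] = [12, 12, 12, 12].  */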
2866 static tree
2867 get_initial_def_for_induction (gimple iv_phi)
2869 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2871 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2872 tree scalar_type;
2873 tree vectype;
2874 int nunits;
2875 edge pe = loop_preheader_edge (loop);
2876 struct loop *iv_loop;
2877 basic_block new_bb;
2878 tree vec, vec_init, vec_step, t;
2879 tree access_fn;
2880 tree new_var;
2881 tree new_name;
2882 gimple init_stmt, induction_phi, new_stmt;
2883 tree induc_def, vec_def, vec_dest;
2884 tree init_expr, step_expr;
2885 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2886 int i;
2887 bool ok;
2888 int ncopies;
2889 tree expr;
2890 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
2891 bool nested_in_vect_loop = false;
2892 gimple_seq stmts = NULL;
2893 imm_use_iterator imm_iter;
2894 use_operand_p use_p;
2895 gimple exit_phi;
2896 edge latch_e;
2897 tree loop_arg;
2898 gimple_stmt_iterator si;
2899 basic_block bb = gimple_bb (iv_phi);
2900 tree stepvectype;
2901 tree resvectype;
2903 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
2904 if (nested_in_vect_loop_p (loop, iv_phi))
2906 nested_in_vect_loop = true;
2907 iv_loop = loop->inner;
2909 else
2910 iv_loop = loop;
2911 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
2913 latch_e = loop_latch_edge (iv_loop);
2914 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
2916 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
2917 gcc_assert (access_fn);
2918 STRIP_NOPS (access_fn);
2919 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
2920 &init_expr, &step_expr);
2921 gcc_assert (ok);
2922 pe = loop_preheader_edge (iv_loop);
2924 scalar_type = TREE_TYPE (init_expr);
2925 vectype = get_vectype_for_scalar_type (scalar_type);
2926 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
2927 gcc_assert (vectype);
2928 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2929 ncopies = vf / nunits;
2931 gcc_assert (phi_info);
2932 gcc_assert (ncopies >= 1);
2934 /* Find the first insertion point in the BB. */
2935 si = gsi_after_labels (bb);
2937 /* Create the vector that holds the initial_value of the induction. */
2938 if (nested_in_vect_loop)
2940 /* iv_loop is nested in the loop to be vectorized. init_expr had already
2941 been created during vectorization of previous stmts. We obtain it
2942 from the STMT_VINFO_VEC_STMT of the defining stmt. */
2943 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
2944 loop_preheader_edge (iv_loop));
2945 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
2947 else
2949 /* iv_loop is the loop to be vectorized. Create:
2950 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
2951 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
2952 add_referenced_var (new_var);
2954 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
2955 if (stmts)
2957 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2958 gcc_assert (!new_bb);
2961 t = NULL_TREE;
2962 t = tree_cons (NULL_TREE, new_name, t);
2963 for (i = 1; i < nunits; i++)
2965 /* Create: new_name_i = new_name + step_expr */
2966 enum tree_code code = POINTER_TYPE_P (scalar_type)
2967 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2968 init_stmt = gimple_build_assign_with_ops (code, new_var,
2969 new_name, step_expr);
2970 new_name = make_ssa_name (new_var, init_stmt);
2971 gimple_assign_set_lhs (init_stmt, new_name);
2973 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
2974 gcc_assert (!new_bb);
2976 if (vect_print_dump_info (REPORT_DETAILS))
2978 fprintf (vect_dump, "created new init_stmt: ");
2979 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
2981 t = tree_cons (NULL_TREE, new_name, t);
2983 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
2984 vec = build_constructor_from_list (vectype, nreverse (t));
2985 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
2989 /* Create the vector that holds the step of the induction. */
2990 if (nested_in_vect_loop)
2991 /* iv_loop is nested in the loop to be vectorized. Generate:
2992 vec_step = [S, S, S, S] */
2993 new_name = step_expr;
2994 else
2996 /* iv_loop is the loop to be vectorized. Generate:
2997 vec_step = [VF*S, VF*S, VF*S, VF*S] */
2998 expr = build_int_cst (TREE_TYPE (step_expr), vf);
2999 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3000 expr, step_expr);
3003 t = unshare_expr (new_name);
3004 gcc_assert (CONSTANT_CLASS_P (new_name));
3005 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3006 gcc_assert (stepvectype);
3007 vec = build_vector_from_val (stepvectype, t);
3008 vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3011 /* Create the following def-use cycle:
3012 loop prolog:
3013 vec_init = ...
3014 vec_step = ...
3015 loop:
3016 vec_iv = PHI <vec_init, vec_loop>
3018 STMT
3020 vec_loop = vec_iv + vec_step; */
3022 /* Create the induction-phi that defines the induction-operand. */
3023 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3024 add_referenced_var (vec_dest);
3025 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3026 set_vinfo_for_stmt (induction_phi,
3027 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3028 induc_def = PHI_RESULT (induction_phi);
3030 /* Create the iv update inside the loop */
3031 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3032 induc_def, vec_step);
3033 vec_def = make_ssa_name (vec_dest, new_stmt);
3034 gimple_assign_set_lhs (new_stmt, vec_def);
3035 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3036 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3037 NULL));
3039 /* Set the arguments of the phi node: */
3040 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3041 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3042 UNKNOWN_LOCATION);
3045 /* In case that vectorization factor (VF) is bigger than the number
3046 of elements that we can fit in a vectype (nunits), we have to generate
3047 more than one vector stmt - i.e - we need to "unroll" the
3048 vector stmt by a factor VF/nunits. For more details see documentation
3049 in vectorizable_operation. */
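/* Illustrative instance: with VF = 8, nunits = 4 and scalar step S,
   ncopies = 2.  The PHI above advances by [8*S, ...] per vector
   iteration, and the second copy is obtained from the first by adding
   [nunits*S, ...] = [4*S, ...], so together the two vectors cover all
   8 scalar iv values of each iteration.  */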
3051 if (ncopies > 1)
3053 stmt_vec_info prev_stmt_vinfo;
3054 /* FORNOW. This restriction should be relaxed. */
3055 gcc_assert (!nested_in_vect_loop);
3057 /* Create the vector that holds the step of the induction. */
3058 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3059 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3060 expr, step_expr);
3061 t = unshare_expr (new_name);
3062 gcc_assert (CONSTANT_CLASS_P (new_name));
3063 vec = build_vector_from_val (stepvectype, t);
3064 vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3066 vec_def = induc_def;
3067 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3068 for (i = 1; i < ncopies; i++)
3070 /* vec_i = vec_prev + vec_step */
3071 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3072 vec_def, vec_step);
3073 vec_def = make_ssa_name (vec_dest, new_stmt);
3074 gimple_assign_set_lhs (new_stmt, vec_def);
3076 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3077 if (!useless_type_conversion_p (resvectype, vectype))
3079 new_stmt = gimple_build_assign_with_ops
3080 (VIEW_CONVERT_EXPR,
3081 vect_get_new_vect_var (resvectype, vect_simple_var,
3082 "vec_iv_"),
3083 build1 (VIEW_CONVERT_EXPR, resvectype,
3084 gimple_assign_lhs (new_stmt)), NULL_TREE);
3085 gimple_assign_set_lhs (new_stmt,
3086 make_ssa_name
3087 (gimple_assign_lhs (new_stmt), new_stmt));
3088 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3090 set_vinfo_for_stmt (new_stmt,
3091 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3092 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3093 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3097 if (nested_in_vect_loop)
3099 /* Find the loop-closed exit-phi of the induction, and record
3100 the final vector of induction results: */
3101 exit_phi = NULL;
3102 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3104 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3106 exit_phi = USE_STMT (use_p);
3107 break;
3110 if (exit_phi)
3112 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3113 /* FORNOW. Currently not supporting the case that an inner-loop induction
3114 is not used in the outer-loop (i.e. only outside the outer-loop). */
3115 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3116 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3118 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3119 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "vector of inductions after inner-loop:");
3122 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
3128 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "transform induction: created def-use cycle: ");
3131 print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
3132 fprintf (vect_dump, "\n");
3133 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
3136 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3137 if (!useless_type_conversion_p (resvectype, vectype))
3139 new_stmt = gimple_build_assign_with_ops
3140 (VIEW_CONVERT_EXPR,
3141 vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3142 build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3143 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3144 gimple_assign_set_lhs (new_stmt, induc_def);
3145 si = gsi_start_bb (bb);
3146 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3147 set_vinfo_for_stmt (new_stmt,
3148 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3149 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3150 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3153 return induc_def;
3157 /* Function get_initial_def_for_reduction
3159 Input:
3160 STMT - a stmt that performs a reduction operation in the loop.
3161 INIT_VAL - the initial value of the reduction variable
3163 Output:
3164 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3165 of the reduction (used for adjusting the epilog - see below).
3166 Return a vector variable, initialized according to the operation that STMT
3167 performs. This vector will be used as the initial value of the
3168 vector of partial results.
3170 Option1 (adjust in epilog): Initialize the vector as follows:
3171 add/bit or/xor: [0,0,...,0,0]
3172 mult/bit and: [1,1,...,1,1]
3173 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3174 and when necessary (e.g. add/mult case) let the caller know
3175 that it needs to adjust the result by init_val.
3177 Option2: Initialize the vector as follows:
3178 add/bit or/xor: [init_val,0,0,...,0]
3179 mult/bit and: [init_val,1,1,...,1]
3180 min/max/cond_expr: [init_val,init_val,...,init_val]
3181 and no adjustments are needed.
3183 For example, for the following code:
3185 s = init_val;
3186 for (i=0;i<n;i++)
3187 s = s + a[i];
3189 STMT is 's = s + a[i]', and the reduction variable is 's'.
3190 For a vector of 4 units, we want to return either [0,0,0,init_val],
3191 or [0,0,0,0] and let the caller know that it needs to adjust
3192 the result at the end by 'init_val'.
3194 FORNOW, we use Option1 (the 'adjust in epilog' scheme) when
3195 ADJUSTMENT_DEF is not NULL, because this way the initialization
3196 vector is simpler (same element in all entries), and Option2 otherwise.
3198 A cost model should help decide between these two schemes. */
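/* Illustrative instance for a 4-unit PLUS reduction with
   init_val = 10: Option1 returns [0,0,0,0] and sets ADJUSTMENT_DEF to
   10, to be added to the reduced scalar in the epilog; Option2 returns
   [10,0,0,0] and needs no adjustment.  */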
3200 tree
3201 get_initial_def_for_reduction (gimple stmt, tree init_val,
3202 tree *adjustment_def)
3204 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3207 tree scalar_type = TREE_TYPE (init_val);
3208 tree vectype = get_vectype_for_scalar_type (scalar_type);
3209 int nunits;
3210 enum tree_code code = gimple_assign_rhs_code (stmt);
3211 tree def_for_init;
3212 tree init_def;
3213 tree t = NULL_TREE;
3214 int i;
3215 bool nested_in_vect_loop = false;
3216 tree init_value;
3217 REAL_VALUE_TYPE real_init_val = dconst0;
3218 int int_init_val = 0;
3219 gimple def_stmt = NULL;
3221 gcc_assert (vectype);
3222 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3224 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3225 || SCALAR_FLOAT_TYPE_P (scalar_type));
3227 if (nested_in_vect_loop_p (loop, stmt))
3228 nested_in_vect_loop = true;
3229 else
3230 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3232 /* In case of double reduction we only create a vector variable to be put
3233 in the reduction phi node. The actual statement creation is done in
3234 vect_create_epilog_for_reduction. */
3235 if (adjustment_def && nested_in_vect_loop
3236 && TREE_CODE (init_val) == SSA_NAME
3237 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3238 && gimple_code (def_stmt) == GIMPLE_PHI
3239 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3240 && vinfo_for_stmt (def_stmt)
3241 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3242 == vect_double_reduction_def)
3244 *adjustment_def = NULL;
3245 return vect_create_destination_var (init_val, vectype);
3248 if (TREE_CONSTANT (init_val))
3250 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3251 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3252 else
3253 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3255 else
3256 init_value = init_val;
3258 switch (code)
3260 case WIDEN_SUM_EXPR:
3261 case DOT_PROD_EXPR:
3262 case PLUS_EXPR:
3263 case MINUS_EXPR:
3264 case BIT_IOR_EXPR:
3265 case BIT_XOR_EXPR:
3266 case MULT_EXPR:
3267 case BIT_AND_EXPR:
3268 /* ADJUSTMENT_DEF is NULL when called from
3269 vect_create_epilog_for_reduction to vectorize double reduction. */
3270 if (adjustment_def)
3272 if (nested_in_vect_loop)
3273 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3274 NULL);
3275 else
3276 *adjustment_def = init_val;
3279 if (code == MULT_EXPR)
3281 real_init_val = dconst1;
3282 int_init_val = 1;
3285 if (code == BIT_AND_EXPR)
3286 int_init_val = -1;
3288 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3289 def_for_init = build_real (scalar_type, real_init_val);
3290 else
3291 def_for_init = build_int_cst (scalar_type, int_init_val);
3293 /* Create a vector of '0' or '1' for all but the first element. */
3294 for (i = nunits - 2; i >= 0; --i)
3295 t = tree_cons (NULL_TREE, def_for_init, t);
3297 /* Option1: the first element is '0' or '1' as well. */
3298 if (adjustment_def)
3300 t = tree_cons (NULL_TREE, def_for_init, t);
3301 init_def = build_vector (vectype, t);
3302 break;
3305 /* Option2: the first element is INIT_VAL. */
3306 t = tree_cons (NULL_TREE, init_value, t);
3307 if (TREE_CONSTANT (init_val))
3308 init_def = build_vector (vectype, t);
3309 else
3310 init_def = build_constructor_from_list (vectype, t);
3312 break;
3314 case MIN_EXPR:
3315 case MAX_EXPR:
3316 case COND_EXPR:
3317 if (adjustment_def)
3319 *adjustment_def = NULL_TREE;
3320 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3321 break;
3324 init_def = build_vector_from_val (vectype, init_value);
3325 break;
3327 default:
3328 gcc_unreachable ();
3331 return init_def;
3335 /* Function vect_create_epilog_for_reduction
3337 Create code at the loop-epilog to finalize the result of a reduction
3338 computation.
3340 VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of
3341 vector reduction statements.
3342 STMT is the scalar reduction stmt that is being vectorized.
3343 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3344 number of elements that we can fit in a vectype (nunits). In this case
3345 we have to generate more than one vector stmt - i.e - we need to "unroll"
3346 the vector stmt by a factor VF/nunits. For more details see documentation
3347 in vectorizable_operation.
3348 REDUC_CODE is the tree-code for the epilog reduction.
3349 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3350 computation.
3351 REDUC_INDEX is the index of the operand in the right hand side of the
3352 statement that is defined by REDUCTION_PHI.
3353 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3354 SLP_NODE is an SLP node containing a group of reduction statements. The
3355 first one in this group is STMT.
3357 This function:
3358 1. Creates the reduction def-use cycles: sets the arguments for
3359 REDUCTION_PHIS:
3360 The loop-entry argument is the vectorized initial-value of the reduction.
3361 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3362 sums.
3363 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3364 by applying the operation specified by REDUC_CODE if available, or by
3365 other means (whole-vector shifts or a scalar loop).
3366 The function also creates a new phi node at the loop exit to preserve
3367 loop-closed form, as illustrated below.
3369 The flow at the entry to this function:
3371 loop:
3372 vec_def = phi <null, null> # REDUCTION_PHI
3373 VECT_DEF = vector_stmt # vectorized form of STMT
3374 s_loop = scalar_stmt # (scalar) STMT
3375 loop_exit:
3376 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3377 use <s_out0>
3378 use <s_out0>
3380 The above is transformed by this function into:
3382 loop:
3383 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3384 VECT_DEF = vector_stmt # vectorized form of STMT
3385 s_loop = scalar_stmt # (scalar) STMT
3386 loop_exit:
3387 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3388 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3389 v_out2 = reduce <v_out1>
3390 s_out3 = extract_field <v_out2, 0>
3391 s_out4 = adjust_result <s_out3>
3392 use <s_out4>
3393 use <s_out4>
3396 static void
3397 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3398 int ncopies, enum tree_code reduc_code,
3399 VEC (gimple, heap) *reduction_phis,
3400 int reduc_index, bool double_reduc,
3401 slp_tree slp_node)
3403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3404 stmt_vec_info prev_phi_info;
3405 tree vectype;
3406 enum machine_mode mode;
3407 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3408 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3409 basic_block exit_bb;
3410 tree scalar_dest;
3411 tree scalar_type;
3412 gimple new_phi = NULL, phi;
3413 gimple_stmt_iterator exit_gsi;
3414 tree vec_dest;
3415 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3416 gimple epilog_stmt = NULL;
3417 enum tree_code code = gimple_assign_rhs_code (stmt);
3418 gimple exit_phi;
3419 tree bitsize, bitpos;
3420 tree adjustment_def = NULL;
3421 tree vec_initial_def = NULL;
3422 tree reduction_op, expr, def;
3423 tree orig_name, scalar_result;
3424 imm_use_iterator imm_iter, phi_imm_iter;
3425 use_operand_p use_p, phi_use_p;
3426 bool extract_scalar_result = false;
3427 gimple use_stmt, orig_stmt, reduction_phi = NULL;
3428 bool nested_in_vect_loop = false;
3429 VEC (gimple, heap) *new_phis = NULL;
3430 enum vect_def_type dt = vect_unknown_def_type;
3431 int j, i;
3432 VEC (tree, heap) *scalar_results = NULL;
3433 unsigned int group_size = 1, k, ratio;
3434 VEC (tree, heap) *vec_initial_defs = NULL;
3435 VEC (gimple, heap) *phis;
3436 bool slp_reduc = false;
3437 tree new_phi_result;
3439 if (slp_node)
3440 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3442 if (nested_in_vect_loop_p (loop, stmt))
3444 outer_loop = loop;
3445 loop = loop->inner;
3446 nested_in_vect_loop = true;
3447 gcc_assert (!slp_node);
3450 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3452 case GIMPLE_SINGLE_RHS:
3453 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3454 == ternary_op);
3455 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3456 break;
3457 case GIMPLE_UNARY_RHS:
3458 reduction_op = gimple_assign_rhs1 (stmt);
3459 break;
3460 case GIMPLE_BINARY_RHS:
3461 reduction_op = reduc_index ?
3462 gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3463 break;
3464 case GIMPLE_TERNARY_RHS:
3465 reduction_op = gimple_op (stmt, reduc_index + 1);
3466 break;
3467 default:
3468 gcc_unreachable ();
3471 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3472 gcc_assert (vectype);
3473 mode = TYPE_MODE (vectype);
3475 /* 1. Create the reduction def-use cycle:
3476 Set the arguments of REDUCTION_PHIS, i.e., transform
3478 loop:
3479 vec_def = phi <null, null> # REDUCTION_PHI
3480 VECT_DEF = vector_stmt # vectorized form of STMT
3483 into:
3485 loop:
3486 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3487 VECT_DEF = vector_stmt # vectorized form of STMT
3490 (in case of SLP, do it for all the phis). */
3492 /* Get the loop-entry arguments. */
3493 if (slp_node)
3494 vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
3495 NULL, reduc_index);
3496 else
3498 vec_initial_defs = VEC_alloc (tree, heap, 1);
3499 /* For the case of reduction, vect_get_vec_def_for_operand returns
3500 the scalar def before the loop, which defines the initial value
3501 of the reduction variable. */
3502 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3503 &adjustment_def);
3504 VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3507 /* Set phi nodes arguments. */
3508 FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3510 tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3511 tree def = VEC_index (tree, vect_defs, i);
3512 for (j = 0; j < ncopies; j++)
3514 /* Set the loop-entry arg of the reduction-phi. */
3515 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3516 UNKNOWN_LOCATION);
3518 /* Set the loop-latch arg for the reduction-phi. */
3519 if (j > 0)
3520 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3522 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3524 if (vect_print_dump_info (REPORT_DETAILS))
3526 fprintf (vect_dump, "transform reduction: created def-use"
3527 " cycle: ");
3528 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3529 fprintf (vect_dump, "\n");
3530 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3531 TDF_SLIM);
3534 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3538 VEC_free (tree, heap, vec_initial_defs);
3540 /* 2. Create epilog code.
3541 The reduction epilog code operates across the elements of the vector
3542 of partial results computed by the vectorized loop.
3543 The reduction epilog code consists of:
3545 step 1: compute the scalar result in a vector (v_out2)
3546 step 2: extract the scalar result (s_out3) from the vector (v_out2)
3547 step 3: adjust the scalar result (s_out3) if needed.
3549 Step 1 can be accomplished using one of the following three schemes:
3550 (scheme 1) using reduc_code, if available.
3551 (scheme 2) using whole-vector shifts, if available.
3552 (scheme 3) using a scalar loop. In this case steps 1+2 above are
3553 combined.
3555 The overall epilog code looks like this:
3557 s_out0 = phi <s_loop> # original EXIT_PHI
3558 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3559 v_out2 = reduce <v_out1> # step 1
3560 s_out3 = extract_field <v_out2, 0> # step 2
3561 s_out4 = adjust_result <s_out3> # step 3
3563 (step 3 is optional, and steps 1 and 2 may be combined).
3564 Lastly, the uses of s_out0 are replaced by s_out4. */
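/* Sketch of scheme 2 for a V4SI sum v = {a0,a1,a2,a3} (element order
   illustrative):

     v1 = v >> 2 elements   = {a2,    a3,    0, 0}
     s  = v + v1            = {a0+a2, a1+a3, ., .}
     s1 = s >> 1 element    = {a1+a3, .,     ., .}
     s2 = s + s1            = {a0+a1+a2+a3,  ., ., .}

   after which step 2 extracts element 0 of s2.  */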
3567 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3568 v_out1 = phi <VECT_DEF>
3569 Store them in NEW_PHIS. */
3571 exit_bb = single_exit (loop)->dest;
3572 prev_phi_info = NULL;
3573 new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3574 FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3576 for (j = 0; j < ncopies; j++)
3578 phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3579 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3580 if (j == 0)
3581 VEC_quick_push (gimple, new_phis, phi);
3582 else
3584 def = vect_get_vec_def_for_stmt_copy (dt, def);
3585 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3588 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3589 prev_phi_info = vinfo_for_stmt (phi);
3593 /* The epilogue is created for the outer-loop, i.e., for the loop being
3594 vectorized. */
3595 if (double_reduc)
3597 loop = outer_loop;
3598 exit_bb = single_exit (loop)->dest;
3601 exit_gsi = gsi_after_labels (exit_bb);
3603 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3604 (i.e. when reduc_code is not available) and in the final adjustment
3605 code (if needed). Also get the original scalar reduction variable as
3606 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
3607 represents a reduction pattern), the tree-code and scalar-def are
3608 taken from the original stmt that the pattern-stmt (STMT) replaces.
3609 Otherwise (it is a regular reduction) - the tree-code and scalar-def
3610 are taken from STMT. */
3612 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3613 if (!orig_stmt)
3615 /* Regular reduction */
3616 orig_stmt = stmt;
3618 else
3620 /* Reduction pattern */
3621 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3622 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3623 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3626 code = gimple_assign_rhs_code (orig_stmt);
3627 /* For MINUS_EXPR the initial vector is [init_val,0,...,0]; therefore,
3628 partial results are added and not subtracted. */
3629 if (code == MINUS_EXPR)
3630 code = PLUS_EXPR;
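/* For example (values assumed for exposition), for
s = 10;
for (i = 0; i < 4; i++)
s -= a[i]; with a = {1, 2, 3, 4}
the vector accumulator starts as {10, 0, 0, 0} and the loop computes
{10-1, 0-2, 0-3, 0-4}; adding the elements in the epilog yields
10 - (1+2+3+4) == 0, as required. */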
3632 scalar_dest = gimple_assign_lhs (orig_stmt);
3633 scalar_type = TREE_TYPE (scalar_dest);
3634 scalar_results = VEC_alloc (tree, heap, group_size);
3635 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3636 bitsize = TYPE_SIZE (scalar_type);
3638 /* In case this is a reduction in an inner-loop while vectorizing an outer
3639 loop, we don't need to extract a single scalar result at the end of the
3640 inner-loop (unless it is a double reduction, i.e., the reduction is used
3641 outside the outer-loop). The final vector of partial results will be used
3642 in the vectorized outer-loop, or reduced to a scalar result at the end of
3643 the outer-loop. */
3644 if (nested_in_vect_loop && !double_reduc)
3645 goto vect_finalize_reduction;
3647 /* SLP reduction without reduction chain, e.g.,
3648 # a1 = phi <a2, a0>
3649 # b1 = phi <b2, b0>
3650 a2 = operation (a1)
3651 b2 = operation (b1) */
3652 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3654 /* In case of reduction chain, e.g.,
3655 # a1 = phi <a3, a0>
3656 a2 = operation (a1)
3657 a3 = operation (a2),
3659 we may end up with more than one vector result. Here we reduce them to
3660 one vector. */
3661 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3663 tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3664 tree tmp;
3666 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3667 for (k = 1; k < VEC_length (gimple, new_phis); k++)
3669 gimple next_phi = VEC_index (gimple, new_phis, k);
3670 tree second_vect = PHI_RESULT (next_phi);
3671 gimple new_vec_stmt;
3673 tmp = build2 (code, vectype, first_vect, second_vect);
3674 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3675 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3676 gimple_assign_set_lhs (new_vec_stmt, first_vect);
3677 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3680 new_phi_result = first_vect;
3682 else
3683 new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3685 /* 2.3 Create the reduction code, using one of the three schemes described
3686 above. In SLP we simply need to extract all the elements from the
3687 vector (without reducing them), so we use scheme 3 (scalar extraction). */
3688 if (reduc_code != ERROR_MARK && !slp_reduc)
3690 tree tmp;
3692 /*** Case 1: Create:
3693 v_out2 = reduc_expr <v_out1> */
3695 if (vect_print_dump_info (REPORT_DETAILS))
3696 fprintf (vect_dump, "Reduce using direct vector reduction.");
3698 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3699 tmp = build1 (reduc_code, vectype, new_phi_result);
3700 epilog_stmt = gimple_build_assign (vec_dest, tmp);
3701 new_temp = make_ssa_name (vec_dest, epilog_stmt);
3702 gimple_assign_set_lhs (epilog_stmt, new_temp);
3703 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3705 extract_scalar_result = true;
3707 else
3709 enum tree_code shift_code = ERROR_MARK;
3710 bool have_whole_vector_shift = true;
3711 int bit_offset;
3712 int element_bitsize = tree_low_cst (bitsize, 1);
3713 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3714 tree vec_temp;
3716 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3717 shift_code = VEC_RSHIFT_EXPR;
3718 else
3719 have_whole_vector_shift = false;
3721 /* Regardless of whether we have a whole vector shift, if we're
3722 emulating the operation via tree-vect-generic, we don't want
3723 to use it. Only the first round of the reduction is likely
3724 to still be profitable via emulation. */
3725 /* ??? It might be better to emit a reduction tree code here, so that
3726 tree-vect-generic can expand the first round via bit tricks. */
3727 if (!VECTOR_MODE_P (mode))
3728 have_whole_vector_shift = false;
3729 else
3731 optab optab = optab_for_tree_code (code, vectype, optab_default);
3732 if (optab_handler (optab, mode) == CODE_FOR_nothing)
3733 have_whole_vector_shift = false;
3736 if (have_whole_vector_shift && !slp_reduc)
3738 /*** Case 2: Create:
3739 for (offset = VS/2; offset >= element_size; offset/=2)
3741 Create: va' = vec_shift <va, offset>
3742 Create: va = vop <va, va'>
3743 } */
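/* For example, for a 128-bit V4SI vector va = {a0, a1, a2, a3}
(a sketch; element order after the shift is illustrative and
target-dependent):
va' = vec_shift <va, 64> va' = {a2, a3, _, _}
va = vop <va, va'> va = {a0+a2, a1+a3, _, _}
va' = vec_shift <va, 32> va' = {a1+a3, _, _, _}
va = vop <va, va'> va = {a0+a1+a2+a3, _, _, _}
and step 2 below extracts the scalar result from one element. */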
3745 if (vect_print_dump_info (REPORT_DETAILS))
3746 fprintf (vect_dump, "Reduce using vector shifts");
3748 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3749 new_temp = new_phi_result;
3750 for (bit_offset = vec_size_in_bits/2;
3751 bit_offset >= element_bitsize;
3752 bit_offset /= 2)
3754 tree bitpos = size_int (bit_offset);
3756 epilog_stmt = gimple_build_assign_with_ops (shift_code,
3757 vec_dest, new_temp, bitpos);
3758 new_name = make_ssa_name (vec_dest, epilog_stmt);
3759 gimple_assign_set_lhs (epilog_stmt, new_name);
3760 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3762 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3763 new_name, new_temp);
3764 new_temp = make_ssa_name (vec_dest, epilog_stmt);
3765 gimple_assign_set_lhs (epilog_stmt, new_temp);
3766 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3769 extract_scalar_result = true;
3771 else
3773 tree rhs;
3775 /*** Case 3: Create:
3776 s = extract_field <v_out2, 0>
3777 for (offset = element_size;
3778 offset < vector_size;
3779 offset += element_size)
3781 Create: s' = extract_field <v_out2, offset>
3782 Create: s = op <s, s'> // For non SLP cases
3783 } */
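/* For example, for a V4SI v_out2 = {a0, a1, a2, a3} with 32-bit
elements:
s = extract_field <v_out2, 0> s = a0
s' = extract_field <v_out2, 32> s' = a1
s = op <s, s'> s = a0+a1
s' = extract_field <v_out2, 64> s' = a2
s = op <s, s'> s = a0+a1+a2
and so on up to offset 96. In SLP the s' values are collected
instead of being combined. */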
3785 if (vect_print_dump_info (REPORT_DETAILS))
3786 fprintf (vect_dump, "Reduce using scalar code. ");
3788 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3789 FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3791 vec_temp = PHI_RESULT (new_phi);
3792 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3793 bitsize_zero_node);
3794 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3795 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3796 gimple_assign_set_lhs (epilog_stmt, new_temp);
3797 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3799 /* In SLP we don't need to apply the reduction operation, so we just
3800 collect s' values in SCALAR_RESULTS. */
3801 if (slp_reduc)
3802 VEC_safe_push (tree, heap, scalar_results, new_temp);
3804 for (bit_offset = element_bitsize;
3805 bit_offset < vec_size_in_bits;
3806 bit_offset += element_bitsize)
3808 tree bitpos = bitsize_int (bit_offset);
3809 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3810 bitsize, bitpos);
3812 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3813 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3814 gimple_assign_set_lhs (epilog_stmt, new_name);
3815 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3817 if (slp_reduc)
3819 /* In SLP we don't need to apply the reduction operation, so
3820 we just collect s' values in SCALAR_RESULTS. */
3821 new_temp = new_name;
3822 VEC_safe_push (tree, heap, scalar_results, new_name);
3824 else
3826 epilog_stmt = gimple_build_assign_with_ops (code,
3827 new_scalar_dest, new_name, new_temp);
3828 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3829 gimple_assign_set_lhs (epilog_stmt, new_temp);
3830 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3835 /* The only case where we need to reduce scalar results in SLP is
3836 unrolling. If the size of SCALAR_RESULTS is greater than
3837 GROUP_SIZE, we reduce them by combining elements modulo
3838 GROUP_SIZE. */
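/* For example, with GROUP_SIZE == 2 and SCALAR_RESULTS == {r0, r1,
r2, r3} (the SLP node was unrolled twice), r2 is combined into
slot 0 and r3 into slot 1, leaving one result per group element. */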
3839 if (slp_reduc)
3841 tree res, first_res, new_res;
3842 gimple new_stmt;
3844 /* Reduce multiple scalar results in case of SLP unrolling. */
3845 for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3846 j++)
3848 first_res = VEC_index (tree, scalar_results, j % group_size);
3849 new_stmt = gimple_build_assign_with_ops (code,
3850 new_scalar_dest, first_res, res);
3851 new_res = make_ssa_name (new_scalar_dest, new_stmt);
3852 gimple_assign_set_lhs (new_stmt, new_res);
3853 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
3854 VEC_replace (tree, scalar_results, j % group_size, new_res);
3857 else
3858 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
3859 VEC_safe_push (tree, heap, scalar_results, new_temp);
3861 extract_scalar_result = false;
3865 /* 2.4 Extract the final scalar result. Create:
3866 s_out3 = extract_field <v_out2, bitpos> */
3868 if (extract_scalar_result)
3870 tree rhs;
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "extract scalar result");
3875 if (BYTES_BIG_ENDIAN)
3876 bitpos = size_binop (MULT_EXPR,
3877 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
3878 TYPE_SIZE (scalar_type));
3879 else
3880 bitpos = bitsize_zero_node;
3882 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
3883 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3884 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3885 gimple_assign_set_lhs (epilog_stmt, new_temp);
3886 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3887 VEC_safe_push (tree, heap, scalar_results, new_temp);
3890 vect_finalize_reduction:
3892 if (double_reduc)
3893 loop = loop->inner;
3895 /* 2.5 Adjust the final result by the initial value of the reduction
3896 variable. (When such adjustment is not needed, then
3897 'adjustment_def' is zero). For example, if code is PLUS we create:
3898 new_temp = loop_exit_def + adjustment_def */
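/* For example, for a PLUS_EXPR reduction in the non-nested case the
vector accumulator is typically initialized with {0,...,0} and
ADJUSTMENT_DEF holds the original initial value, so here we create
new_temp = loop_exit_def + init_val
(an illustrative sketch; the exact initial vector comes from
get_initial_def_for_reduction). */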
3900 if (adjustment_def)
3902 gcc_assert (!slp_reduc);
3903 if (nested_in_vect_loop)
3905 new_phi = VEC_index (gimple, new_phis, 0);
3906 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
3907 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
3908 new_dest = vect_create_destination_var (scalar_dest, vectype);
3910 else
3912 new_temp = VEC_index (tree, scalar_results, 0);
3913 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
3914 expr = build2 (code, scalar_type, new_temp, adjustment_def);
3915 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
3918 epilog_stmt = gimple_build_assign (new_dest, expr);
3919 new_temp = make_ssa_name (new_dest, epilog_stmt);
3920 gimple_assign_set_lhs (epilog_stmt, new_temp);
3921 SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
3922 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3923 if (nested_in_vect_loop)
3925 set_vinfo_for_stmt (epilog_stmt,
3926 new_stmt_vec_info (epilog_stmt, loop_vinfo,
3927 NULL));
3928 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
3929 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
3931 if (!double_reduc)
3932 VEC_quick_push (tree, scalar_results, new_temp);
3933 else
3934 VEC_replace (tree, scalar_results, 0, new_temp);
3936 else
3937 VEC_replace (tree, scalar_results, 0, new_temp);
3939 VEC_replace (gimple, new_phis, 0, epilog_stmt);
3942 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
3943 phis with new adjusted scalar results, i.e., replace use <s_out0>
3944 with use <s_out4>.
3946 Transform:
3947 loop_exit:
3948 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3949 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3950 v_out2 = reduce <v_out1>
3951 s_out3 = extract_field <v_out2, 0>
3952 s_out4 = adjust_result <s_out3>
3953 use <s_out0>
3954 use <s_out0>
3956 into:
3958 loop_exit:
3959 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3960 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3961 v_out2 = reduce <v_out1>
3962 s_out3 = extract_field <v_out2, 0>
3963 s_out4 = adjust_result <s_out3>
3964 use <s_out4>
3965 use <s_out4> */
3968 /* In an SLP reduction chain we reduce the vector results into one vector
3969 if necessary, hence we set GROUP_SIZE to 1 here. SCALAR_DEST is the LHS
3970 of the last stmt in the reduction chain, since that is where we look for
3971 the loop exit phi node. */
3972 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3974 scalar_dest = gimple_assign_lhs (VEC_index (gimple,
3975 SLP_TREE_SCALAR_STMTS (slp_node),
3976 group_size - 1));
3977 group_size = 1;
3980 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
3981 case GROUP_SIZE is greater than the vectorization factor). Therefore, we
3982 need to match SCALAR_RESULTS with corresponding statements. The first
3983 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
3984 the first vector stmt, etc.
3985 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
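/* For example, with GROUP_SIZE == 4 and two new vector stmts,
RATIO == 2: scalar results 0 and 1 correspond to the first vector
stmt, and scalar results 2 and 3 to the second. */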
3986 if (group_size > VEC_length (gimple, new_phis))
3988 ratio = group_size / VEC_length (gimple, new_phis);
3989 gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
3991 else
3992 ratio = 1;
3994 for (k = 0; k < group_size; k++)
3996 if (k % ratio == 0)
3998 epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
3999 reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4002 if (slp_reduc)
4004 gimple current_stmt = VEC_index (gimple,
4005 SLP_TREE_SCALAR_STMTS (slp_node), k);
4007 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4008 /* SLP statements can't participate in patterns. */
4009 gcc_assert (!orig_stmt);
4010 scalar_dest = gimple_assign_lhs (current_stmt);
4013 phis = VEC_alloc (gimple, heap, 3);
4014 /* Find the loop-closed-use at the loop exit of the original scalar
4015 result. (The reduction result is expected to have two immediate uses,
4016 one at the latch block, and one at the loop exit). */
4017 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4018 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4019 VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4021 /* We expect to have found an exit_phi because of loop-closed-ssa
4022 form. */
4023 gcc_assert (!VEC_empty (gimple, phis));
4025 FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4027 if (outer_loop)
4029 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4030 gimple vect_phi;
4032 /* FORNOW. Currently not supporting the case that an inner-loop
4033 reduction is not used in the outer-loop (but only outside the
4034 outer-loop), unless it is a double reduction. */
4035 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4036 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4037 || double_reduc);
4039 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4040 if (!double_reduc
4041 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4042 != vect_double_reduction_def)
4043 continue;
4045 /* Handle double reduction:
4047 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4048 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4049 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4050 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4052 At that point the regular reduction (stmt2 and stmt3) is
4053 already vectorized, as well as the exit phi node, stmt4.
4054 Here we vectorize the phi node of double reduction, stmt1, and
4055 update all relevant statements. */
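/* In source form the four stmts above correspond, e.g., to
(names illustrative):
s = init; <-- s1/s2 cycle (outer loop)
for (i = 0; i < N; i++)
for (j = 0; j < M; j++) <-- s3/s4 cycle (inner loop)
s += x[i][j]; */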
4057 /* Go through all the uses of s2 to find double reduction phi
4058 node, i.e., stmt1 above. */
4059 orig_name = PHI_RESULT (exit_phi);
4060 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4062 stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4063 stmt_vec_info new_phi_vinfo;
4064 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4065 basic_block bb = gimple_bb (use_stmt);
4066 gimple use;
4068 /* Check that USE_STMT is really a double reduction phi
4069 node. */
4070 if (gimple_code (use_stmt) != GIMPLE_PHI
4071 || gimple_phi_num_args (use_stmt) != 2
4072 || !use_stmt_vinfo
4073 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4074 != vect_double_reduction_def
4075 || bb->loop_father != outer_loop)
4076 continue;
4078 /* Create vector phi node for double reduction:
4079 vs1 = phi <vs0, vs2>
4080 vs1 was created previously in this function by a call to
4081 vect_get_vec_def_for_operand and is stored in
4082 vec_initial_def;
4083 vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
4084 vs0 is created here. */
4086 /* Create vector phi node. */
4087 vect_phi = create_phi_node (vec_initial_def, bb);
4088 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4089 loop_vec_info_for_loop (outer_loop), NULL);
4090 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4092 /* Create vs0 - initial def of the double reduction phi. */
4093 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4094 loop_preheader_edge (outer_loop));
4095 init_def = get_initial_def_for_reduction (stmt,
4096 preheader_arg, NULL);
4097 vect_phi_init = vect_init_vector (use_stmt, init_def,
4098 vectype, NULL);
4100 /* Update phi node arguments with vs0 and vs2. */
4101 add_phi_arg (vect_phi, vect_phi_init,
4102 loop_preheader_edge (outer_loop),
4103 UNKNOWN_LOCATION);
4104 add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
4105 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4106 if (vect_print_dump_info (REPORT_DETAILS))
4108 fprintf (vect_dump, "created double reduction phi "
4109 "node: ");
4110 print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
4113 vect_phi_res = PHI_RESULT (vect_phi);
4115 /* Replace the use, i.e., set the correct vs1 in the regular
4116 reduction phi node. FORNOW, NCOPIES is always 1, so the
4117 loop is redundant. */
4118 use = reduction_phi;
4119 for (j = 0; j < ncopies; j++)
4121 edge pr_edge = loop_preheader_edge (loop);
4122 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4123 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4129 VEC_free (gimple, heap, phis);
4130 if (nested_in_vect_loop)
4132 if (double_reduc)
4133 loop = outer_loop;
4134 else
4135 continue;
4138 phis = VEC_alloc (gimple, heap, 3);
4139 /* Find the loop-closed-use at the loop exit of the original scalar
4140 result. (The reduction result is expected to have two immediate uses,
4141 one at the latch block, and one at the loop exit). For double
4142 reductions we are looking for exit phis of the outer loop. */
4143 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4145 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4146 VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4147 else
4149 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4151 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4153 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4155 if (!flow_bb_inside_loop_p (loop,
4156 gimple_bb (USE_STMT (phi_use_p))))
4157 VEC_safe_push (gimple, heap, phis,
4158 USE_STMT (phi_use_p));
4164 FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4166 /* Replace the uses: */
4167 orig_name = PHI_RESULT (exit_phi);
4168 scalar_result = VEC_index (tree, scalar_results, k);
4169 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4170 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4171 SET_USE (use_p, scalar_result);
4174 VEC_free (gimple, heap, phis);
4177 VEC_free (tree, heap, scalar_results);
4178 VEC_free (gimple, heap, new_phis);
4182 /* Function vectorizable_reduction.
4184 Check if STMT performs a reduction operation that can be vectorized.
4185 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4186 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4187 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4189 This function also handles reduction idioms (patterns) that have been
4190 recognized in advance during vect_pattern_recog. In this case, STMT may be
4191 of this form:
4192 X = pattern_expr (arg0, arg1, ..., X)
4193 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4194 sequence that had been detected and replaced by the pattern-stmt (STMT).
4196 In some cases of reduction patterns, the type of the reduction variable X is
4197 different from the type of the other arguments of STMT.
4198 In such cases, the vectype that is used when transforming STMT into a vector
4199 stmt is different from the vectype that is used to determine the
4200 vectorization factor, because it consists of a different number of elements
4201 than the actual number of elements that are being operated upon in parallel.
4203 For example, consider an accumulation of shorts into an int accumulator.
4204 On some targets it's possible to vectorize this pattern operating on 8
4205 shorts at a time (hence, the vectype for purposes of determining the
4206 vectorization factor should be V8HI); on the other hand, the vectype that
4207 is used to create the vector form is actually V4SI (the type of the result).
4209 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4210 indicates the actual level of parallelism (V8HI in the example), so
4211 that the right vectorization factor is derived. This vectype
4212 corresponds to the type of arguments to the reduction stmt, and should *NOT*
4213 be used to create the vectorized stmt. The right vectype for the vectorized
4214 stmt is obtained from the type of the result X:
4215 get_vectype_for_scalar_type (TREE_TYPE (X))
4217 This means that, contrary to "regular" reductions (or "regular" stmts in
4218 general), the following equation:
4219 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4220 does *NOT* necessarily hold for reduction patterns. */
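/* For example, for the widening-summation pattern
short x[N]; int sum;
for (i = 0; i < N; i++)
sum += x[i];
STMT is of the form sum_1 = WIDEN_SUM_EXPR <x_i, sum_0>;
STMT_VINFO_VECTYPE is V8HI (eight shorts drive the vectorization
factor), while get_vectype_for_scalar_type (int) yields V4SI, the
vectype actually used to create the vectorized stmt. */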
4222 bool
4223 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4224 gimple *vec_stmt, slp_tree slp_node)
4226 tree vec_dest;
4227 tree scalar_dest;
4228 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4229 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4230 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4231 tree vectype_in = NULL_TREE;
4232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4234 enum tree_code code, orig_code, epilog_reduc_code;
4235 enum machine_mode vec_mode;
4236 int op_type;
4237 optab optab, reduc_optab;
4238 tree new_temp = NULL_TREE;
4239 tree def;
4240 gimple def_stmt;
4241 enum vect_def_type dt;
4242 gimple new_phi = NULL;
4243 tree scalar_type;
4244 bool is_simple_use;
4245 gimple orig_stmt;
4246 stmt_vec_info orig_stmt_info;
4247 tree expr = NULL_TREE;
4248 int i;
4249 int ncopies;
4250 int epilog_copies;
4251 stmt_vec_info prev_stmt_info, prev_phi_info;
4252 bool single_defuse_cycle = false;
4253 tree reduc_def = NULL_TREE;
4254 gimple new_stmt = NULL;
4255 int j;
4256 tree ops[3];
4257 bool nested_cycle = false, found_nested_cycle_def = false;
4258 gimple reduc_def_stmt = NULL;
4259 /* The default is that the reduction variable is the last operand in the statement. */
4260 int reduc_index = 2;
4261 bool double_reduc = false, dummy;
4262 basic_block def_bb;
4263 struct loop * def_stmt_loop, *outer_loop = NULL;
4264 tree def_arg;
4265 gimple def_arg_stmt;
4266 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4267 VEC (gimple, heap) *phis = NULL;
4268 int vec_num;
4269 tree def0, def1, tem;
4271 /* In case of a reduction chain we switch to the first stmt in the chain,
4272 but we don't update STMT_INFO, since only the last stmt is marked as a
4273 reduction and has reduction properties. */
4274 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4275 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4277 if (nested_in_vect_loop_p (loop, stmt))
4279 outer_loop = loop;
4280 loop = loop->inner;
4281 nested_cycle = true;
4284 /* 1. Is vectorizable reduction? */
4285 /* Not supportable if the reduction variable is used in the loop, unless
4286 it's a reduction chain. */
4287 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4288 && !GROUP_FIRST_ELEMENT (stmt_info))
4289 return false;
4291 /* Reductions that are not used even in an enclosing outer-loop
4292 are expected to be "live" (used out of the loop). */
4293 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4294 && !STMT_VINFO_LIVE_P (stmt_info))
4295 return false;
4297 /* Make sure it was already recognized as a reduction computation. */
4298 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4299 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4300 return false;
4302 /* 2. Has this been recognized as a reduction pattern?
4304 Check if STMT represents a pattern that has been recognized
4305 in earlier analysis stages. For stmts that represent a pattern,
4306 the STMT_VINFO_RELATED_STMT field records the last stmt in
4307 the original sequence that constitutes the pattern. */
4309 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4310 if (orig_stmt)
4312 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4313 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
4314 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4315 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4318 /* 3. Check the operands of the operation. The first operands are defined
4319 inside the loop body. The last operand is the reduction variable,
4320 which is defined by the loop-header-phi. */
4322 gcc_assert (is_gimple_assign (stmt));
4324 /* Flatten RHS. */
4325 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4327 case GIMPLE_SINGLE_RHS:
4328 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4329 if (op_type == ternary_op)
4331 tree rhs = gimple_assign_rhs1 (stmt);
4332 ops[0] = TREE_OPERAND (rhs, 0);
4333 ops[1] = TREE_OPERAND (rhs, 1);
4334 ops[2] = TREE_OPERAND (rhs, 2);
4335 code = TREE_CODE (rhs);
4337 else
4338 return false;
4339 break;
4341 case GIMPLE_BINARY_RHS:
4342 code = gimple_assign_rhs_code (stmt);
4343 op_type = TREE_CODE_LENGTH (code);
4344 gcc_assert (op_type == binary_op);
4345 ops[0] = gimple_assign_rhs1 (stmt);
4346 ops[1] = gimple_assign_rhs2 (stmt);
4347 break;
4349 case GIMPLE_TERNARY_RHS:
4350 code = gimple_assign_rhs_code (stmt);
4351 op_type = TREE_CODE_LENGTH (code);
4352 gcc_assert (op_type == ternary_op);
4353 ops[0] = gimple_assign_rhs1 (stmt);
4354 ops[1] = gimple_assign_rhs2 (stmt);
4355 ops[2] = gimple_assign_rhs3 (stmt);
4356 break;
4358 case GIMPLE_UNARY_RHS:
4359 return false;
4361 default:
4362 gcc_unreachable ();
4365 scalar_dest = gimple_assign_lhs (stmt);
4366 scalar_type = TREE_TYPE (scalar_dest);
4367 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4368 && !SCALAR_FLOAT_TYPE_P (scalar_type))
4369 return false;
4371 /* All uses but the last are expected to be defined in the loop.
4372 The last use is the reduction variable. In case of a nested cycle this
4373 assumption is not true: we use reduc_index to record the index of the
4374 reduction variable. */
4375 for (i = 0; i < op_type-1; i++)
4377 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4378 if (i == 0 && code == COND_EXPR)
4379 continue;
4381 is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
4382 &def_stmt, &def, &dt, &tem);
4383 if (!vectype_in)
4384 vectype_in = tem;
4385 gcc_assert (is_simple_use);
4387 if (dt != vect_internal_def
4388 && dt != vect_external_def
4389 && dt != vect_constant_def
4390 && dt != vect_induction_def
4391 && !(dt == vect_nested_cycle && nested_cycle))
4392 return false;
4394 if (dt == vect_nested_cycle)
4396 found_nested_cycle_def = true;
4397 reduc_def_stmt = def_stmt;
4398 reduc_index = i;
4402 is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
4403 &def, &dt, &tem);
4404 if (!vectype_in)
4405 vectype_in = tem;
4406 gcc_assert (is_simple_use);
4407 gcc_assert (dt == vect_reduction_def
4408 || dt == vect_nested_cycle
4409 || ((dt == vect_internal_def || dt == vect_external_def
4410 || dt == vect_constant_def || dt == vect_induction_def)
4411 && nested_cycle && found_nested_cycle_def));
4412 if (!found_nested_cycle_def)
4413 reduc_def_stmt = def_stmt;
4415 gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4416 if (orig_stmt)
4417 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4418 reduc_def_stmt,
4419 !nested_cycle,
4420 &dummy));
4421 else
4423 gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4424 !nested_cycle, &dummy);
4425 /* We changed STMT to be the first stmt in the reduction chain, hence we
4426 check that in this case the first element in the chain is STMT. */
4427 gcc_assert (stmt == tmp
4428 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4431 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4432 return false;
4434 if (slp_node || PURE_SLP_STMT (stmt_info))
4435 ncopies = 1;
4436 else
4437 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4438 / TYPE_VECTOR_SUBPARTS (vectype_in));
4440 gcc_assert (ncopies >= 1);
4442 vec_mode = TYPE_MODE (vectype_in);
4444 if (code == COND_EXPR)
4446 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
4448 if (vect_print_dump_info (REPORT_DETAILS))
4449 fprintf (vect_dump, "unsupported condition in reduction");
4451 return false;
4454 else
4456 /* 4. Supportable by target? */
4458 /* 4.1. check support for the operation in the loop */
4459 optab = optab_for_tree_code (code, vectype_in, optab_default);
4460 if (!optab)
4462 if (vect_print_dump_info (REPORT_DETAILS))
4463 fprintf (vect_dump, "no optab.");
4465 return false;
4468 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4470 if (vect_print_dump_info (REPORT_DETAILS))
4471 fprintf (vect_dump, "op not supported by target.");
4473 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4474 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4475 < vect_min_worthwhile_factor (code))
4476 return false;
4478 if (vect_print_dump_info (REPORT_DETAILS))
4479 fprintf (vect_dump, "proceeding using word mode.");
4482 /* Worthwhile without SIMD support? */
4483 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4484 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4485 < vect_min_worthwhile_factor (code))
4487 if (vect_print_dump_info (REPORT_DETAILS))
4488 fprintf (vect_dump, "not worthwhile without SIMD support.");
4490 return false;
4494 /* 4.2. Check support for the epilog operation.
4496 If STMT represents a reduction pattern, then the type of the
4497 reduction variable may be different than the type of the rest
4498 of the arguments. For example, consider the case of accumulation
4499 of shorts into an int accumulator; the original code:
4500 S1: int_a = (int) short_a;
4501 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
4503 was replaced with:
4504 STMT: int_acc = widen_sum <short_a, int_acc>
4506 This means that:
4507 1. The tree-code that is used to create the vector operation in the
4508 epilog code (that reduces the partial results) is not the
4509 tree-code of STMT, but is rather the tree-code of the original
4510 stmt from the pattern that STMT is replacing. I.e., in the example
4511 above we want to use 'widen_sum' in the loop, but 'plus' in the
4512 epilog.
4513 2. The type (mode) we use to check available target support
4514 for the vector operation to be created in the *epilog*, is
4515 determined by the type of the reduction variable (in the example
4516 above we'd check this: optab_handler (plus_optab, vect_int_mode)).
4517 However the type (mode) we use to check available target support
4518 for the vector operation to be created *inside the loop*, is
4519 determined by the type of the other arguments to STMT (in the
4520 example we'd check this: optab_handler (widen_sum_optab,
4521 vect_short_mode)).
4523 This is contrary to "regular" reductions, in which the types of all
4524 the arguments are the same as the type of the reduction variable.
4525 For "regular" reductions we can therefore use the same vector type
4526 (and also the same tree-code) when generating the epilog code and
4527 when generating the code inside the loop. */
4529 if (orig_stmt)
4531 /* This is a reduction pattern: get the vectype from the type of the
4532 reduction variable, and get the tree-code from orig_stmt. */
4533 orig_code = gimple_assign_rhs_code (orig_stmt);
4534 gcc_assert (vectype_out);
4535 vec_mode = TYPE_MODE (vectype_out);
4537 else
4539 /* Regular reduction: the same vectype and tree-code as used for
4540 the vector code inside the loop can be used for the epilog code. */
4541 orig_code = code;
4544 if (nested_cycle)
4546 def_bb = gimple_bb (reduc_def_stmt);
4547 def_stmt_loop = def_bb->loop_father;
4548 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4549 loop_preheader_edge (def_stmt_loop));
4550 if (TREE_CODE (def_arg) == SSA_NAME
4551 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4552 && gimple_code (def_arg_stmt) == GIMPLE_PHI
4553 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4554 && vinfo_for_stmt (def_arg_stmt)
4555 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4556 == vect_double_reduction_def)
4557 double_reduc = true;
4560 epilog_reduc_code = ERROR_MARK;
4561 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4563 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4564 optab_default);
4565 if (!reduc_optab)
4567 if (vect_print_dump_info (REPORT_DETAILS))
4568 fprintf (vect_dump, "no optab for reduction.");
4570 epilog_reduc_code = ERROR_MARK;
4573 if (reduc_optab
4574 && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4576 if (vect_print_dump_info (REPORT_DETAILS))
4577 fprintf (vect_dump, "reduc op not supported by target.");
4579 epilog_reduc_code = ERROR_MARK;
4582 else
4584 if (!nested_cycle || double_reduc)
4586 if (vect_print_dump_info (REPORT_DETAILS))
4587 fprintf (vect_dump, "no reduc code for scalar code.");
4589 return false;
4593 if (double_reduc && ncopies > 1)
4595 if (vect_print_dump_info (REPORT_DETAILS))
4596 fprintf (vect_dump, "multiple types in double reduction");
4598 return false;
4601 if (!vec_stmt) /* transformation not required. */
4603 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4604 return false;
4605 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4606 return true;
4609 /** Transform. **/
4611 if (vect_print_dump_info (REPORT_DETAILS))
4612 fprintf (vect_dump, "transform reduction.");
4614 /* FORNOW: Multiple types are not supported for condition. */
4615 if (code == COND_EXPR)
4616 gcc_assert (ncopies == 1);
4618 /* Create the destination vector */
4619 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4621 /* In case the vectorization factor (VF) is bigger than the number
4622 of elements that we can fit in a vectype (nunits), we have to generate
4623 more than one vector stmt, i.e., we need to "unroll" the
4624 vector stmt by a factor VF/nunits. For more details see documentation
4625 in vectorizable_operation. */
4627 /* If the reduction is used in an outer loop we need to generate
4628 VF intermediate results, like so (e.g. for ncopies=2):
4629 r0 = phi (init, r0)
4630 r1 = phi (init, r1)
4631 r0 = x0 + r0;
4632 r1 = x1 + r1;
4633 (i.e. we generate VF results in 2 registers).
4634 In this case we have a separate def-use cycle for each copy, and therefore
4635 for each copy we get the vector def for the reduction variable from the
4636 respective phi node created for this copy.
4638 Otherwise (the reduction is unused in the loop nest), we can combine
4639 together intermediate results, like so (e.g. for ncopies=2):
4640 r = phi (init, r)
4641 r = x0 + r;
4642 r = x1 + r;
4643 (i.e. we generate VF/2 results in a single register).
4644 In this case for each copy we get the vector def for the reduction variable
4645 from the vectorized reduction operation generated in the previous iteration.
4648 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4650 single_defuse_cycle = true;
4651 epilog_copies = 1;
4653 else
4654 epilog_copies = ncopies;
4656 prev_stmt_info = NULL;
4657 prev_phi_info = NULL;
4658 if (slp_node)
4660 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4661 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4662 == TYPE_VECTOR_SUBPARTS (vectype_in));
4664 else
4666 vec_num = 1;
4667 vec_oprnds0 = VEC_alloc (tree, heap, 1);
4668 if (op_type == ternary_op)
4669 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4672 phis = VEC_alloc (gimple, heap, vec_num);
4673 vect_defs = VEC_alloc (tree, heap, vec_num);
4674 if (!slp_node)
4675 VEC_quick_push (tree, vect_defs, NULL_TREE);
4677 for (j = 0; j < ncopies; j++)
4679 if (j == 0 || !single_defuse_cycle)
4681 for (i = 0; i < vec_num; i++)
4683 /* Create the reduction-phi that defines the reduction
4684 operand. */
4685 new_phi = create_phi_node (vec_dest, loop->header);
4686 set_vinfo_for_stmt (new_phi,
4687 new_stmt_vec_info (new_phi, loop_vinfo,
4688 NULL));
4689 if (j == 0 || slp_node)
4690 VEC_quick_push (gimple, phis, new_phi);
4694 if (code == COND_EXPR)
4696 gcc_assert (!slp_node);
4697 vectorizable_condition (stmt, gsi, vec_stmt,
4698 PHI_RESULT (VEC_index (gimple, phis, 0)),
4699 reduc_index);
4700 /* Multiple types are not supported for condition. */
4701 break;
4704 /* Handle uses. */
4705 if (j == 0)
4707 tree op0, op1 = NULL_TREE;
4709 op0 = ops[!reduc_index];
4710 if (op_type == ternary_op)
4712 if (reduc_index == 0)
4713 op1 = ops[2];
4714 else
4715 op1 = ops[1];
4718 if (slp_node)
4719 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
4720 -1);
4721 else
4723 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4724 stmt, NULL);
4725 VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4726 if (op_type == ternary_op)
4728 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4729 NULL);
4730 VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4734 else
4736 if (!slp_node)
4738 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
4739 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
4740 VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4741 if (op_type == ternary_op)
4743 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4744 loop_vec_def1);
4745 VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4749 if (single_defuse_cycle)
4750 reduc_def = gimple_assign_lhs (new_stmt);
4752 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4755 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4757 if (slp_node)
4758 reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4759 else
4761 if (!single_defuse_cycle || j == 0)
4762 reduc_def = PHI_RESULT (new_phi);
4765 def1 = ((op_type == ternary_op)
4766 ? VEC_index (tree, vec_oprnds1, i) : NULL);
4767 if (op_type == binary_op)
4769 if (reduc_index == 0)
4770 expr = build2 (code, vectype_out, reduc_def, def0);
4771 else
4772 expr = build2 (code, vectype_out, def0, reduc_def);
4774 else
4776 if (reduc_index == 0)
4777 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4778 else
4780 if (reduc_index == 1)
4781 expr = build3 (code, vectype_out, def0, reduc_def, def1);
4782 else
4783 expr = build3 (code, vectype_out, def0, def1, reduc_def);
4787 new_stmt = gimple_build_assign (vec_dest, expr);
4788 new_temp = make_ssa_name (vec_dest, new_stmt);
4789 gimple_assign_set_lhs (new_stmt, new_temp);
4790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4792 if (slp_node)
4794 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4795 VEC_quick_push (tree, vect_defs, new_temp);
4797 else
4798 VEC_replace (tree, vect_defs, 0, new_temp);
4801 if (slp_node)
4802 continue;
4804 if (j == 0)
4805 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4806 else
4807 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4809 prev_stmt_info = vinfo_for_stmt (new_stmt);
4810 prev_phi_info = vinfo_for_stmt (new_phi);
4813 /* Finalize the reduction-phi (set its arguments) and create the
4814 epilog reduction code. */
4815 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
4817 new_temp = gimple_assign_lhs (*vec_stmt);
4818 VEC_replace (tree, vect_defs, 0, new_temp);
4821 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
4822 epilog_reduc_code, phis, reduc_index,
4823 double_reduc, slp_node);
4825 VEC_free (gimple, heap, phis);
4826 VEC_free (tree, heap, vec_oprnds0);
4827 if (vec_oprnds1)
4828 VEC_free (tree, heap, vec_oprnds1);
4830 return true;
4833 /* Function vect_min_worthwhile_factor.
4835 For a loop where we could vectorize the operation indicated by CODE,
4836 return the minimum vectorization factor that makes it worthwhile
4837 to use generic vectors. */
4838 int
4839 vect_min_worthwhile_factor (enum tree_code code)
4841 switch (code)
4843 case PLUS_EXPR:
4844 case MINUS_EXPR:
4845 case NEGATE_EXPR:
4846 return 4;
4848 case BIT_AND_EXPR:
4849 case BIT_IOR_EXPR:
4850 case BIT_XOR_EXPR:
4851 case BIT_NOT_EXPR:
4852 return 2;
4854 default:
4855 return INT_MAX;
4860 /* Function vectorizable_induction
4862 Check if PHI performs an induction computation that can be vectorized.
4863 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
4864 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
4865 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
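/* For example, for V4SI and
for (i = 0; i < N; i++)
a[i] = i;
the scalar phi of i is replaced by a vector phi whose initial def is
{0, 1, 2, 3} and whose latch def adds the step vector {4, 4, 4, 4}
on every iteration (values illustrative for VF == 4). */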
4867 bool
4868 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
4869 gimple *vec_stmt)
4871 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
4872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4873 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4874 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4875 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4876 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4877 tree vec_def;
4879 gcc_assert (ncopies >= 1);
4880 /* FORNOW. This restriction should be relaxed. */
4881 if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
4883 if (vect_print_dump_info (REPORT_DETAILS))
4884 fprintf (vect_dump, "multiple types in nested loop.");
4885 return false;
4888 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4889 return false;
4891 /* FORNOW: SLP not supported. */
4892 if (STMT_SLP_TYPE (stmt_info))
4893 return false;
4895 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
4897 if (gimple_code (phi) != GIMPLE_PHI)
4898 return false;
4900 if (!vec_stmt) /* transformation not required. */
4902 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
4903 if (vect_print_dump_info (REPORT_DETAILS))
4904 fprintf (vect_dump, "=== vectorizable_induction ===");
4905 vect_model_induction_cost (stmt_info, ncopies);
4906 return true;
4909 /** Transform. **/
4911 if (vect_print_dump_info (REPORT_DETAILS))
4912 fprintf (vect_dump, "transform induction phi.");
4914 vec_def = get_initial_def_for_induction (phi);
4915 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
4916 return true;
4919 /* Function vectorizable_live_operation.
4921 STMT computes a value that is used outside the loop. Check if
4922 it can be supported. */
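/* For example, with x and y loop-invariant (a sketch):
for (i = 0; i < N; i++)
{ t = x + y; a[i] = t; }
... = t; <-- t is "live": used after the loop
All operands of t's definition are invariant, so the scalar stmt can
remain in place and its last computed value be used. */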
4924 bool
4925 vectorizable_live_operation (gimple stmt,
4926 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
4927 gimple *vec_stmt ATTRIBUTE_UNUSED)
4929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4931 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4932 int i;
4933 int op_type;
4934 tree op;
4935 tree def;
4936 gimple def_stmt;
4937 enum vect_def_type dt;
4938 enum tree_code code;
4939 enum gimple_rhs_class rhs_class;
4941 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4943 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4944 return false;
4946 if (!is_gimple_assign (stmt))
4947 return false;
4949 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4950 return false;
4952 /* FORNOW. CHECKME. */
4953 if (nested_in_vect_loop_p (loop, stmt))
4954 return false;
4956 code = gimple_assign_rhs_code (stmt);
4957 op_type = TREE_CODE_LENGTH (code);
4958 rhs_class = get_gimple_rhs_class (code);
4959 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
4960 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
4962 /* FORNOW: support only if all uses are invariant. This means
4963 that the scalar operations can remain in place, unvectorized.
4964 The original last scalar value that they compute will be used. */
4966 for (i = 0; i < op_type; i++)
4968 if (rhs_class == GIMPLE_SINGLE_RHS)
4969 op = TREE_OPERAND (gimple_op (stmt, 1), i);
4970 else
4971 op = gimple_op (stmt, i + 1);
4972 if (op
4973 && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
4975 if (vect_print_dump_info (REPORT_DETAILS))
4976 fprintf (vect_dump, "use not simple.");
4977 return false;
4980 if (dt != vect_external_def && dt != vect_constant_def)
4981 return false;
4984 /* No transformation is required for the cases we currently support. */
4985 return true;
4988 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
4990 static void
4991 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
4993 ssa_op_iter op_iter;
4994 imm_use_iterator imm_iter;
4995 def_operand_p def_p;
4996 gimple ustmt;
4998 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5000 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5002 basic_block bb;
5004 if (!is_gimple_debug (ustmt))
5005 continue;
5007 bb = gimple_bb (ustmt);
5009 if (!flow_bb_inside_loop_p (loop, bb))
5011 if (gimple_debug_bind_p (ustmt))
5013 if (vect_print_dump_info (REPORT_DETAILS))
5014 fprintf (vect_dump, "killing debug use");
5016 gimple_debug_bind_reset_value (ustmt);
5017 update_stmt (ustmt);
5019 else
5020 gcc_unreachable ();
5026 /* Function vect_transform_loop.
5028 The analysis phase has determined that the loop is vectorizable.
5029 Vectorize the loop: create vectorized stmts to replace the scalar
5030 stmts in the loop, and update the loop exit condition. */
5032 void
5033 vect_transform_loop (loop_vec_info loop_vinfo)
5035 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5036 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5037 int nbbs = loop->num_nodes;
5038 gimple_stmt_iterator si;
5039 int i;
5040 tree ratio = NULL;
5041 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5042 bool strided_store;
5043 bool slp_scheduled = false;
5044 unsigned int nunits;
5045 tree cond_expr = NULL_TREE;
5046 gimple_seq cond_expr_stmt_list = NULL;
5047 bool do_peeling_for_loop_bound;
5049 if (vect_print_dump_info (REPORT_DETAILS))
5050 fprintf (vect_dump, "=== vec_transform_loop ===");
5052 /* Peel the loop if there are data refs with unknown alignment.
5053 Only one store data ref with unknown alignment is allowed.
5055 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5056 vect_do_peeling_for_alignment (loop_vinfo);
5058 do_peeling_for_loop_bound
5059 = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5060 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5061 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5062 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
5064 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5065 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5066 vect_loop_versioning (loop_vinfo,
5067 !do_peeling_for_loop_bound,
5068 &cond_expr, &cond_expr_stmt_list);
5070 /* If the loop has a symbolic number of iterations 'n' (i.e., it is not a
5071 compile-time constant), or it is a constant that is not divisible by the
5072 vectorization factor, then an epilog loop needs to be created.
5073 We therefore duplicate the loop: the original loop will be vectorized,
5074 and will compute the first (n/VF) iterations. The second copy of the loop
5075 will remain scalar and will compute the remaining (n%VF) iterations.
5076 (VF is the vectorization factor). */
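/* For example, for n == 10 and VF == 4 the vectorized loop executes
10/4 == 2 iterations (covering 8 elements) and the scalar epilog
loop computes the remaining 10%4 == 2 iterations. */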
5078 if (do_peeling_for_loop_bound)
5079 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5080 cond_expr, cond_expr_stmt_list);
5081 else
5082 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5083 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5085 /* 1) Make sure the loop header has exactly two entries
5086 2) Make sure we have a preheader basic block. */
5088 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5090 split_edge (loop_preheader_edge (loop));
5092 /* FORNOW: the vectorizer supports only loops whose body consists
5093 of one basic block (header + empty latch). When the vectorizer
5094 supports more involved loop forms, the order in which the BBs are
5095 traversed will need to be reconsidered. */
5097 for (i = 0; i < nbbs; i++)
5099 basic_block bb = bbs[i];
5100 stmt_vec_info stmt_info;
5101 gimple phi;
5103 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5105 phi = gsi_stmt (si);
5106 if (vect_print_dump_info (REPORT_DETAILS))
5108 fprintf (vect_dump, "------>vectorizing phi: ");
5109 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
5111 stmt_info = vinfo_for_stmt (phi);
5112 if (!stmt_info)
5113 continue;
5115 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5116 vect_loop_kill_debug_uses (loop, phi);
5118 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5119 && !STMT_VINFO_LIVE_P (stmt_info))
5120 continue;
5122 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5123 != (unsigned HOST_WIDE_INT) vectorization_factor)
5124 && vect_print_dump_info (REPORT_DETAILS))
5125 fprintf (vect_dump, "multiple-types.");
5127 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5129 if (vect_print_dump_info (REPORT_DETAILS))
5130 fprintf (vect_dump, "transform phi.");
5131 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5135 for (si = gsi_start_bb (bb); !gsi_end_p (si);)
5137 gimple stmt = gsi_stmt (si);
5138 bool is_store;
5140 if (vect_print_dump_info (REPORT_DETAILS))
5142 fprintf (vect_dump, "------>vectorizing statement: ");
5143 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5146 stmt_info = vinfo_for_stmt (stmt);
5148 /* vector stmts created in the outer-loop during vectorization of
5149 stmts in an inner-loop may not have a stmt_info, and do not
5150 need to be vectorized. */
5151 if (!stmt_info)
5153 gsi_next (&si);
5154 continue;
5157 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5158 vect_loop_kill_debug_uses (loop, stmt);
5160 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5161 && !STMT_VINFO_LIVE_P (stmt_info))
5163 gsi_next (&si);
5164 continue;
5167 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5168 nunits =
5169 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
5170 if (!STMT_SLP_TYPE (stmt_info)
5171 && nunits != (unsigned int) vectorization_factor
5172 && vect_print_dump_info (REPORT_DETAILS))
5173 /* For SLP, VF is set according to the unrolling factor, not to the
5174 vector size, hence for SLP this print is not valid. */
5175 fprintf (vect_dump, "multiple-types.");
5177 /* SLP. Schedule all the SLP instances when the first SLP stmt is
5178 reached. */
5179 if (STMT_SLP_TYPE (stmt_info))
5181 if (!slp_scheduled)
5183 slp_scheduled = true;
5185 if (vect_print_dump_info (REPORT_DETAILS))
5186 fprintf (vect_dump, "=== scheduling SLP instances ===");
5188 vect_schedule_slp (loop_vinfo, NULL);
5191 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
5192 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5194 gsi_next (&si);
5195 continue;
5199 /* -------- vectorize statement ------------ */
5200 if (vect_print_dump_info (REPORT_DETAILS))
5201 fprintf (vect_dump, "transform statement.");
5203 strided_store = false;
5204 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
5205 if (is_store)
5207 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5209 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5210 interleaving chain was completed; free all the stores in
5211 the chain. */
5212 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5213 gsi_remove (&si, true);
5214 continue;
5216 else
5218 /* Free the attached stmt_vec_info and remove the stmt. */
5219 free_stmt_vec_info (stmt);
5220 gsi_remove (&si, true);
5221 continue;
5224 gsi_next (&si);
5225 } /* stmts in BB */
5226 } /* BBs in loop */
5228 slpeel_make_loop_iterate_ntimes (loop, ratio);
5230 /* The memory tags and pointers in vectorized statements need to
5231 have their SSA forms updated. FIXME, why can't this be delayed
5232 until all the loops have been transformed? */
5233 update_ssa (TODO_update_ssa);
5235 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5236 fprintf (vect_dump, "LOOP VECTORIZED.");
5237 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5238 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");