/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it was manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of
   following stmts which use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.
   Currently the only target-specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
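/* A minimal sketch (not this pass's actual code) of the support check
   described above: an operation is vectorizable only if the target's optab
   has an instruction for it in the chosen vector mode.  */

static bool
example_target_supports_vector_add (enum machine_mode vector_mode)
{
  /* CODE_FOR_nothing means the target has no insn for add_optab in
     VECTOR_MODE, e.g. no V8HI addition.  */
  return optab_handler (add_optab, vector_mode) != CODE_FOR_nothing;
}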
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:

   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
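/* Illustrative only (values assumed, not taken from a target): the factor is
   simply how many scalar elements fit in one vector register, so 16-byte
   vectors over 2-byte shorts give VF = 8, matching the V8HI example above.  */

static unsigned
example_vectorization_factor (unsigned vector_size_in_bytes,
                              unsigned scalar_size_in_bytes)
{
  /* Number of scalar elements processed per vectorized iteration.  */
  return vector_size_in_bytes / scalar_size_in_bytes;
}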
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  stmt_vec_info stmt_info;
  gimple stmt, pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
  for (i = 0; i < nbbs; i++)
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);

              vectype = get_vectype_for_scalar_type (scalar_type);

              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);

              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (vect_print_dump_info (REPORT_DETAILS))
                      fprintf (vect_dump, "==> examining pattern statement: ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "skip.");

          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
              if (pattern_def_seq == NULL)
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);

              if (pattern_def_seq != NULL)
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);

                  if (!gsi_end_p (pattern_def_si))
                      if (vect_print_dump_info (REPORT_DETAILS))
                          fprintf (vect_dump,
                                   "==> examining pattern def stmt: ");
                          print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
                                             TDF_SLIM);

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                      pattern_def_si = gsi_none ();
                      analyze_pattern_stmt = false;
                  analyze_pattern_stmt = false;
          if (gimple_get_lhs (stmt) == NULL_TREE)
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

          if (STMT_VINFO_VECTYPE (stmt_info))
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 computation).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);

              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
              fprintf (vect_dump,
                       "not vectorized: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump,
                           "not vectorized: different sized vector "
                           "types in statement, ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                  fprintf (vect_dump, " and ");
                  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);

          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            pattern_def_seq = NULL;

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution with constant step.  */
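/* Illustrative only (assumed example, not this file's code): for the
   induction variable of

     for (i = 0; i < N; i++)
       a[i] = 0;

   the scalar evolution is {0, +, 1} - initial value 0 and a constant step
   of 1 - which the predicate below accepts.  A step that is not a
   compile-time constant (e.g. "i += n") is rejected as "step unknown".  */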
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree *init,
                             tree *step)
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);

  /* When there is no evolution in this loop, the evolution function
     is constant.  */
  if (evolution_part == NULL_TREE)

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "step: ");
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
      fprintf (vect_dump, ", init: ");
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);

  if (TREE_CODE (step_expr) != INTEGER_CST)
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "step unknown.");
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
  basic_block bb = loop->header;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  Reduction detection assumes that all
     the inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      STRIP_NOPS (access_fn);
      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);

      STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
        = evolution_part_in_loop_num (access_fn, loop->num);

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
          VEC_safe_push (gimple, heap, worklist, phi);

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
  /* Second - identify all reductions and nested cycles.  */
  while (VEC_length (gimple, worklist) > 0)
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected double reduction.");

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
        vect_double_reduction_def;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected vectorizable nested cycle.");

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
        vect_nested_cycle;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected reduction.");

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
        vect_reduction_def;
      /* Store the reduction cycles for possible vectorization in
         loop-aware SLP.  */
      VEC_safe_push (gimple, heap,
                     LOOP_VINFO_REDUCTIONS (loop_vinfo),
                     reduc_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown def-use cycle pattern.");

  VEC_free (gimple, heap, worklist);
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples of scalar cycles are shown in the illustrative snippet below.  */
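/* Illustrative only (assumed examples, not taken from this file): two common
   scalar cycles that the classification above distinguishes.

     for (i = 0; i < N; i++)      // 'i' forms an induction cycle;
       sum += a[i];               // 'sum' forms a reduction cycle.

   The loop-header PHI of 'i' advances by a constant step every iteration
   (induction), while the PHI of 'sum' accumulates values whose order of
   combination may be changed when vectorizing (reduction).  */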
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
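/* Illustrative only (assumed example): for a counted loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   the number of iterations can be expressed symbolically as 'n', whereas a
   loop whose exit depends on loaded data, e.g. "while (a[i] != 0) i++;",
   has no such expression and is rejected later as non-countable.  */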
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);

  return get_loop_exit_condition (loop);
/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */
bb_in_loop_p (const_basic_block bb, const void *data)
  const struct loop *const loop = (const struct loop *) data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */
new_loop_vec_info (struct loop *loop)
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;

      /* bb in current nest.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
          gimple phi = gsi_stmt (si);
          gimple_set_uid (phi, 0);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
          gimple stmt = gsi_stmt (si);
          gimple_set_uid (stmt, 0);
          set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the same
     as reversed postorder traversal, so we are safe.  */

  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (gimple, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_PEELING_HTAB (res) = NULL;
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
  gimple_stmt_iterator si;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));

  for (j = 0; j < nbbs; j++)
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
          gimple stmt = gsi_stmt (si);
          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
    vect_free_slp_instance (instance);
  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));

  if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
    htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */
vect_analyze_loop_1 (struct loop *loop)
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
  loop_vinfo = vect_analyze_loop_form (loop);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "bad inner-loop form.");
/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
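/* Illustrative only (assumed example): a form this analysis accepts is a
   countable inner-most loop with a single exit and no other control flow,
   e.g.

     for (i = 0; i < n; i++)
       a[i] = b[i] * c[i];

   whereas an early "break" or an "if" inside the body introduces extra
   control flow / exits and is rejected below with "control flow in loop"
   or "multiple exits".  */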
vect_analyze_loop_form (struct loop *loop)
  loop_vec_info loop_vinfo;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW.  May want to relax some of these restrictions in the future).  */

      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch): a single-basic-block body plus an
         empty latch.  */
      if (loop->num_nodes != 2)
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");

      if (empty_block_p (loop->header))
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");

      struct loop *innerloop = loop->inner;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5:
         the outer-loop header, the two BBs of the inner loop, a block
         following the inner loop that feeds the outer-loop latch, and the
         latch itself.  The inner-loop has the properties expected of
         inner-most loops as described above.  */
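/* Illustrative only (assumed example): the doubly-nested form handled by
   outer-loop vectorization, where the outer loop carries the vectorizable
   computation and the inner loop runs sequentially per outer iteration.

     for (i = 0; i < n; i++)        // outer loop: candidate for vectorization
       {
         s = 0;
         for (j = 0; j < m; j++)    // inner loop: executed sequentially
           s += a[i][j];
         b[i] = s;
       }
*/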
  if ((loop->inner)->inner || (loop->inner)->next)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: multiple nested loops.");

  /* Analyze the inner-loop.  */
  inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
  if (!inner_loop_vinfo)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: Bad inner loop.");

  if (!expr_invariant_in_loop_p (loop,
                                 LOOP_VINFO_NITERS (inner_loop_vinfo)))
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: inner-loop count not invariant.");
      destroy_loop_vec_info (inner_loop_vinfo, true);

  if (loop->num_nodes != 5)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: control flow in loop.");
      destroy_loop_vec_info (inner_loop_vinfo, true);

  gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
  entryedge = EDGE_PRED (innerloop->header, 0);
  if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
    entryedge = EDGE_PRED (innerloop->header, 1);

  if (entryedge->src != loop->header
      || !single_exit (innerloop)
      || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
      destroy_loop_vec_info (inner_loop_vinfo, true);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Considering outer-loop vectorization.");
  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");

      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  if (!number_of_iterations)
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  if (chrec_contains_undetermined (number_of_iterations))
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);

  if (!NITERS_KNOWN_P (number_of_iterations))
      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  bool only_slp_in_loop = true, ok;
  HOST_WIDE_INT max_niter;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say that we
     perform pure SLP on the loop - cross iteration parallelism is not
     exploited.  */
  for (i = 0; i < nbbs; i++)
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);
          if ((STMT_VINFO_RELEVANT_P (stmt_info)
               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
              && !PURE_SLP_STMT (stmt_info))
            /* STMT needs both SLP and loop-based vectorization.  */
            only_slp_in_loop = false;

  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                             LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Updating vectorization factor to %d ",
             vectorization_factor);
  for (i = 0; i < nbbs; i++)
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), because this case
                 requires to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                       != vect_double_reduction_def)
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                  if (gimple_phi_num_args (phi) != 1)

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
                  if (!op_def_stmt
                      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
                      || !vinfo_for_stmt (op_def_stmt))

                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump, "not vectorized: value used after loop.");

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump,
                         "not vectorized: scalar dependence cycle.");

          if (STMT_VINFO_RELEVANT_P (stmt_info))
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);

              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
          gimple stmt = gsi_stmt (si);
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
      || ((max_niter = max_stmt_executions_int (loop)) != -1
          && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the user
     specified threshold.  */
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
/* Function vect_analyze_loop_2.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
  bool ok, slp = false;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.  Also adjust the minimal
     vectorization factor according to the loads and stores.

     FORNOW: Handle only simple array references, whose
     alignment can be forced, and aligned pointer-references.  */
  ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo, NULL);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");

  /* Analyze data dependences between the data-refs in the loop
     and adjust the maximum vectorization factor according to
     the dependences.
     FORNOW: fail at the first data dependence that we encounter.  */
  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");

  ok = vect_determine_vectorization_factor (loop_vinfo);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
  if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */
  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "too long list of versioning for alias "
                 "run-time tests.");

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, NULL);

  /* Decide which possible SLP instances to SLP.  */
  slp = vect_make_slp_decision (loop_vinfo);

  /* Find stmts that need to be both vectorized and SLPed.  */
  vect_detect_hybrid_slp (loop_vinfo);

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */
  ok = vect_analyze_loop_operations (loop_vinfo, slp);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
vect_analyze_loop (struct loop *loop)
  loop_vec_info loop_vinfo;
  unsigned int vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");

  /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
  loop_vinfo = vect_analyze_loop_form (loop);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");

  if (vect_analyze_loop_2 (loop_vinfo))
      LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  destroy_loop_vec_info (loop_vinfo, true);

  vector_sizes &= ~current_vector_size;
  if (vector_sizes == 0
      || current_vector_size == 0)

  /* Try the next biggest vector size.  */
  current_vector_size = 1 << floor_log2 (vector_sizes);
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "***** Re-trying analysis with "
             "vector size %d\n", current_vector_size);
/* Function reduction_code_for_scalar_code

   Input:
   CODE - tree_code of a reduction operation.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
   vector of partial results into a single scalar result (which
   will also reside in a vector), or ERROR_MARK if the operation is
   a supported reduction operation, but does not have such a tree-code.

   Return FALSE if CODE currently cannot be vectorized as a reduction.  */
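/* Illustrative only (assumed example): what "reducing a vector of partial
   results" means for a PLUS reduction with VF = 4.  The vector loop keeps
   four partial sums; after the loop the reduc code collapses them into the
   single scalar result.  */

static int
example_reduce_partial_sums (const int partial_sums[4])
{
  /* Equivalent of applying REDUC_PLUS_EXPR to a V4SI of partial sums.  */
  return partial_sums[0] + partial_sums[1] + partial_sums[2] + partial_sums[3];
}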
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
      *reduc_code = REDUC_MAX_EXPR;

      *reduc_code = REDUC_MIN_EXPR;

      *reduc_code = REDUC_PLUS_EXPR;

      *reduc_code = ERROR_MARK;
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

report_vect_op (gimple stmt, const char *msg)
  fprintf (vect_dump, "%s", msg);
  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */
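/* Illustrative only (assumed example): at the source level such a chain
   typically comes from several reduction operations feeding one another in
   a single iteration, e.g.

     for (i = 0; i < n; i++)
       sum = sum + a[4*i] + a[4*i+1] + a[4*i+2] + a[4*i+3];

   where each '+' is one element of the chain rooted at the reduction PHI
   of 'sum'.  */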
vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
  stmt_vec_info use_stmt_info, current_stmt_info;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;

  if (loop != vect_loop)

  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);

  n_out_of_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
      gimple use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))

      use_stmt = USE_STMT (use_p);

      /* Check if we got back to the reduction phi.  */
      if (use_stmt == phi)
          loop_use_stmt = use_stmt;

      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
          if (vinfo_for_stmt (use_stmt)
              && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
              loop_use_stmt = use_stmt;
      else
        n_out_of_loop_uses++;

      /* There can be either a single use in the loop or two uses in
         phi nodes.  */
      if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))

  /* We reached a statement with no loop uses.  */
  if (nloop_uses == 0)

  /* This is a loop exit phi, and we haven't reached the reduction phi.  */
  if (gimple_code (loop_use_stmt) == GIMPLE_PHI)

  if (!is_gimple_assign (loop_use_stmt)
      || code != gimple_assign_rhs_code (loop_use_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))

  /* Insert USE_STMT into reduction chain.  */
  use_stmt_info = vinfo_for_stmt (loop_use_stmt);
      current_stmt_info = vinfo_for_stmt (current_stmt);
      GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
      GROUP_FIRST_ELEMENT (use_stmt_info)
        = GROUP_FIRST_ELEMENT (current_stmt_info);
  else
    GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;

  lhs = gimple_assign_lhs (loop_use_stmt);
  current_stmt = loop_use_stmt;

  if (!found || loop_use_stmt != phi || size < 2)
  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
      if (gimple_assign_rhs2 (next_stmt) == lhs)
          tree op = gimple_assign_rhs1 (next_stmt);
          gimple def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
              lhs = gimple_assign_lhs (next_stmt);
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));

          tree op = gimple_assign_rhs2 (next_stmt);
          gimple def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
              if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "swapping oprnds: ");
                  print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);

              swap_tree_operands (next_stmt,
                                  gimple_assign_rhs1_ptr (next_stmt),
                                  gimple_assign_rhs2_ptr (next_stmt));
              update_stmt (next_stmt);

      lhs = gimple_assign_lhs (next_stmt);
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));

  /* Save the chain for further analysis in SLP detection.  */
  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
  GROUP_SIZE (vinfo_for_stmt (first)) = size;
/* Function vect_is_simple_reduction_1

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation (if CHECK_REDUCTION is true)
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles, if CHECK_REDUCTION is false.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

     a1 = phi < a0, a2 >
     inner loop (def of a3)
     a2 = phi < a3 >

   If MODIFY is true it tries also to rework the code in-place to enable
   detection of more reduction patterns.  For the time being we rewrite
   "res -= RHS" into "res += -RHS" when it seems worthwhile.  */
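/* Illustrative only (assumed example): a simple reduction cycle and the
   MODIFY rewrite mentioned above.

     for (i = 0; i < n; i++)
       res -= a[i];       // rewritten as  res += -a[i];

   After the rewrite the reduction operation is a PLUS_EXPR, which is
   commutative and associative, so condition 1 above holds.  */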
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
                            bool check_reduction, bool *double_reduc,
                            bool modify)
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  gimple def_stmt, def1 = NULL, def2 = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  *double_reduc = false;

  /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
     otherwise, we assume outer loop vectorization.  */
  gcc_assert ((check_reduction && loop == vect_loop)
              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));

  name = PHI_RESULT (phi);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
      gimple use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "intermediate value used outside loop.");

      if (vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");

  if (TREE_CODE (loop_arg) != SSA_NAME)
      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "reduction: not ssa_name: ");
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);

  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduction: no def_stmt.");

  if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
)
2047 if (vect_print_dump_info (REPORT_DETAILS
))
2048 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
2052 if (is_gimple_assign (def_stmt
))
2054 name
= gimple_assign_lhs (def_stmt
);
2059 name
= PHI_RESULT (def_stmt
);
2064 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, name
)
2066 gimple use_stmt
= USE_STMT (use_p
);
2067 if (is_gimple_debug (use_stmt
))
2069 if (flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
))
2070 && vinfo_for_stmt (use_stmt
)
2071 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt
)))
2075 if (vect_print_dump_info (REPORT_DETAILS
))
2076 fprintf (vect_dump
, "reduction used in loop.");
  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
2085 op1
= PHI_ARG_DEF (def_stmt
, 0);
2087 if (gimple_phi_num_args (def_stmt
) != 1
2088 || TREE_CODE (op1
) != SSA_NAME
)
2090 if (vect_print_dump_info (REPORT_DETAILS
))
2091 fprintf (vect_dump
, "unsupported phi node definition.");
2096 def1
= SSA_NAME_DEF_STMT (op1
);
2097 if (flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
2099 && flow_bb_inside_loop_p (loop
->inner
, gimple_bb (def1
))
2100 && is_gimple_assign (def1
))
2102 if (vect_print_dump_info (REPORT_DETAILS
))
2103 report_vect_op (def_stmt
, "detected double reduction: ");
2105 *double_reduc
= true;
2112 code
= orig_code
= gimple_assign_rhs_code (def_stmt
);
  /* We can handle "res -= x[i]", which is non-associative, by
     simply rewriting this into "res += -x[i]".  Avoid changing
     the gimple instruction for the first simple tests and only do this
     if we're allowed to change code at all.  */
2118 if (code
== MINUS_EXPR
2120 && (op1
= gimple_assign_rhs1 (def_stmt
))
2121 && TREE_CODE (op1
) == SSA_NAME
2122 && SSA_NAME_DEF_STMT (op1
) == phi
)
2126 && (!commutative_tree_code (code
) || !associative_tree_code (code
)))
2128 if (vect_print_dump_info (REPORT_DETAILS
))
2129 report_vect_op (def_stmt
, "reduction: not commutative/associative: ");
2133 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
2135 if (code
!= COND_EXPR
)
2137 if (vect_print_dump_info (REPORT_DETAILS
))
2138 report_vect_op (def_stmt
, "reduction: not binary operation: ");
2143 op3
= gimple_assign_rhs1 (def_stmt
);
2144 if (COMPARISON_CLASS_P (op3
))
2146 op4
= TREE_OPERAND (op3
, 1);
2147 op3
= TREE_OPERAND (op3
, 0);
2150 op1
= gimple_assign_rhs2 (def_stmt
);
2151 op2
= gimple_assign_rhs3 (def_stmt
);
2153 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2155 if (vect_print_dump_info (REPORT_DETAILS
))
2156 report_vect_op (def_stmt
, "reduction: uses not ssa_names: ");
2163 op1
= gimple_assign_rhs1 (def_stmt
);
2164 op2
= gimple_assign_rhs2 (def_stmt
);
2166 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2168 if (vect_print_dump_info (REPORT_DETAILS
))
2169 report_vect_op (def_stmt
, "reduction: uses not ssa_names: ");
2175 type
= TREE_TYPE (gimple_assign_lhs (def_stmt
));
2176 if ((TREE_CODE (op1
) == SSA_NAME
2177 && !types_compatible_p (type
,TREE_TYPE (op1
)))
2178 || (TREE_CODE (op2
) == SSA_NAME
2179 && !types_compatible_p (type
, TREE_TYPE (op2
)))
2180 || (op3
&& TREE_CODE (op3
) == SSA_NAME
2181 && !types_compatible_p (type
, TREE_TYPE (op3
)))
2182 || (op4
&& TREE_CODE (op4
) == SSA_NAME
2183 && !types_compatible_p (type
, TREE_TYPE (op4
))))
2185 if (vect_print_dump_info (REPORT_DETAILS
))
2187 fprintf (vect_dump
, "reduction: multiple types: operation type: ");
2188 print_generic_expr (vect_dump
, type
, TDF_SLIM
);
2189 fprintf (vect_dump
, ", operands types: ");
2190 print_generic_expr (vect_dump
, TREE_TYPE (op1
), TDF_SLIM
);
2191 fprintf (vect_dump
, ",");
2192 print_generic_expr (vect_dump
, TREE_TYPE (op2
), TDF_SLIM
);
2195 fprintf (vect_dump
, ",");
2196 print_generic_expr (vect_dump
, TREE_TYPE (op3
), TDF_SLIM
);
2201 fprintf (vect_dump
, ",");
2202 print_generic_expr (vect_dump
, TREE_TYPE (op4
), TDF_SLIM
);
  /* Check that it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */

  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
      && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
      return NULL;
    }
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
           && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
      return NULL;
    }
  else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt,
                        "reduction: unsafe fixed-point math optimization: ");
      return NULL;
    }
  /* If we detected "res -= x[i]" earlier, rewrite it into
     "res += -x[i]" now.  If this turns out to be useless, reassoc
     will clean it up again.  */
  if (orig_code == MINUS_EXPR)
    {
      tree rhs = gimple_assign_rhs2 (def_stmt);
      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
                                                         rhs, NULL);
      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
                                                          loop_info, NULL));
      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
      gimple_assign_set_rhs2 (def_stmt, negrhs);
      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
      update_stmt (def_stmt);
    }
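/* Illustrative example of the in-place rewrite (hypothetical SSA names):

     res_2 = res_1 - x_3;             (MINUS_EXPR)

   becomes

     negrhs_4 = -x_3;                 (new NEGATE_EXPR stmt, inserted before)
     res_2 = res_1 + negrhs_4;        (rhs2 replaced, code changed to PLUS_EXPR)

   so the cycle now uses the commutative/associative PLUS_EXPR and can be
   recognized as a reduction.  */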
  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
2265 if (TREE_CODE (op1
) == SSA_NAME
)
2266 def1
= SSA_NAME_DEF_STMT (op1
);
2268 if (TREE_CODE (op2
) == SSA_NAME
)
2269 def2
= SSA_NAME_DEF_STMT (op2
);
2271 if (code
!= COND_EXPR
2272 && ((!def1
|| gimple_nop_p (def1
)) && (!def2
|| gimple_nop_p (def2
))))
2274 if (vect_print_dump_info (REPORT_DETAILS
))
2275 report_vect_op (def_stmt
, "reduction: no defs for operands: ");
  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */
2283 if (def2
&& def2
== phi
2284 && (code
== COND_EXPR
2285 || !def1
|| gimple_nop_p (def1
)
2286 || (def1
&& flow_bb_inside_loop_p (loop
, gimple_bb (def1
))
2287 && (is_gimple_assign (def1
)
2288 || is_gimple_call (def1
)
2289 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2290 == vect_induction_def
2291 || (gimple_code (def1
) == GIMPLE_PHI
2292 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2293 == vect_internal_def
2294 && !is_loop_header_bb_p (gimple_bb (def1
)))))))
2296 if (vect_print_dump_info (REPORT_DETAILS
))
2297 report_vect_op (def_stmt
, "detected reduction: ");
2301 if (def1
&& def1
== phi
2302 && (code
== COND_EXPR
2303 || !def2
|| gimple_nop_p (def2
)
2304 || (def2
&& flow_bb_inside_loop_p (loop
, gimple_bb (def2
))
2305 && (is_gimple_assign (def2
)
2306 || is_gimple_call (def2
)
2307 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2308 == vect_induction_def
2309 || (gimple_code (def2
) == GIMPLE_PHI
2310 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2311 == vect_internal_def
2312 && !is_loop_header_bb_p (gimple_bb (def2
)))))))
2314 if (check_reduction
)
      /* Swap operands (just for simplicity - so that the rest of the code
         can assume that the reduction variable is always the last (second)
         argument).  */
2319 if (vect_print_dump_info (REPORT_DETAILS
))
2320 report_vect_op (def_stmt
,
2321 "detected reduction: need to swap operands: ");
2323 swap_tree_operands (def_stmt
, gimple_assign_rhs1_ptr (def_stmt
),
2324 gimple_assign_rhs2_ptr (def_stmt
));
2328 if (vect_print_dump_info (REPORT_DETAILS
))
2329 report_vect_op (def_stmt
, "detected reduction: ");
2335 /* Try to find SLP reduction chain. */
2336 if (check_reduction
&& vect_is_slp_reduction (loop_info
, phi
, def_stmt
))
2338 if (vect_print_dump_info (REPORT_DETAILS
))
2339 report_vect_op (def_stmt
, "reduction: detected reduction chain: ");
2344 if (vect_print_dump_info (REPORT_DETAILS
))
2345 report_vect_op (def_stmt
, "reduction: unknown pattern: ");
/* Wrapper around vect_is_simple_reduction_1, that won't modify code
   in-place.  Arguments as there.  */
gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
                          bool check_reduction, bool *double_reduc)
{
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
                                     double_reduc, false);
}
/* Wrapper around vect_is_simple_reduction_1, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */
gimple
vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
                             bool check_reduction, bool *double_reduc)
{
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
                                     double_reduc, true);
}
/* Calculate the cost of one scalar iteration of the loop.  */
2375 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo
)
2377 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2378 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2379 int nbbs
= loop
->num_nodes
, factor
, scalar_single_iter_cost
= 0;
2380 int innerloop_iters
, i
, stmt_cost
;
  /* Count statements in scalar loop.  Using this as scalar cost for a single
     iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */
2391 innerloop_iters
= 1;
2393 innerloop_iters
= 50; /* FIXME */
2395 for (i
= 0; i
< nbbs
; i
++)
2397 gimple_stmt_iterator si
;
2398 basic_block bb
= bbs
[i
];
2400 if (bb
->loop_father
== loop
->inner
)
2401 factor
= innerloop_iters
;
2405 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2407 gimple stmt
= gsi_stmt (si
);
2408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2410 if (!is_gimple_assign (stmt
) && !is_gimple_call (stmt
))
2413 /* Skip stmts that are not vectorized inside the loop. */
2415 && !STMT_VINFO_RELEVANT_P (stmt_info
)
2416 && (!STMT_VINFO_LIVE_P (stmt_info
)
2417 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
)))
2418 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
2421 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
2423 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
2424 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2426 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2429 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2431 scalar_single_iter_cost
+= stmt_cost
* factor
;
2434 return scalar_single_iter_cost
;
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2439 vect_get_known_peeling_cost (loop_vec_info loop_vinfo
, int peel_iters_prologue
,
2440 int *peel_iters_epilogue
,
2441 int scalar_single_iter_cost
)
2443 int peel_guard_costs
= 0;
2444 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2446 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2448 *peel_iters_epilogue
= vf
/2;
2449 if (vect_print_dump_info (REPORT_COST
))
2450 fprintf (vect_dump
, "cost model: "
2451 "epilogue peel iters set to vf/2 because "
2452 "loop iterations are unknown .");
      /* If peeled iterations are known but number of scalar loop
         iterations are unknown, count a taken branch per peeled loop.  */
2456 peel_guard_costs
= 2 * vect_get_stmt_cost (cond_branch_taken
);
2460 int niters
= LOOP_VINFO_INT_NITERS (loop_vinfo
);
2461 peel_iters_prologue
= niters
< peel_iters_prologue
?
2462 niters
: peel_iters_prologue
;
2463 *peel_iters_epilogue
= (niters
- peel_iters_prologue
) % vf
;
      /* If we need to peel for gaps, but no peeling is required, we have to
         peel VF iterations.  */
2466 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && !*peel_iters_epilogue
)
2467 *peel_iters_epilogue
= vf
;
   return (peel_iters_prologue * scalar_single_iter_cost)
          + (*peel_iters_epilogue * scalar_single_iter_cost)
          + peel_guard_costs;
}
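/* Worked example for the function above (illustrative numbers, not from the
   original source): with VF = 4, a known trip count of 11, one prologue
   iteration and a scalar iteration cost of 3, the epilogue gets
   (11 - 1) % 4 = 2 iterations, so the peeling cost contributed here is
   1*3 + 2*3 = 9, plus any peel_guard_costs.  */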
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.  */
2485 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo
)
2488 int min_profitable_iters
;
2489 int peel_iters_prologue
;
2490 int peel_iters_epilogue
;
2491 int vec_inside_cost
= 0;
2492 int vec_outside_cost
= 0;
2493 int scalar_single_iter_cost
= 0;
2494 int scalar_outside_cost
= 0;
2495 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2496 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2497 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2498 int nbbs
= loop
->num_nodes
;
2499 int npeel
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
2500 int peel_guard_costs
= 0;
2501 int innerloop_iters
= 0, factor
;
2502 VEC (slp_instance
, heap
) *slp_instances
;
2503 slp_instance instance
;
2505 /* Cost model disabled. */
2506 if (!flag_vect_cost_model
)
2508 if (vect_print_dump_info (REPORT_COST
))
2509 fprintf (vect_dump
, "cost model disabled.");
2513 /* Requires loop versioning tests to handle misalignment. */
2514 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
2516 /* FIXME: Make cost depend on complexity of individual check. */
2518 VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
));
2519 if (vect_print_dump_info (REPORT_COST
))
2520 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
2521 "versioning to treat misalignment.\n");
2524 /* Requires loop versioning with alias checks. */
2525 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2527 /* FIXME: Make cost depend on complexity of individual check. */
2529 VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
));
2530 if (vect_print_dump_info (REPORT_COST
))
2531 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
2532 "versioning aliasing.\n");
2535 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2536 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2537 vec_outside_cost
+= vect_get_stmt_cost (cond_branch_taken
);
2539 /* Count statements in scalar loop. Using this as scalar cost for a single
2542 TODO: Add outer loop support.
2544 TODO: Consider assigning different costs to different scalar
2549 innerloop_iters
= 50; /* FIXME */
2551 for (i
= 0; i
< nbbs
; i
++)
2553 gimple_stmt_iterator si
;
2554 basic_block bb
= bbs
[i
];
2556 if (bb
->loop_father
== loop
->inner
)
2557 factor
= innerloop_iters
;
2561 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2563 gimple stmt
= gsi_stmt (si
);
2564 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2566 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2568 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2569 stmt_info
= vinfo_for_stmt (stmt
);
2572 /* Skip stmts that are not vectorized inside the loop. */
2573 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
2574 && (!STMT_VINFO_LIVE_P (stmt_info
)
2575 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
))))
2578 vec_inside_cost
+= STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) * factor
;
2579 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2580 some of the "outside" costs are generated inside the outer-loop. */
2581 vec_outside_cost
+= STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
);
2582 if (is_pattern_stmt_p (stmt_info
)
2583 && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
))
2585 gimple_stmt_iterator gsi
;
2587 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2588 !gsi_end_p (gsi
); gsi_next (&gsi
))
2590 gimple pattern_def_stmt
= gsi_stmt (gsi
);
2591 stmt_vec_info pattern_def_stmt_info
2592 = vinfo_for_stmt (pattern_def_stmt
);
2593 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
2594 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
2597 += STMT_VINFO_INSIDE_OF_LOOP_COST
2598 (pattern_def_stmt_info
) * factor
;
2600 += STMT_VINFO_OUTSIDE_OF_LOOP_COST
2601 (pattern_def_stmt_info
);
2608 scalar_single_iter_cost
= vect_get_single_scalar_iteration_cost (loop_vinfo
);
  /* Add additional cost for the peeled instructions in prologue and epilogue
     loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */
2621 peel_iters_prologue
= vf
/2;
2622 if (vect_print_dump_info (REPORT_COST
))
2623 fprintf (vect_dump
, "cost model: "
2624 "prologue peel iters set to vf/2.");
2626 /* If peeling for alignment is unknown, loop bound of main loop becomes
2628 peel_iters_epilogue
= vf
/2;
2629 if (vect_print_dump_info (REPORT_COST
))
2630 fprintf (vect_dump
, "cost model: "
2631 "epilogue peel iters set to vf/2 because "
2632 "peeling for alignment is unknown .");
      /* If peeled iterations are unknown, count a taken branch and a not taken
         branch per peeled loop.  Even if scalar loop iterations are known,
         vector iterations are not known since peeled prologue iterations are
         not known.  Hence guards remain the same.  */
2638 peel_guard_costs
+= 2 * (vect_get_stmt_cost (cond_branch_taken
)
2639 + vect_get_stmt_cost (cond_branch_not_taken
));
2640 vec_outside_cost
+= (peel_iters_prologue
* scalar_single_iter_cost
)
2641 + (peel_iters_epilogue
* scalar_single_iter_cost
)
2646 peel_iters_prologue
= npeel
;
2647 vec_outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
,
2648 peel_iters_prologue
, &peel_iters_epilogue
,
2649 scalar_single_iter_cost
);
  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDED with the versioning condition.  Hence scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
         jmp to vector code

     Hence run-time scalar is incremented by not-taken branch cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       if (cost <= th)
         prologue = scalar_iters
       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during prologue check, it
     has to be done with the epilogue check.

       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit
       vector code:
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
           jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBS's differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */
  /* If the number of iterations is known and we do not do versioning, we can
     decide whether to vectorize at compile time.  Hence the scalar version
     does not carry cost model guard costs.  */
2705 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2706 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2707 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2709 /* Cost model check occurs at versioning. */
2710 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2711 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2712 scalar_outside_cost
+= vect_get_stmt_cost (cond_branch_not_taken
);
2715 /* Cost model check occurs at prologue generation. */
2716 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) < 0)
2717 scalar_outside_cost
+= 2 * vect_get_stmt_cost (cond_branch_taken
)
2718 + vect_get_stmt_cost (cond_branch_not_taken
);
2719 /* Cost model check occurs at epilogue generation. */
2721 scalar_outside_cost
+= 2 * vect_get_stmt_cost (cond_branch_taken
);
2725 /* Add SLP costs. */
2726 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2727 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2729 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
2730 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:

       SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC

     where
       SIC = scalar iteration cost, VIC = vector iteration cost,
       VOC = vector outside cost, VF = vectorization factor,
       PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
       SOC = scalar outside cost for run time cost model check.  */
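  /* Worked example (illustrative numbers, not from the original source):
     with SIC = 4, VIC = 6, VOC = 20, SOC = 0, VF = 4 and no peeling, the
     break-even condition 4*niters > 6*(niters/4) + 20 gives niters > 8.
     The code below computes (20*4) / (4*4 - 6) = 8, bumps it to 9 because
     at exactly 8 iterations the vector version is not cheaper, raises it
     to at least VF, and finally subtracts one because the runtime guard is
     "if (niters <= min_profitable_iters) skip the vectorized loop".  */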
2743 if ((scalar_single_iter_cost
* vf
) > vec_inside_cost
)
2745 if (vec_outside_cost
<= 0)
2746 min_profitable_iters
= 1;
2749 min_profitable_iters
= ((vec_outside_cost
- scalar_outside_cost
) * vf
2750 - vec_inside_cost
* peel_iters_prologue
2751 - vec_inside_cost
* peel_iters_epilogue
)
2752 / ((scalar_single_iter_cost
* vf
)
2755 if ((scalar_single_iter_cost
* vf
* min_profitable_iters
)
2756 <= ((vec_inside_cost
* min_profitable_iters
)
2757 + ((vec_outside_cost
- scalar_outside_cost
) * vf
)))
2758 min_profitable_iters
++;
2761 /* vector version will never be profitable. */
2764 if (vect_print_dump_info (REPORT_COST
))
2765 fprintf (vect_dump
, "cost model: the vector iteration cost = %d "
2766 "divided by the scalar iteration cost = %d "
2767 "is greater or equal to the vectorization factor = %d.",
2768 vec_inside_cost
, scalar_single_iter_cost
, vf
);
2772 if (vect_print_dump_info (REPORT_COST
))
2774 fprintf (vect_dump
, "Cost model analysis: \n");
2775 fprintf (vect_dump
, " Vector inside of loop cost: %d\n",
2777 fprintf (vect_dump
, " Vector outside of loop cost: %d\n",
2779 fprintf (vect_dump
, " Scalar iteration cost: %d\n",
2780 scalar_single_iter_cost
);
2781 fprintf (vect_dump
, " Scalar outside cost: %d\n", scalar_outside_cost
);
2782 fprintf (vect_dump
, " prologue iterations: %d\n",
2783 peel_iters_prologue
);
2784 fprintf (vect_dump
, " epilogue iterations: %d\n",
2785 peel_iters_epilogue
);
2786 fprintf (vect_dump
, " Calculated minimum iters for profitability: %d\n",
2787 min_profitable_iters
);
2790 min_profitable_iters
=
2791 min_profitable_iters
< vf
? vf
: min_profitable_iters
;
  /* Because the condition we create is:
       if (niters <= min_profitable_iters)
         then skip the vectorized loop.  */
2796 min_profitable_iters
--;
2798 if (vect_print_dump_info (REPORT_COST
))
2799 fprintf (vect_dump
, " Profitability threshold = %d\n",
2800 min_profitable_iters
);
2802 return min_profitable_iters
;
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */
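/* Illustrative example (not from the original source): for a single-copy
   integer add reduction on a target that provides a vector reduction
   operation, this adds one vector_stmt to the inside-of-loop cost, and to
   the outside cost one scalar_to_vec (creating the initial vector), one
   vector_stmt (the epilogue reduction) and one vec_to_scalar (the final
   extract) - matching the terms accumulated below.  */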
2816 vect_model_reduction_cost (stmt_vec_info stmt_info
, enum tree_code reduc_code
,
2820 enum tree_code code
;
2823 gimple stmt
, orig_stmt
;
2825 enum machine_mode mode
;
2826 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2827 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2830 /* Cost of reduction op inside loop. */
2831 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
)
2832 += ncopies
* vect_get_stmt_cost (vector_stmt
);
2834 stmt
= STMT_VINFO_STMT (stmt_info
);
2836 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
2838 case GIMPLE_SINGLE_RHS
:
2839 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
)) == ternary_op
);
2840 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 2);
2842 case GIMPLE_UNARY_RHS
:
2843 reduction_op
= gimple_assign_rhs1 (stmt
);
2845 case GIMPLE_BINARY_RHS
:
2846 reduction_op
= gimple_assign_rhs2 (stmt
);
2848 case GIMPLE_TERNARY_RHS
:
2849 reduction_op
= gimple_assign_rhs3 (stmt
);
2855 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
2858 if (vect_print_dump_info (REPORT_COST
))
2860 fprintf (vect_dump
, "unsupported data-type ");
2861 print_generic_expr (vect_dump
, TREE_TYPE (reduction_op
), TDF_SLIM
);
2866 mode
= TYPE_MODE (vectype
);
2867 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2870 orig_stmt
= STMT_VINFO_STMT (stmt_info
);
2872 code
= gimple_assign_rhs_code (orig_stmt
);
2874 /* Add in cost for initial definition. */
2875 outer_cost
+= vect_get_stmt_cost (scalar_to_vec
);
2877 /* Determine cost of epilogue code.
2879 We have a reduction operator that will reduce the vector in one statement.
2880 Also requires scalar extract. */
2882 if (!nested_in_vect_loop_p (loop
, orig_stmt
))
2884 if (reduc_code
!= ERROR_MARK
)
2885 outer_cost
+= vect_get_stmt_cost (vector_stmt
)
2886 + vect_get_stmt_cost (vec_to_scalar
);
2889 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
2891 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt
)));
2892 int element_bitsize
= tree_low_cst (bitsize
, 1);
2893 int nelements
= vec_size_in_bits
/ element_bitsize
;
2895 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
2897 /* We have a whole vector shift available. */
2898 if (VECTOR_MODE_P (mode
)
2899 && optab_handler (optab
, mode
) != CODE_FOR_nothing
2900 && optab_handler (vec_shr_optab
, mode
) != CODE_FOR_nothing
)
2901 /* Final reduction via vector shifts and the reduction operator. Also
2902 requires scalar extract. */
2903 outer_cost
+= ((exact_log2(nelements
) * 2)
2904 * vect_get_stmt_cost (vector_stmt
)
2905 + vect_get_stmt_cost (vec_to_scalar
));
2907 /* Use extracts and reduction op for final reduction. For N elements,
2908 we have N extracts and N-1 reduction ops. */
2909 outer_cost
+= ((nelements
+ nelements
- 1)
2910 * vect_get_stmt_cost (vector_stmt
));
2914 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
) = outer_cost
;
2916 if (vect_print_dump_info (REPORT_COST
))
2917 fprintf (vect_dump
, "vect_model_reduction_cost: inside_cost = %d, "
2918 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
2919 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
/* Function vect_model_induction_cost.

   Models cost for induction operations.  */
static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
    = ncopies * vect_get_stmt_cost (vector_stmt);
  /* prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
    = 2 * vect_get_stmt_cost (scalar_to_vec);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}
/* Function get_initial_def_for_induction

   STMT - a stmt that performs an induction operation in the loop.
   IV_PHI - the initial value of the induction variable

   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
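/* Illustrative example (not from the original source): for

     for (i = 0; i < N; i++)
       a[i] = i;

   with X = 0, S = 1 and a vectorization factor of 4 we create

     vec_init = {0, 1, 2, 3}    and    vec_step = {4, 4, 4, 4},

   and inside the loop the vector iv is advanced as

     vec_iv = PHI <vec_init, vec_loop>;   vec_loop = vec_iv + vec_step;

   which is exactly the def-use cycle built further down.  */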
2959 get_initial_def_for_induction (gimple iv_phi
)
2961 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (iv_phi
);
2962 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2963 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2967 edge pe
= loop_preheader_edge (loop
);
2968 struct loop
*iv_loop
;
2970 tree vec
, vec_init
, vec_step
, t
;
2974 gimple init_stmt
, induction_phi
, new_stmt
;
2975 tree induc_def
, vec_def
, vec_dest
;
2976 tree init_expr
, step_expr
;
2977 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2982 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
2983 bool nested_in_vect_loop
= false;
2984 gimple_seq stmts
= NULL
;
2985 imm_use_iterator imm_iter
;
2986 use_operand_p use_p
;
2990 gimple_stmt_iterator si
;
2991 basic_block bb
= gimple_bb (iv_phi
);
2995 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
2996 if (nested_in_vect_loop_p (loop
, iv_phi
))
2998 nested_in_vect_loop
= true;
2999 iv_loop
= loop
->inner
;
3003 gcc_assert (iv_loop
== (gimple_bb (iv_phi
))->loop_father
);
3005 latch_e
= loop_latch_edge (iv_loop
);
3006 loop_arg
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, latch_e
);
3008 access_fn
= analyze_scalar_evolution (iv_loop
, PHI_RESULT (iv_phi
));
3009 gcc_assert (access_fn
);
3010 STRIP_NOPS (access_fn
);
3011 ok
= vect_is_simple_iv_evolution (iv_loop
->num
, access_fn
,
3012 &init_expr
, &step_expr
);
3014 pe
= loop_preheader_edge (iv_loop
);
3016 scalar_type
= TREE_TYPE (init_expr
);
3017 vectype
= get_vectype_for_scalar_type (scalar_type
);
3018 resvectype
= get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi
)));
3019 gcc_assert (vectype
);
3020 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3021 ncopies
= vf
/ nunits
;
3023 gcc_assert (phi_info
);
3024 gcc_assert (ncopies
>= 1);
3026 /* Find the first insertion point in the BB. */
3027 si
= gsi_after_labels (bb
);
3029 /* Create the vector that holds the initial_value of the induction. */
3030 if (nested_in_vect_loop
)
3032 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3033 been created during vectorization of previous stmts. We obtain it
3034 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3035 tree iv_def
= PHI_ARG_DEF_FROM_EDGE (iv_phi
,
3036 loop_preheader_edge (iv_loop
));
3037 vec_init
= vect_get_vec_def_for_operand (iv_def
, iv_phi
, NULL
);
3041 VEC(constructor_elt
,gc
) *v
;
3043 /* iv_loop is the loop to be vectorized. Create:
3044 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3045 new_var
= vect_get_new_vect_var (scalar_type
, vect_scalar_var
, "var_");
3046 add_referenced_var (new_var
);
3048 new_name
= force_gimple_operand (init_expr
, &stmts
, false, new_var
);
3051 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3052 gcc_assert (!new_bb
);
3055 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
3056 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3057 for (i
= 1; i
< nunits
; i
++)
3059 /* Create: new_name_i = new_name + step_expr */
3060 enum tree_code code
= POINTER_TYPE_P (scalar_type
)
3061 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3062 init_stmt
= gimple_build_assign_with_ops (code
, new_var
,
3063 new_name
, step_expr
);
3064 new_name
= make_ssa_name (new_var
, init_stmt
);
3065 gimple_assign_set_lhs (init_stmt
, new_name
);
3067 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
3068 gcc_assert (!new_bb
);
3070 if (vect_print_dump_info (REPORT_DETAILS
))
3072 fprintf (vect_dump
, "created new init_stmt: ");
3073 print_gimple_stmt (vect_dump
, init_stmt
, 0, TDF_SLIM
);
3075 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3077 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3078 vec
= build_constructor (vectype
, v
);
3079 vec_init
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
3083 /* Create the vector that holds the step of the induction. */
3084 if (nested_in_vect_loop
)
3085 /* iv_loop is nested in the loop to be vectorized. Generate:
3086 vec_step = [S, S, S, S] */
3087 new_name
= step_expr
;
3090 /* iv_loop is the loop to be vectorized. Generate:
3091 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3092 expr
= build_int_cst (TREE_TYPE (step_expr
), vf
);
3093 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3097 t
= unshare_expr (new_name
);
3098 gcc_assert (CONSTANT_CLASS_P (new_name
));
3099 stepvectype
= get_vectype_for_scalar_type (TREE_TYPE (new_name
));
3100 gcc_assert (stepvectype
);
3101 vec
= build_vector_from_val (stepvectype
, t
);
3102 vec_step
= vect_init_vector (iv_phi
, vec
, stepvectype
, NULL
);
  /* Create the following def-use cycle:
       vec_iv = PHI <vec_init, vec_loop>
       ...
       vec_loop = vec_iv + vec_step;  */
3116 /* Create the induction-phi that defines the induction-operand. */
3117 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
3118 add_referenced_var (vec_dest
);
3119 induction_phi
= create_phi_node (vec_dest
, iv_loop
->header
);
3120 set_vinfo_for_stmt (induction_phi
,
3121 new_stmt_vec_info (induction_phi
, loop_vinfo
, NULL
));
3122 induc_def
= PHI_RESULT (induction_phi
);
3124 /* Create the iv update inside the loop */
3125 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
3126 induc_def
, vec_step
);
3127 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3128 gimple_assign_set_lhs (new_stmt
, vec_def
);
3129 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3130 set_vinfo_for_stmt (new_stmt
, new_stmt_vec_info (new_stmt
, loop_vinfo
,
3133 /* Set the arguments of the phi node: */
3134 add_phi_arg (induction_phi
, vec_init
, pe
, UNKNOWN_LOCATION
);
3135 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (iv_loop
),
  /* In case that vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
3147 stmt_vec_info prev_stmt_vinfo
;
3148 /* FORNOW. This restriction should be relaxed. */
3149 gcc_assert (!nested_in_vect_loop
);
3151 /* Create the vector that holds the step of the induction. */
3152 expr
= build_int_cst (TREE_TYPE (step_expr
), nunits
);
3153 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3155 t
= unshare_expr (new_name
);
3156 gcc_assert (CONSTANT_CLASS_P (new_name
));
3157 vec
= build_vector_from_val (stepvectype
, t
);
3158 vec_step
= vect_init_vector (iv_phi
, vec
, stepvectype
, NULL
);
3160 vec_def
= induc_def
;
3161 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
3162 for (i
= 1; i
< ncopies
; i
++)
3164 /* vec_i = vec_prev + vec_step */
3165 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
3167 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3168 gimple_assign_set_lhs (new_stmt
, vec_def
);
3170 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3171 if (!useless_type_conversion_p (resvectype
, vectype
))
3173 new_stmt
= gimple_build_assign_with_ops
3175 vect_get_new_vect_var (resvectype
, vect_simple_var
,
3177 build1 (VIEW_CONVERT_EXPR
, resvectype
,
3178 gimple_assign_lhs (new_stmt
)), NULL_TREE
);
3179 gimple_assign_set_lhs (new_stmt
,
3181 (gimple_assign_lhs (new_stmt
), new_stmt
));
3182 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3184 set_vinfo_for_stmt (new_stmt
,
3185 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3186 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
3187 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
3191 if (nested_in_vect_loop
)
3193 /* Find the loop-closed exit-phi of the induction, and record
3194 the final vector of induction results: */
3196 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
3198 if (!flow_bb_inside_loop_p (iv_loop
, gimple_bb (USE_STMT (use_p
))))
3200 exit_phi
= USE_STMT (use_p
);
3206 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
3207 /* FORNOW. Currently not supporting the case that an inner-loop induction
3208 is not used in the outer-loop (i.e. only outside the outer-loop). */
3209 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
3210 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
3212 STMT_VINFO_VEC_STMT (stmt_vinfo
) = new_stmt
;
3213 if (vect_print_dump_info (REPORT_DETAILS
))
3215 fprintf (vect_dump
, "vector of inductions after inner-loop:");
3216 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
3222 if (vect_print_dump_info (REPORT_DETAILS
))
3224 fprintf (vect_dump
, "transform induction: created def-use cycle: ");
3225 print_gimple_stmt (vect_dump
, induction_phi
, 0, TDF_SLIM
);
3226 fprintf (vect_dump
, "\n");
3227 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (vec_def
), 0, TDF_SLIM
);
3230 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
3231 if (!useless_type_conversion_p (resvectype
, vectype
))
3233 new_stmt
= gimple_build_assign_with_ops
3235 vect_get_new_vect_var (resvectype
, vect_simple_var
, "vec_iv_"),
3236 build1 (VIEW_CONVERT_EXPR
, resvectype
, induc_def
), NULL_TREE
);
3237 induc_def
= make_ssa_name (gimple_assign_lhs (new_stmt
), new_stmt
);
3238 gimple_assign_set_lhs (new_stmt
, induc_def
);
3239 si
= gsi_start_bb (bb
);
3240 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3241 set_vinfo_for_stmt (new_stmt
,
3242 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3243 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt
))
3244 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi
));
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
     of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
     performs.  This vector will be used as the initial value of the
     vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

     s = init_val;
     for (i = 0; i < n; i++)
       s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries), if
   ADJUSTMENT_DEF is not NULL, and Option2 otherwise.

   A cost model should help decide between these two schemes.  */
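/* Illustrative example of the 'adjust in epilog' scheme (not from the
   original source): for  s = 5; for (...) s += a[i];  with VF = 4, the
   partial-sum vector starts as [0,0,0,0], the loop accumulates the a[i]
   into the four lanes, the epilog sums the lanes, and the caller finally
   adds the recorded ADJUSTMENT_DEF (5) to obtain the scalar result.
   Under Option2 the start vector would be [5,0,0,0] and no final
   adjustment would be needed.  */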
3295 get_initial_def_for_reduction (gimple stmt
, tree init_val
,
3296 tree
*adjustment_def
)
3298 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3299 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3300 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3301 tree scalar_type
= TREE_TYPE (init_val
);
3302 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
3304 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3309 bool nested_in_vect_loop
= false;
3311 REAL_VALUE_TYPE real_init_val
= dconst0
;
3312 int int_init_val
= 0;
3313 gimple def_stmt
= NULL
;
3315 gcc_assert (vectype
);
3316 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3318 gcc_assert (POINTER_TYPE_P (scalar_type
) || INTEGRAL_TYPE_P (scalar_type
)
3319 || SCALAR_FLOAT_TYPE_P (scalar_type
));
3321 if (nested_in_vect_loop_p (loop
, stmt
))
3322 nested_in_vect_loop
= true;
3324 gcc_assert (loop
== (gimple_bb (stmt
))->loop_father
);
3326 /* In case of double reduction we only create a vector variable to be put
3327 in the reduction phi node. The actual statement creation is done in
3328 vect_create_epilog_for_reduction. */
3329 if (adjustment_def
&& nested_in_vect_loop
3330 && TREE_CODE (init_val
) == SSA_NAME
3331 && (def_stmt
= SSA_NAME_DEF_STMT (init_val
))
3332 && gimple_code (def_stmt
) == GIMPLE_PHI
3333 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
3334 && vinfo_for_stmt (def_stmt
)
3335 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
3336 == vect_double_reduction_def
)
3338 *adjustment_def
= NULL
;
3339 return vect_create_destination_var (init_val
, vectype
);
3342 if (TREE_CONSTANT (init_val
))
3344 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3345 init_value
= build_real (scalar_type
, TREE_REAL_CST (init_val
));
3347 init_value
= build_int_cst (scalar_type
, TREE_INT_CST_LOW (init_val
));
3350 init_value
= init_val
;
3354 case WIDEN_SUM_EXPR
:
3362 /* ADJUSMENT_DEF is NULL when called from
3363 vect_create_epilog_for_reduction to vectorize double reduction. */
3366 if (nested_in_vect_loop
)
3367 *adjustment_def
= vect_get_vec_def_for_operand (init_val
, stmt
,
3370 *adjustment_def
= init_val
;
3373 if (code
== MULT_EXPR
)
3375 real_init_val
= dconst1
;
3379 if (code
== BIT_AND_EXPR
)
3382 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3383 def_for_init
= build_real (scalar_type
, real_init_val
);
3385 def_for_init
= build_int_cst (scalar_type
, int_init_val
);
3387 /* Create a vector of '0' or '1' except the first element. */
3388 elts
= XALLOCAVEC (tree
, nunits
);
3389 for (i
= nunits
- 2; i
>= 0; --i
)
3390 elts
[i
+ 1] = def_for_init
;
3392 /* Option1: the first element is '0' or '1' as well. */
3395 elts
[0] = def_for_init
;
3396 init_def
= build_vector (vectype
, elts
);
3400 /* Option2: the first element is INIT_VAL. */
3402 if (TREE_CONSTANT (init_val
))
3403 init_def
= build_vector (vectype
, elts
);
3406 VEC(constructor_elt
,gc
) *v
;
3407 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
3408 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, init_val
);
3409 for (i
= 1; i
< nunits
; ++i
)
3410 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[i
]);
3411 init_def
= build_constructor (vectype
, v
);
3421 *adjustment_def
= NULL_TREE
;
3422 init_def
= vect_get_vec_def_for_operand (init_val
, stmt
, NULL
);
3426 init_def
= build_vector_from_val (vectype
, init_value
);
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
     reduction statements.
   STMT is the scalar reduction stmt that is being vectorized.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.
   REDUC_CODE is the tree-code for the epilog reduction.
   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
     computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
   SLP_NODE is an SLP node containing a group of reduction statements.  The
     first one in this group is STMT.

   This function:
   1. Creates the reduction def-use cycles: sets the arguments for
      REDUCTION_PHIS:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is taken from VECT_DEFS - the vector of partial
      results.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */
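/* Illustrative example (not from the original source): for a four-lane
   integer sum whose final partial-result vector is v_out1 = {1,2,3,4},
   step 1 produces v_out2 holding 10 in one lane (via the target's
   reduction operation if REDUC_CODE is available, or via shift+add
   rounds: {1,2,3,4} + {3,4,_,_} = {4,6,_,_}, then + {6,_,_,_} =
   {10,_,_,_}), step 2 extracts the scalar 10, and step 3 adds back the
   initial value recorded in ADJUSTMENT_DEF, if any.  */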
3499 vect_create_epilog_for_reduction (VEC (tree
, heap
) *vect_defs
, gimple stmt
,
3500 int ncopies
, enum tree_code reduc_code
,
3501 VEC (gimple
, heap
) *reduction_phis
,
3502 int reduc_index
, bool double_reduc
,
3505 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3506 stmt_vec_info prev_phi_info
;
3508 enum machine_mode mode
;
3509 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3510 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
), *outer_loop
= NULL
;
3511 basic_block exit_bb
;
3514 gimple new_phi
= NULL
, phi
;
3515 gimple_stmt_iterator exit_gsi
;
3517 tree new_temp
= NULL_TREE
, new_dest
, new_name
, new_scalar_dest
;
3518 gimple epilog_stmt
= NULL
;
3519 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3521 tree bitsize
, bitpos
;
3522 tree adjustment_def
= NULL
;
3523 tree vec_initial_def
= NULL
;
3524 tree reduction_op
, expr
, def
;
3525 tree orig_name
, scalar_result
;
3526 imm_use_iterator imm_iter
, phi_imm_iter
;
3527 use_operand_p use_p
, phi_use_p
;
3528 bool extract_scalar_result
= false;
3529 gimple use_stmt
, orig_stmt
, reduction_phi
= NULL
;
3530 bool nested_in_vect_loop
= false;
3531 VEC (gimple
, heap
) *new_phis
= NULL
;
3532 VEC (gimple
, heap
) *inner_phis
= NULL
;
3533 enum vect_def_type dt
= vect_unknown_def_type
;
3535 VEC (tree
, heap
) *scalar_results
= NULL
;
3536 unsigned int group_size
= 1, k
, ratio
;
3537 VEC (tree
, heap
) *vec_initial_defs
= NULL
;
3538 VEC (gimple
, heap
) *phis
;
3539 bool slp_reduc
= false;
3540 tree new_phi_result
;
3541 gimple inner_phi
= NULL
;
3544 group_size
= VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
));
3546 if (nested_in_vect_loop_p (loop
, stmt
))
3550 nested_in_vect_loop
= true;
3551 gcc_assert (!slp_node
);
3554 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
3556 case GIMPLE_SINGLE_RHS
:
3557 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
))
3559 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), reduc_index
);
3561 case GIMPLE_UNARY_RHS
:
3562 reduction_op
= gimple_assign_rhs1 (stmt
);
3564 case GIMPLE_BINARY_RHS
:
3565 reduction_op
= reduc_index
?
3566 gimple_assign_rhs2 (stmt
) : gimple_assign_rhs1 (stmt
);
3568 case GIMPLE_TERNARY_RHS
:
3569 reduction_op
= gimple_op (stmt
, reduc_index
+ 1);
3575 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
3576 gcc_assert (vectype
);
3577 mode
= TYPE_MODE (vectype
);
3579 /* 1. Create the reduction def-use cycle:
3580 Set the arguments of REDUCTION_PHIS, i.e., transform
3583 vec_def = phi <null, null> # REDUCTION_PHI
3584 VECT_DEF = vector_stmt # vectorized form of STMT
3590 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3591 VECT_DEF = vector_stmt # vectorized form of STMT
3594 (in case of SLP, do it for all the phis). */
3596 /* Get the loop-entry arguments. */
3598 vect_get_vec_defs (reduction_op
, NULL_TREE
, stmt
, &vec_initial_defs
,
3599 NULL
, slp_node
, reduc_index
);
3602 vec_initial_defs
= VEC_alloc (tree
, heap
, 1);
3603 /* For the case of reduction, vect_get_vec_def_for_operand returns
3604 the scalar def before the loop, that defines the initial value
3605 of the reduction variable. */
3606 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
3608 VEC_quick_push (tree
, vec_initial_defs
, vec_initial_def
);
3611 /* Set phi nodes arguments. */
3612 FOR_EACH_VEC_ELT (gimple
, reduction_phis
, i
, phi
)
3614 tree vec_init_def
= VEC_index (tree
, vec_initial_defs
, i
);
3615 tree def
= VEC_index (tree
, vect_defs
, i
);
3616 for (j
= 0; j
< ncopies
; j
++)
3618 /* Set the loop-entry arg of the reduction-phi. */
3619 add_phi_arg (phi
, vec_init_def
, loop_preheader_edge (loop
),
3622 /* Set the loop-latch arg for the reduction-phi. */
3624 def
= vect_get_vec_def_for_stmt_copy (vect_unknown_def_type
, def
);
3626 add_phi_arg (phi
, def
, loop_latch_edge (loop
), UNKNOWN_LOCATION
);
3628 if (vect_print_dump_info (REPORT_DETAILS
))
3630 fprintf (vect_dump
, "transform reduction: created def-use"
3632 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
3633 fprintf (vect_dump
, "\n");
3634 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (def
), 0,
3638 phi
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
));
3642 VEC_free (tree
, heap
, vec_initial_defs
);
  /* 2. Create epilog code.
        The reduction epilog code operates across the elements of the vector
        of partial results computed by the vectorized loop.
        The reduction epilog code consists of:

        step 1: compute the scalar result in a vector (v_out2)
        step 2: extract the scalar result (s_out3) from the vector (v_out2)
        step 3: adjust the scalar result (s_out3) if needed.

        Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

        The overall epilog code looks like this:

          s_out0 = phi <s_loop>               # original EXIT_PHI
          v_out1 = phi <VECT_DEF>             # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>            # step 1
          s_out3 = extract_field <v_out2, 0>  # step 2
          s_out4 = adjust_result <s_out3>     # step 3

        (step 3 is optional, and steps 1 and 2 may be combined).
        Lastly, the uses of s_out0 are replaced by s_out4.  */
3671 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3672 v_out1 = phi <VECT_DEF>
3673 Store them in NEW_PHIS. */
3675 exit_bb
= single_exit (loop
)->dest
;
3676 prev_phi_info
= NULL
;
3677 new_phis
= VEC_alloc (gimple
, heap
, VEC_length (tree
, vect_defs
));
3678 FOR_EACH_VEC_ELT (tree
, vect_defs
, i
, def
)
3680 for (j
= 0; j
< ncopies
; j
++)
3682 phi
= create_phi_node (SSA_NAME_VAR (def
), exit_bb
);
3683 set_vinfo_for_stmt (phi
, new_stmt_vec_info (phi
, loop_vinfo
, NULL
));
3685 VEC_quick_push (gimple
, new_phis
, phi
);
3688 def
= vect_get_vec_def_for_stmt_copy (dt
, def
);
3689 STMT_VINFO_RELATED_STMT (prev_phi_info
) = phi
;
3692 SET_PHI_ARG_DEF (phi
, single_exit (loop
)->dest_idx
, def
);
3693 prev_phi_info
= vinfo_for_stmt (phi
);
3697 /* The epilogue is created for the outer-loop, i.e., for the loop being
3698 vectorized. Create exit phis for the outer loop. */
3702 exit_bb
= single_exit (loop
)->dest
;
3703 inner_phis
= VEC_alloc (gimple
, heap
, VEC_length (tree
, vect_defs
));
3704 FOR_EACH_VEC_ELT (gimple
, new_phis
, i
, phi
)
3706 gimple outer_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi
)),
3708 SET_PHI_ARG_DEF (outer_phi
, single_exit (loop
)->dest_idx
,
3710 set_vinfo_for_stmt (outer_phi
, new_stmt_vec_info (outer_phi
,
3712 VEC_quick_push (gimple
, inner_phis
, phi
);
3713 VEC_replace (gimple
, new_phis
, i
, outer_phi
);
3714 prev_phi_info
= vinfo_for_stmt (outer_phi
);
3715 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
)))
3717 phi
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
));
3718 outer_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi
)),
3720 SET_PHI_ARG_DEF (outer_phi
, single_exit (loop
)->dest_idx
,
3722 set_vinfo_for_stmt (outer_phi
, new_stmt_vec_info (outer_phi
,
3724 STMT_VINFO_RELATED_STMT (prev_phi_info
) = outer_phi
;
3725 prev_phi_info
= vinfo_for_stmt (outer_phi
);
3730 exit_gsi
= gsi_after_labels (exit_bb
);
3732 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3733 (i.e. when reduc_code is not available) and in the final adjustment
3734 code (if needed). Also get the original scalar reduction variable as
3735 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
3736 represents a reduction pattern), the tree-code and scalar-def are
3737 taken from the original stmt that the pattern-stmt (STMT) replaces.
3738 Otherwise (it is a regular reduction) - the tree-code and scalar-def
3739 are taken from STMT. */
3741 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3744 /* Regular reduction */
3749 /* Reduction pattern */
3750 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
3751 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
3752 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
3755 code
= gimple_assign_rhs_code (orig_stmt
);
3756 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3757 partial results are added and not subtracted. */
3758 if (code
== MINUS_EXPR
)
3761 scalar_dest
= gimple_assign_lhs (orig_stmt
);
3762 scalar_type
= TREE_TYPE (scalar_dest
);
3763 scalar_results
= VEC_alloc (tree
, heap
, group_size
);
3764 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
3765 bitsize
= TYPE_SIZE (scalar_type
);
  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is double reduction, i.e., the use of reduction is
     outside the outer-loop).  The final vector of partial results will be used
     in the vectorized outer-loop, or reduced to a scalar result at the end of
     the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;
  /* SLP reduction without reduction chain, e.g.,
       # a1 = phi <a2, a0>
       # b1 = phi <b2, b0>
       a2 = operation (a1)
       b2 = operation (b1)  */
  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));

  /* In case of reduction chain, e.g.,
       # a1 = phi <a3, a0>
       a2 = operation (a1)
       a3 = operation (a2),

     we may end up with more than one vector result.  Here we reduce them to
     one vector.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3792 tree first_vect
= PHI_RESULT (VEC_index (gimple
, new_phis
, 0));
3794 gimple new_vec_stmt
= NULL
;
3796 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3797 for (k
= 1; k
< VEC_length (gimple
, new_phis
); k
++)
3799 gimple next_phi
= VEC_index (gimple
, new_phis
, k
);
3800 tree second_vect
= PHI_RESULT (next_phi
);
3802 tmp
= build2 (code
, vectype
, first_vect
, second_vect
);
3803 new_vec_stmt
= gimple_build_assign (vec_dest
, tmp
);
3804 first_vect
= make_ssa_name (vec_dest
, new_vec_stmt
);
3805 gimple_assign_set_lhs (new_vec_stmt
, first_vect
);
3806 gsi_insert_before (&exit_gsi
, new_vec_stmt
, GSI_SAME_STMT
);
3809 new_phi_result
= first_vect
;
3812 VEC_truncate (gimple
, new_phis
, 0);
3813 VEC_safe_push (gimple
, heap
, new_phis
, new_vec_stmt
);
3817 new_phi_result
= PHI_RESULT (VEC_index (gimple
, new_phis
, 0));
  /* 2.3 Create the reduction code, using one of the three schemes described
     above.  In SLP we simply need to extract all the elements from the
     vector (without reducing them), so we use scalar shifts.  */
3822 if (reduc_code
!= ERROR_MARK
&& !slp_reduc
)
3826 /*** Case 1: Create:
3827 v_out2 = reduc_expr <v_out1> */
3829 if (vect_print_dump_info (REPORT_DETAILS
))
3830 fprintf (vect_dump
, "Reduce using direct vector reduction.");
3832 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3833 tmp
= build1 (reduc_code
, vectype
, new_phi_result
);
3834 epilog_stmt
= gimple_build_assign (vec_dest
, tmp
);
3835 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
3836 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
3837 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3839 extract_scalar_result
= true;
3843 enum tree_code shift_code
= ERROR_MARK
;
3844 bool have_whole_vector_shift
= true;
3846 int element_bitsize
= tree_low_cst (bitsize
, 1);
3847 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
3850 if (optab_handler (vec_shr_optab
, mode
) != CODE_FOR_nothing
)
3851 shift_code
= VEC_RSHIFT_EXPR
;
3853 have_whole_vector_shift
= false;
3855 /* Regardless of whether we have a whole vector shift, if we're
3856 emulating the operation via tree-vect-generic, we don't want
3857 to use it. Only the first round of the reduction is likely
3858 to still be profitable via emulation. */
3859 /* ??? It might be better to emit a reduction tree code here, so that
3860 tree-vect-generic can expand the first round via bit tricks. */
3861 if (!VECTOR_MODE_P (mode
))
3862 have_whole_vector_shift
= false;
3865 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3866 if (optab_handler (optab
, mode
) == CODE_FOR_nothing
)
3867 have_whole_vector_shift
= false;
3870 if (have_whole_vector_shift
&& !slp_reduc
)
      /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
               {
                 Create:  va' = vec_shift <va, offset>
                 Create:  va = vop <va, va'>
               }  */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = new_phi_result;
          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = gimple_build_assign_with_ops (shift_code,
                                                vec_dest, new_temp, bitpos);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>  // For non SLP cases
               }  */
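          /* Illustration only: for a four-element int PLUS reduction this
             becomes
                 s  = BIT_FIELD_REF <v_out2, 32, 0>
                 s' = BIT_FIELD_REF <v_out2, 32, 32>;  s = s + s'
                 s' = BIT_FIELD_REF <v_out2, 32, 64>;  s = s + s'
                 s' = BIT_FIELD_REF <v_out2, 32, 96>;  s = s + s'
             whereas in SLP the extracted values are simply collected in
             SCALAR_RESULTS instead of being combined.  */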
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
            {
              if (gimple_code (new_phi) == GIMPLE_PHI)
                vec_temp = PHI_RESULT (new_phi);
              else
                vec_temp = gimple_assign_lhs (new_phi);
              rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                            bitsize_zero_node);
              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              /* In SLP we don't need to apply reduction operation, so we just
                 collect s' values in SCALAR_RESULTS.  */
              if (slp_reduc)
                VEC_safe_push (tree, heap, scalar_results, new_temp);

              for (bit_offset = element_bitsize;
                   bit_offset < vec_size_in_bits;
                   bit_offset += element_bitsize)
                {
                  tree bitpos = bitsize_int (bit_offset);
                  tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
                                     bitsize, bitpos);

                  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
                  new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
                  gimple_assign_set_lhs (epilog_stmt, new_name);
                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

                  if (slp_reduc)
                    {
                      /* In SLP we don't need to apply reduction operation, so
                         we just collect s' values in SCALAR_RESULTS.  */
                      new_temp = new_name;
                      VEC_safe_push (tree, heap, scalar_results, new_name);
                    }
                  else
                    {
                      epilog_stmt = gimple_build_assign_with_ops (code,
                                          new_scalar_dest, new_name, new_temp);
                      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
                      gimple_assign_set_lhs (epilog_stmt, new_temp);
                      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
                    }
                }
            }

          /* The only case where we need to reduce scalar results in SLP, is
             unrolling.  If the size of SCALAR_RESULTS is greater than
             GROUP_SIZE, we reduce them combining elements modulo
             GROUP_SIZE.  */
          if (slp_reduc)
            {
              tree res, first_res, new_res;
              gimple new_stmt;

              /* Reduce multiple scalar results in case of SLP unrolling.  */
              for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
                   j++)
                {
                  first_res = VEC_index (tree, scalar_results, j % group_size);
                  new_stmt = gimple_build_assign_with_ops (code,
                                              new_scalar_dest, first_res, res);
                  new_res = make_ssa_name (new_scalar_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_res);
                  gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
                  VEC_replace (tree, scalar_results, j % group_size, new_res);
                }
            }
          else
            /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
            VEC_safe_push (tree, heap, scalar_results, new_temp);

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      VEC_safe_push (tree, heap, scalar_results, new_temp);
    }

vect_finalize_reduction:

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */
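  /* For example (illustration only): for "int sum = 5; ... sum += a[i];" the
     vector accumulator is seeded with {0,0,...,0} and ADJUSTMENT_DEF holds
     the scalar 5, so here we emit "new_temp = loop_exit_def + 5" to fold the
     original initial value back into the final result.  */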
  if (adjustment_def)
    {
      gcc_assert (!slp_reduc);
      if (nested_in_vect_loop)
        {
          new_phi = VEC_index (gimple, new_phis, 0);
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          new_temp = VEC_index (tree, scalar_results, 0);
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      if (nested_in_vect_loop)
        {
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo,
                                                 NULL));
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc)
            VEC_quick_push (tree, scalar_results, new_temp);
          else
            VEC_replace (tree, scalar_results, 0, new_temp);
        }
      else
        VEC_replace (tree, scalar_results, 0, new_temp);

      VEC_replace (gimple, new_phis, 0, epilog_stmt);
    }

  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
          phis with new adjusted scalar results, i.e., replace use <s_out0>
          with use <s_out4>.

     Transform:
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out0>
          use <s_out0>

     into:

        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */

  /* In SLP reduction chain we reduce vector results into one vector if
     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
     the last stmt in the reduction chain, since we are looking for the loop
     exit phi node.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      scalar_dest = gimple_assign_lhs (VEC_index (gimple,
                                          SLP_TREE_SCALAR_STMTS (slp_node),
                                          group_size - 1));
      group_size = 1;
    }

  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
     case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
     need to match SCALAR_RESULTS with corresponding statements.  The first
     (GROUP_SIZE / number of new vector stmts) scalar results correspond to
     the first vector stmt, etc.
     (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
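  /* For instance (illustration only): with GROUP_SIZE == 8 and two vector
     stmts in NEW_PHIS, RATIO is 4, so scalar results 0-3 are matched with the
     first vector stmt/phi and scalar results 4-7 with the second one.  */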
  if (group_size > VEC_length (gimple, new_phis))
    {
      ratio = group_size / VEC_length (gimple, new_phis);
      gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
    }
  else
    ratio = 1;

  for (k = 0; k < group_size; k++)
    {
      if (k % ratio == 0)
        {
          epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
          reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
          if (double_reduc)
            inner_phi = VEC_index (gimple, inner_phis, k / ratio);
        }

      if (slp_reduc)
        {
          gimple current_stmt = VEC_index (gimple,
                                       SLP_TREE_SCALAR_STMTS (slp_node), k);

          orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
          /* SLP statements can't participate in patterns.  */
          gcc_assert (!orig_stmt);
          scalar_dest = gimple_assign_lhs (current_stmt);
        }

      phis = VEC_alloc (gimple, heap, 3);
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses -
         one at the latch block, and one at the loop exit).  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
          VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));

      /* We expect to have found an exit_phi because of loop-closed-ssa
         form.  */
      gcc_assert (!VEC_empty (gimple, phis));

      FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
        {
          if (outer_loop)
            {
              stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);

              /* FORNOW. Currently not supporting the case that an inner-loop
                 reduction is not used in the outer-loop (but only outside the
                 outer-loop), unless it is double reduction.  */
              gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                           && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
                          || double_reduc);

              STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
              if (!double_reduc
                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
                      != vect_double_reduction_def)
                continue;

              /* Handle double reduction:

                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
                 stmt2: s3 = phi <s1, s4>  - (regular) reduc phi (inner loop)
                 stmt3: s4 = use (s3)      - (regular) reduc stmt (inner loop)
                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)

                 At that point the regular reduction (stmt2 and stmt3) is
                 already vectorized, as well as the exit phi node, stmt4.
                 Here we vectorize the phi node of double reduction, stmt1, and
                 update all relevant statements.  */
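              /* Illustration only: a typical double reduction is
                     for (i = 0; i < N; i++)
                       for (j = 0; j < M; j++)
                         sum += a[i][j];
                 where the inner-loop phi of SUM is the regular reduction and
                 the outer-loop phi of SUM is the double reduction phi (stmt1
                 above).  */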
              /* Go through all the uses of s2 to find double reduction phi
                 node, i.e., stmt1 above.  */
              orig_name = PHI_RESULT (exit_phi);
              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
                {
                  stmt_vec_info use_stmt_vinfo;
                  stmt_vec_info new_phi_vinfo;
                  tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
                  basic_block bb = gimple_bb (use_stmt);
                  gimple use;

                  /* Check that USE_STMT is really double reduction phi
                     node.  */
                  if (gimple_code (use_stmt) != GIMPLE_PHI
                      || gimple_phi_num_args (use_stmt) != 2
                      || bb->loop_father != outer_loop)
                    continue;

                  use_stmt_vinfo = vinfo_for_stmt (use_stmt);
                  if (!use_stmt_vinfo
                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
                          != vect_double_reduction_def)
                    continue;

                  /* Create vector phi node for double reduction:
                     vs1 = phi <vs0, vs2>
                     vs1 was created previously in this function by a call to
                       vect_get_vec_def_for_operand and is stored in
                       vec_initial_def;
                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                     vs0 is created here.  */

                  /* Create vector phi node.  */
                  vect_phi = create_phi_node (vec_initial_def, bb);
                  new_phi_vinfo = new_stmt_vec_info (vect_phi,
                                    loop_vec_info_for_loop (outer_loop), NULL);
                  set_vinfo_for_stmt (vect_phi, new_phi_vinfo);

                  /* Create vs0 - initial def of the double reduction phi.  */
                  preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
                                             loop_preheader_edge (outer_loop));
                  init_def = get_initial_def_for_reduction (stmt,
                                                          preheader_arg, NULL);
                  vect_phi_init = vect_init_vector (use_stmt, init_def,
                                                    vectype, NULL);

                  /* Update phi node arguments with vs0 and vs2.  */
                  add_phi_arg (vect_phi, vect_phi_init,
                               loop_preheader_edge (outer_loop),
                               UNKNOWN_LOCATION);
                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                               loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "created double reduction phi "
                                          "node: ");
                      print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
                    }

                  vect_phi_res = PHI_RESULT (vect_phi);

                  /* Replace the use, i.e., set the correct vs1 in the regular
                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                     loop is redundant.  */
                  use = reduction_phi;
                  for (j = 0; j < ncopies; j++)
                    {
                      edge pr_edge = loop_preheader_edge (loop);
                      SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
                      use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
                    }
                }
            }
        }

      VEC_free (gimple, heap, phis);
      if (nested_in_vect_loop)
        {
          if (double_reduc)
            loop = outer_loop;
          else
            continue;
        }

      phis = VEC_alloc (gimple, heap, 3);
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses,
         one at the latch block, and one at the loop exit).  For double
         reductions we are looking for exit phis of the outer loop.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
            VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
          else
            {
              if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
                {
                  tree phi_res = PHI_RESULT (USE_STMT (use_p));

                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
                    {
                      if (!flow_bb_inside_loop_p (loop,
                                             gimple_bb (USE_STMT (phi_use_p))))
                        VEC_safe_push (gimple, heap, phis,
                                       USE_STMT (phi_use_p));
                    }
                }
            }
        }

      FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
        {
          /* Replace the uses:  */
          orig_name = PHI_RESULT (exit_phi);
          scalar_result = VEC_index (tree, scalar_results, k);
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, scalar_result);
        }

      VEC_free (gimple, heap, phis);
    }

  VEC_free (tree, heap, scalar_results);
  VEC_free (gimple, heap, new_phis);
}


/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
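/* Concretely (illustration only): for "short s; int sum; ... sum += (int) s;"
   recognized as a widen-sum pattern, STMT_VINFO_VECTYPE is V8HI (eight shorts
   are consumed per vector iteration) while the vectorized statement itself
   produces a V4SI accumulator, so the equation above does not hold.  */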
bool
vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree scalar_type;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype_in = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  gimple new_phi = NULL;
  gimple orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  bool is_simple_use;
  int i, j;
  int ncopies;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  bool single_defuse_cycle = false;
  tree reduc_def = NULL_TREE;
  gimple new_stmt = NULL;
  tree ops[3];
  bool nested_cycle = false, found_nested_cycle_def = false;
  gimple reduc_def_stmt = NULL;
  /* The default is that the reduction variable is the last in statement.  */
  int reduc_index = 2;
  bool double_reduc = false, dummy;
  basic_block def_bb;
  struct loop * def_stmt_loop, *outer_loop = NULL;
  tree def_arg;
  gimple def_arg_stmt;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
  VEC (gimple, heap) *phis = NULL;
  int vec_num;
  tree def0, def1, tem, op0, op1 = NULL_TREE;

  /* In case of reduction chain we switch to the first stmt in the chain, but
     we don't update STMT_INFO, since only the last stmt is marked as reduction
     and has reduction properties.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_cycle = true;
    }

  /* 1. Is vectorizable reduction?  */
  /* Not supportable if the reduction variable is used in the loop, unless
     it's a reduction chain.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
      && !GROUP_FIRST_ELEMENT (stmt_info))
    return false;

  /* Reductions that are not used even in an enclosing outer-loop,
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (is_gimple_assign (stmt));

  /* Flatten RHS.  */
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_TERNARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == ternary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      ops[2] = gimple_assign_rhs3 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == COND_EXPR && slp_node)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* Do not try to vectorize bit-precision reductions.  */
  if ((TYPE_PRECISION (scalar_type)
       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  In case of nested cycle this
     assumption is not true: we use reduc_index to record the index of the
     reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      /* The condition of COND_EXPR is checked in vectorizable_condition().  */
      if (i == 0 && code == COND_EXPR)
        continue;

      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
      if (!vectype_in)
        vectype_in = tem;
      gcc_assert (is_simple_use);

      if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
          && dt != vect_induction_def
          && !(dt == vect_nested_cycle && nested_cycle))
        return false;

      if (dt == vect_nested_cycle)
        {
          found_nested_cycle_def = true;
          reduc_def_stmt = def_stmt;
          reduc_index = i;
        }
    }

  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                        &def_stmt, &def, &dt, &tem);
  if (!vectype_in)
    vectype_in = tem;
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def
              || dt == vect_nested_cycle
              || ((dt == vect_internal_def || dt == vect_external_def
                   || dt == vect_constant_def || dt == vect_induction_def)
                  && nested_cycle && found_nested_cycle_def));
  if (!found_nested_cycle_def)
    reduc_def_stmt = def_stmt;

  gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
                                                       reduc_def_stmt,
                                                       !nested_cycle,
                                                       &dummy));
  else
    {
      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
                                             !nested_cycle, &dummy);
      /* We changed STMT to be the first stmt in reduction chain, hence we
         check that in this case the first element in the chain is STMT.  */
      gcc_assert (stmt == tmp
                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
    }

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
    return false;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
               / TYPE_VECTOR_SUBPARTS (vectype_in));

  gcc_assert (ncopies >= 1);

  vec_mode = TYPE_MODE (vectype_in);

  if (code == COND_EXPR)
    {
      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unsupported condition in reduction");

          return false;
        }
    }
  else
    {
      /* 4. Supportable by target?  */

      /* 4.1. check support for the operation in the loop  */
      optab = optab_for_tree_code (code, vectype_in, optab_default);
      if (!optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab.");

          return false;
        }

      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "op not supported by target.");

          if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
              || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                 < vect_min_worthwhile_factor (code))
            return false;

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "proceeding using word mode.");
        }

      /* Worthwhile without SIMD support?  */
      if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "not worthwhile without SIMD support.");

          return false;
        }
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: optab_handler (plus_optab, vect_int_mode])).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: optab_handler (widen_sum_optab,
             vect_short_mode)).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */
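  /* Put concretely (illustration only): for the widen_sum example above on a
     target with 128-bit vectors, the in-loop check is made on the V8HI mode
     of the short operands, while the epilog check is made on the V4SI mode of
     the int accumulator, using the 'plus'-based reduction code.  */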
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = gimple_assign_rhs_code (orig_stmt);
      gcc_assert (vectype_out);
      vec_mode = TYPE_MODE (vectype_out);
    }
  else
    {
      /* Regular reduction: use the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (nested_cycle)
    {
      def_bb = gimple_bb (reduc_def_stmt);
      def_stmt_loop = def_bb->loop_father;
      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                       loop_preheader_edge (def_stmt_loop));
      if (TREE_CODE (def_arg) == SSA_NAME
          && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
          && gimple_code (def_arg_stmt) == GIMPLE_PHI
          && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
          && vinfo_for_stmt (def_arg_stmt)
          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
              == vect_double_reduction_def)
        double_reduc = true;
    }

  epilog_reduc_code = ERROR_MARK;
  if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    {
      reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
                                         optab_default);
      if (!reduc_optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab for reduction.");

          epilog_reduc_code = ERROR_MARK;
        }

      if (reduc_optab
          && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc op not supported by target.");

          epilog_reduc_code = ERROR_MARK;
        }
    }
  else
    {
      if (!nested_cycle || double_reduc)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no reduc code for scalar code.");

          return false;
        }
    }

  if (double_reduc && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in double reduction");

      return false;
    }

  /* In case of widening multiplication by a constant, we update the type
     of the constant to be the type of the other operand.  We check that the
     constant fits the type in the pattern recognition pass.  */
  if (code == DOT_PROD_EXPR
      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
    {
      if (TREE_CODE (ops[0]) == INTEGER_CST)
        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
      else if (TREE_CODE (ops[1]) == INTEGER_CST)
        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "invalid types in dot-prod");

          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* FORNOW: Multiple types are not supported for condition.  */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  /* If the reduction is used in an outer loop we need to generate
     VF intermediate results, like so (e.g. for ncopies=2):
        r0 = phi (init, r0)
        r1 = phi (init, r1)
        r0 = x0 + r0;
        r1 = x1 + r1;
    (i.e. we generate VF results in 2 registers).
    In this case we have a separate def-use cycle for each copy, and therefore
    for each copy we get the vector def for the reduction variable from the
    respective phi node created for this copy.

    Otherwise (the reduction is unused in the loop nest), we can combine
    together intermediate results, like so (e.g. for ncopies=2):
        r = phi (init, r)
        r = x0 + r;
        r = x1 + r;
    (i.e. we generate VF/2 results in a single register).
    In this case for each copy we get the vector def for the reduction variable
    from the vectorized reduction operation generated in the previous iteration.
  */
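  /* E.g. (illustration only): a plain sum whose result is only read after the
     loop is "unused in scope" and takes the second, combined form above,
     while a reduction whose result is also consumed by an enclosing outer
     loop takes the first form with one def-use cycle per copy.  */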
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
    {
      single_defuse_cycle = true;
      epilog_copies = 1;
    }
  else
    epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  if (slp_node)
    {
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
                  == TYPE_VECTOR_SUBPARTS (vectype_in));
    }
  else
    {
      vec_num = 1;
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }

  phis = VEC_alloc (gimple, heap, vec_num);
  vect_defs = VEC_alloc (tree, heap, vec_num);
  if (!slp_node)
    VEC_quick_push (tree, vect_defs, NULL_TREE);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
        {
          for (i = 0; i < vec_num; i++)
            {
              /* Create the reduction-phi that defines the reduction
                 operand.  */
              new_phi = create_phi_node (vec_dest, loop->header);
              set_vinfo_for_stmt (new_phi,
                                  new_stmt_vec_info (new_phi, loop_vinfo,
                                                     NULL));
              if (j == 0 || slp_node)
                VEC_quick_push (gimple, phis, new_phi);
            }
        }

      if (code == COND_EXPR)
        {
          gcc_assert (!slp_node);
          vectorizable_condition (stmt, gsi, vec_stmt,
                                  PHI_RESULT (VEC_index (gimple, phis, 0)),
                                  reduc_index, NULL);
          /* Multiple types are not supported for condition.  */
          break;
        }

      /* Handle uses.  */
      if (j == 0)
        {
          op0 = ops[!reduc_index];
          if (op_type == ternary_op)
            {
              if (reduc_index == 0)
                op1 = ops[2];
              else
                op1 = ops[1];
            }

          if (slp_node)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            {
              loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
                                                            stmt, NULL);
              VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
              if (op_type == ternary_op)
                {
                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                NULL);
                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
                }
            }
        }
      else
        {
          if (!slp_node)
            {
              enum vect_def_type dt;
              gimple dummy_stmt;
              tree dummy;

              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
                                  &dummy_stmt, &dummy, &dt);
              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
                                                              loop_vec_def0);
              VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
              if (op_type == ternary_op)
                {
                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
                                      &dummy, &dt);
                  loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                  loop_vec_def1);
                  VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
                }
            }

          if (single_defuse_cycle)
            reduc_def = gimple_assign_lhs (new_stmt);

          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }

      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
        {
          if (slp_node)
            reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
          else
            {
              if (!single_defuse_cycle || j == 0)
                reduc_def = PHI_RESULT (new_phi);
            }

          def1 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL);
          if (op_type == binary_op)
            {
              if (reduc_index == 0)
                expr = build2 (code, vectype_out, reduc_def, def0);
              else
                expr = build2 (code, vectype_out, def0, reduc_def);
            }
          else
            {
              if (reduc_index == 0)
                expr = build3 (code, vectype_out, reduc_def, def0, def1);
              else
                {
                  if (reduc_index == 1)
                    expr = build3 (code, vectype_out, def0, reduc_def, def1);
                  else
                    expr = build3 (code, vectype_out, def0, def1, reduc_def);
                }
            }

          new_stmt = gimple_build_assign (vec_dest, expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (slp_node)
            {
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
              VEC_quick_push (tree, vect_defs, new_temp);
            }
          else
            VEC_replace (tree, vect_defs, 0, new_temp);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
    {
      new_temp = gimple_assign_lhs (*vec_stmt);
      VEC_replace (tree, vect_defs, 0, new_temp);
    }

  vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
                                    epilog_reduc_code, phis, reduc_index,
                                    double_reduc, slp_node);

  VEC_free (gimple, heap, phis);
  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);

  return true;
}
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */
int
vect_min_worthwhile_factor (enum tree_code code)
/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                        gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  tree vec_def;

  gcc_assert (ncopies >= 1);
  /* FORNOW. These restrictions should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi))
    {
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple exit_phi;
      edge latch_e;
      tree loop_arg;

      if (ncopies > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple types in nested loop.");
          return false;
        }

      exit_phi = NULL;
      latch_e = loop_latch_edge (loop->inner);
      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          if (!flow_bb_inside_loop_p (loop->inner,
                                      gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
          if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "inner-loop induction only used outside "
                         "of the outer vectorized loop.");
              return false;
            }
        }
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_induction ===");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform induction phi.");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);

  return true;
}
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

bool
vectorizable_live_operation (gimple stmt,
                             gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                             gimple *vec_stmt ATTRIBUTE_UNUSED)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int op_type;
  tree op;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  enum tree_code code;
  enum gimple_rhs_class rhs_class;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* FORNOW. CHECKME. */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  op_type = TREE_CODE_LENGTH (code);
  rhs_class = get_gimple_rhs_class (code);
  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */

  for (i = 0; i < op_type; i++)
    {
      if (rhs_class == GIMPLE_SINGLE_RHS)
        op = TREE_OPERAND (gimple_op (stmt, 1), i);
      else
        op = gimple_op (stmt, i + 1);
      if (op
          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
                                  &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}


/* Kill any debug uses outside LOOP of SSA names defined in STMT.  */

static void
vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
{
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  def_operand_p def_p;
  gimple ustmt;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb;

          if (!is_gimple_debug (ustmt))
            continue;

          bb = gimple_bb (ustmt);

          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (gimple_debug_bind_p (ustmt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "killing debug use");

                  gimple_debug_bind_reset_value (ustmt);
                  update_stmt (ustmt);
                }
              else
                gcc_unreachable ();
            }
        }
    }
}
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool grouped_store;
  bool slp_scheduled = false;
  unsigned int nunits;
  gimple stmt, pattern_stmt;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool transform_pattern_stmt = false;
  bool check_profitability = false;
  int th;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* Use the more conservative vectorization threshold.  If the number
     of iterations is constant assume the cost check has been performed
     by our caller.  If the threshold makes all loops profitable that
     run at least the vectorization factor number of times checking
     is pointless, too.  */
  th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
         * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
  th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
  if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump,
                 "Profitability threshold is %d loop iterations.", th);
      check_profitability = true;
    }

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
      check_profitability = false;
    }

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      vect_loop_versioning (loop_vinfo, th, check_profitability);
      check_profitability = false;
    }

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */
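  /* For example (illustration only): with n = 100 and VF = 8 the vectorized
     loop runs 100/8 = 12 times, covering 96 iterations, and the scalar epilog
     loop runs the remaining 100%8 = 4 iterations.  */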
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
      || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
                                    th, check_profitability);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed needs to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      gimple phi;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, phi);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
            }
        }

      pattern_stmt = NULL;
      for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
        {
          bool is_store;

          if (transform_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              gsi_next (&si);
              continue;
            }

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (stmt);
                }
              else
                {
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            transform_pattern_stmt = true;

          /* If pattern statement has def stmts, vectorize them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "==> vectorizing pattern def"
                                              " stmt: ");
                          print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
                                             TDF_SLIM);
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      transform_pattern_stmt = false;
                    }
                }
              else
                transform_pattern_stmt = false;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
                                               STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
              && vect_print_dump_info (REPORT_DETAILS))
            /* For SLP VF is set according to unrolling factor, and not to
               vector size, hence for SLP this print is not valid.  */
            fprintf (vect_dump, "multiple-types.");

          /* SLP.  Schedule all the SLP instances when the first SLP stmt is
             reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "=== scheduling SLP instances ===");

                  vect_schedule_slp (loop_vinfo, NULL);
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
                  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
            }

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          grouped_store = false;
          is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
          if (is_store)
            {
              if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization of
                     the interleaving chain was completed - free all the stores
                     in the chain.  */
                  gsi_next (&si);
                  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  gimple store = gsi_stmt (si);
                  free_stmt_vec_info (store);
                  unlink_stmt_vdef (store);
                  gsi_remove (&si, true);
                  release_defs (store);
                }
            }

          if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}