/* Loop Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "gimple-fold.h"
/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;

        for (i=0; i<N/8; i++){
          pa[i] = pb[i] + pc[i];
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.

   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFs whose base is an array DECL
   (not a pointer), and INDIRECT_REFs through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer is used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to
   memory; otherwise, we rely on dead code elimination for removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   ================
   Currently the only target-specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
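/* An illustrative sketch only (a hypothetical snippet, not code used by
   this pass): checking target support for the V8HI addition in the example
   above amounts to a query of the form

       if (optab_handler (add_optab, V8HImode) == CODE_FOR_nothing)
         return false;    (no target support; the stmt is not vectorized)

   with add_optab/V8HImode standing in for whatever operation and vector
   mode the statement at hand requires.  */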
static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) 16 bytes, the VF is set to 4,
   since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:

        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  unsigned i;
  HOST_WIDE_INT dummy;
  gimple *stmt, *pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;
  auto_vec<stmt_vec_info> mask_producers;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_determine_vectorization_factor ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gphi *phi = si.phi ();
          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
                                 nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }
      for (gimple_stmt_iterator si = gsi_start_bb (bb);
           !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if ((!STMT_VINFO_RELEVANT_P (stmt_info)
               && !STMT_VINFO_LIVE_P (stmt_info))
              || gimple_clobber_p (stmt))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "==> examining pattern statement: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple *pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> examining pattern def stmt: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      analyze_pattern_stmt = false;
                    }
                }
              else
                analyze_pattern_stmt = false;
            }
          if (gimple_get_lhs (stmt) == NULL_TREE
              /* MASK_STORE has no lhs, but is ok.  */
              && (!is_gimple_call (stmt)
                  || !gimple_call_internal_p (stmt)
                  || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
            {
              if (is_gimple_call (stmt))
                {
                  /* Ignore calls with no lhs.  These must be calls to
                     #pragma omp simd functions, and what vectorization factor
                     it really needs can't be determined until
                     vectorizable_simd_clone_call.  */
                  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: irregular stmt.");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                    0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: vector stmt in loop:");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 pattern).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              if (is_gimple_call (stmt)
                  && gimple_call_internal_p (stmt)
                  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
                scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
              else
                scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

              /* Bool ops don't participate in the vectorization factor
                 computation.  For comparisons use the compared types to
                 compute a factor.  */
              if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
                {
                  mask_producers.safe_push (stmt_info);

                  if (gimple_code (stmt) == GIMPLE_ASSIGN
                      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
                         == tcc_comparison
                      && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
                         != BOOLEAN_TYPE)
                    scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
                  else
                    {
                      if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
                        {
                          pattern_def_seq = NULL;
                          gsi_next (&si);
                        }
                      continue;
                    }
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }
            }

          /* Don't try to compute the VF out of scalar types if the stmt
             produces a boolean vector.  Use the result vectype instead.  */
          if (VECTOR_BOOLEAN_TYPE_P (vectype))
            vf_vectype = vectype;
          else
            {
              /* The vectorization factor is according to the smallest
                 scalar type (or the largest vector size, but we only
                 support one vector size per loop).  */
              scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                           &dummy);
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }
              vf_vectype = get_vectype_for_scalar_type (scalar_type);
            }
          if (!vf_vectype)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: different sized vector "
                                   "types in statement, ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vf_vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
              dump_printf (MSG_NOTE, "\n");
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }
    }
  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
                     vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported data-type\n");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  for (i = 0; i < mask_producers.length (); i++)
    {
      tree mask_type = NULL;

      stmt = STMT_VINFO_STMT (mask_producers[i]);
      if (gimple_code (stmt) == GIMPLE_ASSIGN
          && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
          && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
        {
          scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
          mask_type = get_mask_type_for_scalar_type (scalar_type);

          if (!mask_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported mask\n");
              return false;
            }
        }
      else
        {
          tree rhs;
          ssa_op_iter iter;
          gimple *def_stmt;
          enum vect_def_type dt;

          FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
            {
              if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
                                       &def_stmt, &dt, &vectype))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: can't compute mask type "
                                       "for statement, ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                        0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }

              /* No vectype probably means external definition.
                 Allow it in case there is another operand which
                 allows us to determine the mask type.  */
              if (!vectype)
                continue;

              if (!mask_type)
                mask_type = vectype;
              else if (TYPE_VECTOR_SUBPARTS (mask_type)
                       != TYPE_VECTOR_SUBPARTS (vectype))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: different sized mask "
                                       "types in statement, ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         mask_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         vectype);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
              else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
                       != VECTOR_BOOLEAN_TYPE_P (vectype))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: mixed mask and "
                                       "nonmask vector types in statement, ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         mask_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         vectype);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }

          /* We may compare a boolean value loaded as a vector of integers.
             Fix mask_type in such a case.  */
          if (mask_type
              && !VECTOR_BOOLEAN_TYPE_P (mask_type)
              && gimple_code (stmt) == GIMPLE_ASSIGN
              && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
                 == tcc_comparison)
            mask_type = build_same_sized_truth_vector_type (mask_type);
        }

      /* No mask_type should mean a loop-invariant predicate.
         This is probably a subject for optimization in if-conversion.  */
      if (!mask_type)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: can't compute mask type "
                               "for statement, ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
    }

  return true;
}
/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ", init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
          || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
              && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
          || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
                  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
          || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross-iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  auto_vec<gimple *, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        {
          STRIP_NOPS (access_fn);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
            }
          STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
            = initial_condition_in_loop_num (access_fn, loop->num);
          STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
            = evolution_part_in_loop_num (access_fn, loop->num);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
          || (LOOP_VINFO_LOOP (loop_vinfo) != loop
              && TREE_CODE (step) != INTEGER_CST))
        {
          worklist.safe_push (phi);
          continue;
        }

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
                  != NULL_TREE);
      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple *phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple *reduc_stmt;
      bool nested_cycle;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      gcc_assert (!virtual_operand_p (def)
                  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc, false);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                    vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                    vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown def-use cycle pattern.\n");
    }
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross-iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Transfer group and reduction information from STMT to its pattern stmt.  */

static void
vect_fixup_reduc_chain (gimple *stmt)
{
  gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
  gimple *stmtp;
  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
              && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
  do
    {
      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      if (stmt)
        GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
          = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
    }
  while (stmt);
  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
}
/* Fixup scalar cycles that now have their stmts detected as patterns.  */

static void
vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
{
  gimple *first;
  unsigned i;

  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
      {
        vect_fixup_reduc_chain (first);
        LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
          = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
      }
}
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.

   Return the loop exit condition.  */

static gcond *
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
                      tree *number_of_iterationsm1)
{
  tree niters;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");

  niters = number_of_latch_executions (loop);
  *number_of_iterationsm1 = niters;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niters && !chrec_contains_undetermined (niters))
    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
                          build_int_cst (TREE_TYPE (niters), 1));
  *number_of_iterations = niters;

  return get_loop_exit_condition (loop);
}
/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  res->kind = vec_info::loop;
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *phi = gsi_stmt (si);
          gimple_set_uid (phi, 0);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);
          gimple_set_uid (stmt, 0);
          set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
        }
    }
  free (bbs);

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as reversed postorder traversal, so we are safe.  */

  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERSM1 (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res) = vNULL;
  LOOP_VINFO_DATAREFS (res) = vNULL;
  LOOP_VINFO_DDRS (res) = vNULL;
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
  LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
  LOOP_VINFO_GROUPED_STORES (res) = vNULL;
  LOOP_VINFO_REDUCTIONS (res) = vNULL;
  LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
  LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;

  return res;
}
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  bool swapped;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = clean_stmts ? loop->num_nodes : 0;
  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple *stmt = gsi_stmt (si);

          /* We may have broken canonical form by moving a constant
             into RHS1 of a commutative op.  Fix such occurrences.  */
          if (swapped && is_gimple_assign (stmt))
            {
              enum tree_code code = gimple_assign_rhs_code (stmt);

              if ((code == PLUS_EXPR
                   || code == POINTER_PLUS_EXPR
                   || code == MULT_EXPR)
                  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
                swap_ssa_operands (stmt,
                                   gimple_assign_rhs1_ptr (stmt),
                                   gimple_assign_rhs2_ptr (stmt));
            }

          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  vect_destroy_datarefs (loop_vinfo);
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, j, instance)
    vect_free_slp_instance (instance);

  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
  LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();

  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
  loop_vinfo->scalar_cost_vec.release ();

  free (loop_vinfo);
  loop->aux = NULL;
}
/* Calculate the cost of one scalar iteration of the loop.  */
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
  int innerloop_iters, i;

  /* Count statements in the scalar loop.  Using this as the scalar cost for
     a single iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  /* FORNOW.  */
  innerloop_iters = 1;
  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;

          /* Skip stmts that are not vectorized inside the loop.  */
          if (stmt_info
              && !STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
              && !STMT_VINFO_IN_PATTERN_P (stmt_info))
            continue;

          vect_cost_for_stmt kind;
          if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
            {
              if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
                kind = scalar_load;
              else
                kind = scalar_store;
            }
          else
            kind = scalar_stmt;

          scalar_single_iter_cost
            += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
                                 factor, kind, NULL, 0, vect_prologue);
        }
    }
  LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
    = scalar_single_iter_cost;
}
/* Function vect_analyze_loop_form_1.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

static bool
vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
                          tree *number_of_iterationsm1,
                          tree *number_of_iterations, gcond **inner_loop_cond)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          return false;
        }

      if (empty_block_p (loop->header))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
          return false;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple nested loops.\n");
          return false;
        }

      if (loop->num_nodes != 5)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          return false;
        }

      entryedge = loop_preheader_edge (innerloop);
      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: unsupported outerloop form.\n");
          return false;
        }

      /* Analyze the inner-loop.  */
      tree inner_niterm1, inner_niter;
      if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
                                      &inner_niterm1, &inner_niter, NULL))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: Bad inner loop.\n");
          return false;
        }

      if (!expr_invariant_in_loop_p (loop, inner_niter))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: inner-loop count not"
                             " invariant.\n");
          return false;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Considering outer-loop vectorization.\n");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: too many incoming edges.\n");
        }
      return false;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: latch block not empty.\n");
      return false;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "split exit edge.\n");
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: abnormal loop exit edge.\n");
          return false;
        }
    }

  *loop_cond = vect_get_loop_niters (loop, number_of_iterations,
                                     number_of_iterationsm1);
  if (!*loop_cond)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: complicated exit condition.\n");
      return false;
    }

  if (!*number_of_iterations
      || chrec_contains_undetermined (*number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations cannot be "
                         "computed.\n");
      return false;
    }

  if (integer_zerop (*number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations = 0.\n");
      return false;
    }

  return true;
}
/* Analyze LOOP form and return a loop_vec_info if it is of suitable form.  */

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  tree number_of_iterations, number_of_iterationsm1;
  gcond *loop_cond, *inner_loop_cond = NULL;

  if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
                                  &number_of_iterations, &inner_loop_cond))
    return NULL;

  loop_vec_info loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Symbolic number of iterations is ");
          dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
  if (inner_loop_cond)
    STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
      = loop_exit_ctrl_vec_info_type;

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}
/* Scan the loop stmts and, depending on whether there are any (non-)SLP
   statements, update the vectorization factor.  */

static void
vect_update_vf_for_slp (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  unsigned int vectorization_factor;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_vf_for_slp ===\n");

  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  gcc_assert (vectorization_factor != 0);

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say that we
     perform pure SLP on the loop - cross-iteration parallelism is not
     exploited.  */
  bool only_slp_in_loop = true;
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info)
              && STMT_VINFO_RELATED_STMT (stmt_info))
            {
              stmt = STMT_VINFO_RELATED_STMT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
            }
          if ((STMT_VINFO_RELEVANT_P (stmt_info)
               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
              && !PURE_SLP_STMT (stmt_info))
            /* STMT needs both SLP and loop-based vectorization.  */
            only_slp_in_loop = false;
        }
    }

  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor
      = least_common_multiple (vectorization_factor,
                               LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Updating vectorization factor to %d\n",
                     vectorization_factor);
}
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  bool ok;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_operations ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gphi *phi = si.phi ();
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }
          if (virtual_operand_p (gimple_phi_result (phi)))
            continue;

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
            {
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), because this case
                 requires us to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                     != vect_double_reduction_def)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Unsupported loop-closed phi in "
                                     "outer-loop.\n");
                  return false;
                }

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                {
                  tree phi_op;
                  gimple *op_def_stmt;

                  if (gimple_phi_num_args (phi) != 1)
                    return false;

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)
                    return false;

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
                  if (gimple_nop_p (op_def_stmt)
                      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
                      || !vinfo_for_stmt (op_def_stmt))
                    return false;

                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)
                    return false;
                }

              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: value used after loop.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: scalar dependence cycle.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: relevant phi not "
                                   "supported: ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }
        }

      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);
          if (!gimple_clobber_p (stmt)
              && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "All the computation can be taken out of the loop.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: redundant loop. no profit to "
                         "vectorize.\n");
      return false;
    }

  return true;
}
1735 /* Function vect_analyze_loop_2.
1737 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1738 for it. The different analyses will record information in the
1739 loop_vec_info struct. */
1741 vect_analyze_loop_2 (loop_vec_info loop_vinfo
, bool &fatal
)
1744 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1746 unsigned int n_stmts
= 0;
1748 /* The first group of checks is independent of the vector size. */
1751 /* Find all data references in the loop (which correspond to vdefs/vuses)
1752 and analyze their evolution in the loop. */
1754 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1756 loop_p loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1757 if (!find_loop_nest (loop
, &LOOP_VINFO_LOOP_NEST (loop_vinfo
)))
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1761 "not vectorized: loop contains function calls"
1762 " or data references that cannot be analyzed\n");
1766 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
1767 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
1768 !gsi_end_p (gsi
); gsi_next (&gsi
))
1770 gimple
*stmt
= gsi_stmt (gsi
);
1771 if (is_gimple_debug (stmt
))
1774 if (!find_data_references_in_stmt (loop
, stmt
,
1775 &LOOP_VINFO_DATAREFS (loop_vinfo
)))
1777 if (is_gimple_call (stmt
) && loop
->safelen
)
1779 tree fndecl
= gimple_call_fndecl (stmt
), op
;
1780 if (fndecl
!= NULL_TREE
)
1782 cgraph_node
*node
= cgraph_node::get (fndecl
);
1783 if (node
!= NULL
&& node
->simd_clones
!= NULL
)
1785 unsigned int j
, n
= gimple_call_num_args (stmt
);
1786 for (j
= 0; j
< n
; j
++)
1788 op
= gimple_call_arg (stmt
, j
);
1790 || (REFERENCE_CLASS_P (op
)
1791 && get_base_address (op
)))
1794 op
= gimple_call_lhs (stmt
);
1795 /* Ignore #pragma omp declare simd functions
1796 if they don't have data references in the
1797 call stmt itself. */
1801 || (REFERENCE_CLASS_P (op
)
1802 && get_base_address (op
)))))
1807 if (dump_enabled_p ())
1808 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1809 "not vectorized: loop contains function "
1810 "calls or data references that cannot "
1816 /* Analyze the data references and also adjust the minimal
1817 vectorization factor according to the loads and stores. */
1819 ok
= vect_analyze_data_refs (loop_vinfo
, &min_vf
);
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1824 "bad data references.\n");
1828 /* Classify all cross-iteration scalar data-flow cycles.
1829 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1830 vect_analyze_scalar_cycles (loop_vinfo
);
1832 vect_pattern_recog (loop_vinfo
);
1834 vect_fixup_scalar_cycles_with_patterns (loop_vinfo
);
1836 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1837 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1839 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1844 "bad data access.\n");
1848 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1850 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1855 "unexpected pattern.\n");
1859 /* While the rest of the analysis below depends on it in some way. */
1862 /* Analyze data dependences between the data-refs in the loop
1863 and adjust the maximum vectorization factor according to
1865 FORNOW: fail at the first data dependence that we encounter. */
1867 ok
= vect_analyze_data_ref_dependences (loop_vinfo
, &max_vf
);
1871 if (dump_enabled_p ())
1872 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1873 "bad data dependence.\n");
1877 ok
= vect_determine_vectorization_factor (loop_vinfo
);
1880 if (dump_enabled_p ())
1881 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1882 "can't determine vectorization factor.\n");
1885 if (max_vf
< LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1889 "bad data dependence.\n");
1893 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1894 ok
= vect_analyze_slp (loop_vinfo
, n_stmts
);
1898 /* If there are any SLP instances mark them as pure_slp. */
1899 bool slp
= vect_make_slp_decision (loop_vinfo
);
1902 /* Find stmts that need to be both vectorized and SLPed. */
1903 vect_detect_hybrid_slp (loop_vinfo
);
1905 /* Update the vectorization factor based on the SLP decision. */
1906 vect_update_vf_for_slp (loop_vinfo
);
1909 /* Now the vectorization factor is final. */
1910 unsigned vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1911 gcc_assert (vectorization_factor
!= 0);
1913 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
) && dump_enabled_p ())
1914 dump_printf_loc (MSG_NOTE
, vect_location
,
1915 "vectorization_factor = %d, niters = "
1916 HOST_WIDE_INT_PRINT_DEC
"\n", vectorization_factor
,
1917 LOOP_VINFO_INT_NITERS (loop_vinfo
));
1919 HOST_WIDE_INT max_niter
1920 = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo
));
1921 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1922 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
1924 && (unsigned HOST_WIDE_INT
) max_niter
< vectorization_factor
))
1926 if (dump_enabled_p ())
1927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1928 "not vectorized: iteration count too small.\n");
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1931 "not vectorized: iteration count smaller than "
1932 "vectorization factor.\n");
1936 /* Analyze the alignment of the data-refs in the loop.
1937 Fail if a data reference is found that cannot be vectorized. */
1939 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1944 "bad data alignment.\n");
1948 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1949 It is important to call pruning after vect_analyze_data_ref_accesses,
1950 since we use grouping information gathered by interleaving analysis. */
1951 ok
= vect_prune_runtime_alias_test_list (loop_vinfo
);
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1956 "number of versioning for alias "
1957 "run-time tests exceeds %d "
1958 "(--param vect-max-version-for-alias-checks)\n",
1959 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
));
1963 /* Compute the scalar iteration cost. */
1964 vect_compute_single_scalar_iteration_cost (loop_vinfo
);
1966 /* This pass will decide on using loop versioning and/or loop peeling in
1967 order to enhance the alignment of data references in the loop. */
1969 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
1972 if (dump_enabled_p ())
1973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1974 "bad data alignment.\n");
1980 /* Analyze operations in the SLP instances. Note this may
1981 remove unsupported SLP instances which makes the above
1982 SLP kind detection invalid. */
1983 unsigned old_size
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).length ();
1984 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1985 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
));
1986 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).length () != old_size
)
1990 /* Scan all the remaining operations in the loop that are not subject
1991 to SLP and make sure they are vectorizable. */
1992 ok
= vect_analyze_loop_operations (loop_vinfo
);
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1997 "bad operation or unsupported loop bound.\n");
2001 /* Analyze cost. Decide if worth while to vectorize. */
2002 int min_profitable_estimate
, min_profitable_iters
;
2003 vect_estimate_min_profitable_iters (loop_vinfo
, &min_profitable_iters
,
2004 &min_profitable_estimate
);
2006 if (min_profitable_iters
< 0)
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2010 "not vectorized: vectorization not profitable.\n");
2011 if (dump_enabled_p ())
2012 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2013 "not vectorized: vector version will never be "
2018 int min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
2019 * vectorization_factor
) - 1);
2021 /* Use the cost model only if it is more conservative than user specified
2023 unsigned th
= (unsigned) min_scalar_loop_bound
;
2024 if (min_profitable_iters
2025 && (!min_scalar_loop_bound
2026 || min_profitable_iters
> min_scalar_loop_bound
))
2027 th
= (unsigned) min_profitable_iters
;
2029 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo
) = th
;
2031 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2032 && LOOP_VINFO_INT_NITERS (loop_vinfo
) <= th
)
2034 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2036 "not vectorized: vectorization not profitable.\n");
2037 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_NOTE
, vect_location
,
2039 "not vectorized: iteration count smaller than user "
2040 "specified loop bound parameter or minimum profitable "
2041 "iterations (whichever is more conservative).\n");
2045 HOST_WIDE_INT estimated_niter
2046 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo
));
2047 if (estimated_niter
!= -1
2048 && ((unsigned HOST_WIDE_INT
) estimated_niter
2049 <= MAX (th
, (unsigned)min_profitable_estimate
)))
2051 if (dump_enabled_p ())
2052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2053 "not vectorized: estimated iteration count too "
2055 if (dump_enabled_p ())
2056 dump_printf_loc (MSG_NOTE
, vect_location
,
2057 "not vectorized: estimated iteration count smaller "
2058 "than specified loop bound parameter or minimum "
2059 "profitable iterations (whichever is more "
2060 "conservative).\n");
  /* Decide whether we need to create an epilogue loop to handle
     remaining scalar iterations.  */
  th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
        / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
       * LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
                   - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
          < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
        LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
           || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
               < (unsigned) exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
               /* In case of versioning, check if the maximum number of
                  iterations is greater than th.  If they are identical,
                  the epilogue is unnecessary.  */
               && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
                    && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
                   || (unsigned HOST_WIDE_INT) max_niter > th)))
    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
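
  /* Worked example of the divisibility test above (illustrative numbers):
     with LOOP_VINFO_INT_NITERS = 100, PEELING_FOR_ALIGNMENT = 4 and
     VECT_FACTOR = 8, the main loop handles 100 - 4 = 96 iterations;
     ctz_hwi (96) = 5 >= exact_log2 (8) = 3, so 96 is a multiple of 8 and
     no epilogue is needed.  With a peel of 2, 100 - 2 = 98 gives
     ctz_hwi (98) = 1 < 3, so PEELING_FOR_NITER must be set.  */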
  /* If an epilogue loop is required make sure we can create one.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
      if (!vect_can_advance_ivs_p (loop_vinfo)
          || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
                                           single_exit (LOOP_VINFO_LOOP
                                                        (loop_vinfo))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: can't create required "
                             "epilog loop\n");
          return false;
        }
    }

  gcc_assert (vectorization_factor
              == (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo));

  return true;
}
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  unsigned int vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "===== analyze_loop_nest =====\n");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop already vectorized.\n");
      return NULL;
    }

  while (1)
    {
      /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
      loop_vinfo = vect_analyze_loop_form (loop);
      if (!loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "bad loop form.\n");
          return NULL;
        }

      bool fatal = false;
      if (vect_analyze_loop_2 (loop_vinfo, fatal))
        {
          LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
          return loop_vinfo;
        }

      destroy_loop_vec_info (loop_vinfo, true);

      vector_sizes &= ~current_vector_size;
      if (fatal
          || vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis with "
                         "vector size %d\n", current_vector_size);
    }
}
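
/* Illustrative retry sequence (hypothetical target): if
   autovectorize_vector_sizes () returns the bitmask 16 | 8 and the first
   (autodetected) analysis ran with current_vector_size == 16 and failed,
   the mask becomes 8 and the loop above re-runs the analysis with
   current_vector_size = 1 << floor_log2 (8) = 8.  */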
/* Function reduction_code_for_scalar_code

   Input:
   CODE - tree_code of a reduction operation.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
   vector of partial results into a single scalar result, or ERROR_MARK
   if the operation is a supported reduction operation, but does not have
   such a tree-code.

   Return FALSE if CODE currently cannot be vectorized as reduction.  */
static bool
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
{
  switch (code)
    {
    case MAX_EXPR:
      *reduc_code = REDUC_MAX_EXPR;
      return true;

    case MIN_EXPR:
      *reduc_code = REDUC_MIN_EXPR;
      return true;

    case PLUS_EXPR:
      *reduc_code = REDUC_PLUS_EXPR;
      return true;

    case MULT_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      *reduc_code = ERROR_MARK;
      return true;

    default:
      return false;
    }
}
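
/* Example: for the scalar loop 's += a[i]' CODE is PLUS_EXPR, so
   *REDUC_CODE becomes REDUC_PLUS_EXPR and the epilogue can fold the
   vector of partial sums into a single scalar with one reduction
   statement.  For MULT_EXPR the function still returns true but sets
   ERROR_MARK, so the epilogue must fall back to whole-vector shifts or
   element extracts.  */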
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

static void
report_vect_op (int msg_type, gimple *stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s", msg);
  dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
  dump_printf (msg_type, "\n");
}
/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */
static bool
vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
                       gimple *first_stmt)
{
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
  stmt_vec_info use_stmt_info, current_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          /* Check if we got back to the reduction phi.  */
          if (use_stmt == phi)
            {
              loop_use_stmt = use_stmt;
              found = true;
              break;
            }

          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
            {
              loop_use_stmt = use_stmt;
              nloop_uses++;
            }
          else
            n_out_of_loop_uses++;

          /* There can be either a single use in the loop or two uses in
             phi nodes.  */
          if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
            return false;
        }

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
        return false;
      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
        return false;

      if (!is_gimple_assign (loop_use_stmt)
          || code != gimple_assign_rhs_code (loop_use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
        return false;
      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = vinfo_for_stmt (loop_use_stmt);
      if (current_stmt)
        {
          current_stmt_info = vinfo_for_stmt (current_stmt);
          GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
          GROUP_FIRST_ELEMENT (use_stmt_info)
            = GROUP_FIRST_ELEMENT (current_stmt_info);
        }
      else
        GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;

      lhs = gimple_assign_lhs (loop_use_stmt);
      current_stmt = loop_use_stmt;
      size++;
    }

  if (!found || loop_use_stmt != phi || size < 2)
    return false;
  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  while (next_stmt)
    {
      if (gimple_assign_rhs2 (next_stmt) == lhs)
        {
          tree op = gimple_assign_rhs1 (next_stmt);
          gimple *def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
            {
              lhs = gimple_assign_lhs (next_stmt);
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              continue;
            }

          return false;
        }
      else
        {
          tree op = gimple_assign_rhs2 (next_stmt);
          gimple *def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "swapping oprnds: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }

              swap_ssa_operands (next_stmt,
                                 gimple_assign_rhs1_ptr (next_stmt),
                                 gimple_assign_rhs2_ptr (next_stmt));
              update_stmt (next_stmt);

              if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
                LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
            }
          else
            return false;
        }

      lhs = gimple_assign_lhs (next_stmt);
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  /* Save the chain for further analysis in SLP detection.  */
  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
  GROUP_SIZE (vinfo_for_stmt (first)) = size;

  return true;
}
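
/* Illustrative source form of such a chain (an unrolled-by-hand sum):

     int sum = 0;
     for (i = 0; i < n; i++)
       sum = sum + a[4*i] + a[4*i+1] + a[4*i+2] + a[4*i+3];

   Each partial sum is a separate gimple assignment feeding the next one,
   so the walk above follows a single-use chain from the reduction PHI
   back to itself, here with SIZE = 4.  */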
/* Function vect_is_simple_reduction_1

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   or

   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation (if CHECK_REDUCTION is true)
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles, if CHECK_REDUCTION is false.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

     a1 = phi < a0, a2 >
     inner loop (def of a3)
     a2 = phi < a3 >

   (4) Detect condition expressions, ie:
     for (int i = 0; i < N; i++)
       if (a[i] < val)
         ret_val = a[i];

*/
static gimple *
vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
                          bool check_reduction, bool *double_reduc,
                          bool need_wrapping_integral_overflow,
                          enum vect_reduction_type *v_reduc_type)
{
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  gimple *def_stmt, *def1 = NULL, *def2 = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  tree type;
  int nloop_uses;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool phi_def;

  *double_reduc = false;
  *v_reduc_type = TREE_CODE_REDUCTION;

  /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
     otherwise, we assume outer loop vectorization.  */
  gcc_assert ((check_reduction && loop == vect_loop)
              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));

  name = PHI_RESULT (phi);
  /* ??? If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (name))
    return NULL;

  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "intermediate value used outside loop.\n");
          return NULL;
        }

      nloop_uses++;
      if (nloop_uses > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduction used in loop.\n");
          return NULL;
        }
    }
  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "reduction: not ssa_name: ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return NULL;
    }

  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
  if (!def_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction: no def_stmt.\n");
      return NULL;
    }

  if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
    {
      if (dump_enabled_p ())
        {
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
      return NULL;
    }
  if (is_gimple_assign (def_stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduction used in loop.\n");
          return NULL;
        }
    }
  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
          || TREE_CODE (op1) != SSA_NAME)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported phi node definition.\n");
          return NULL;
        }

      def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (def1)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && loop->inner
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
          && is_gimple_assign (def1))
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_NOTE, def_stmt,
                            "detected double reduction: ");

          *double_reduc = true;
          return def_stmt;
        }

      return NULL;
    }
  code = orig_code = gimple_assign_rhs_code (def_stmt);

  /* We can handle "res -= x[i]", which is non-associative, by simply
     rewriting it into "res += -x[i]".  Avoid changing the gimple
     instruction for the first simple tests and only do this
     if we're allowed to change code at all.  */
  if (code == MINUS_EXPR
      && (op1 = gimple_assign_rhs1 (def_stmt))
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_DEF_STMT (op1) == phi)
    code = PLUS_EXPR;
  if (check_reduction)
    {
      if (code == COND_EXPR)
        *v_reduc_type = COND_REDUCTION;
      else if (!commutative_tree_code (code) || !associative_tree_code (code))
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: not commutative/associative: ");
          return NULL;
        }
    }
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
    {
      if (code != COND_EXPR)
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: not binary operation: ");
          return NULL;
        }

      op3 = gimple_assign_rhs1 (def_stmt);
      if (COMPARISON_CLASS_P (op3))
        {
          op4 = TREE_OPERAND (op3, 1);
          op3 = TREE_OPERAND (op3, 0);
        }

      op1 = gimple_assign_rhs2 (def_stmt);
      op2 = gimple_assign_rhs3 (def_stmt);

      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: uses not ssa_names: ");
          return NULL;
        }
    }
  else
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);

      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: uses not ssa_names: ");
          return NULL;
        }
    }
  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type, TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "reduction: multiple types: operation type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
          dump_printf (MSG_NOTE, ", operands types: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM,
                             TREE_TYPE (op1));
          dump_printf (MSG_NOTE, ",");
          dump_generic_expr (MSG_NOTE, TDF_SLIM,
                             TREE_TYPE (op2));
          if (op3)
            {
              dump_printf (MSG_NOTE, ",");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 TREE_TYPE (op3));
            }

          if (op4)
            {
              dump_printf (MSG_NOTE, ",");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 TREE_TYPE (op4));
            }
          dump_printf (MSG_NOTE, "\n");
        }

      return NULL;
    }
  /* Check that it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
  if (*v_reduc_type != COND_REDUCTION)
    {
      /* CHECKME: check for !flag_finite_math_only too?  */
      if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
          && check_reduction)
        {
          /* Changing the order of operations changes the semantics.  */
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: unsafe fp math optimization: ");
          return NULL;
        }
      else if (INTEGRAL_TYPE_P (type) && check_reduction)
        {
          if (!operation_no_trapping_overflow (type, code))
            {
              /* Changing the order of operations changes the semantics.  */
              if (dump_enabled_p ())
                report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                                "reduction: unsafe int math optimization"
                                " (overflow traps): ");
              return NULL;
            }
          if (need_wrapping_integral_overflow
              && !TYPE_OVERFLOW_WRAPS (type)
              && operation_can_overflow (code))
            {
              /* Changing the order of operations changes the semantics.  */
              if (dump_enabled_p ())
                report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                                "reduction: unsafe int math optimization"
                                " (overflow doesn't wrap): ");
              return NULL;
            }
        }
      else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
        {
          /* Changing the order of operations changes the semantics.  */
          if (dump_enabled_p ())
            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                            "reduction: unsafe fixed-point math optimization: ");
          return NULL;
        }
    }
  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  if (TREE_CODE (op1) == SSA_NAME)
    def1 = SSA_NAME_DEF_STMT (op1);

  if (TREE_CODE (op2) == SSA_NAME)
    def2 = SSA_NAME_DEF_STMT (op2);

  if (code != COND_EXPR
      && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
    {
      if (dump_enabled_p ())
        report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
      return NULL;
    }
  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2 && def2 == phi
      && (code == COND_EXPR
          || !def1 || gimple_nop_p (def1)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
          || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
              && (is_gimple_assign (def1)
                  || is_gimple_call (def1)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
                       == vect_induction_def
                  || (gimple_code (def1) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def1)))))))
    {
      if (dump_enabled_p ())
        report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
      return def_stmt;
    }
  if (def1 && def1 == phi
      && (code == COND_EXPR
          || !def2 || gimple_nop_p (def2)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
          || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
              && (is_gimple_assign (def2)
                  || is_gimple_call (def2)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                       == vect_induction_def
                  || (gimple_code (def2) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def2)))))))
    {
      if (check_reduction
          && orig_code != MINUS_EXPR)
        {
          if (code == COND_EXPR)
            {
              /* No current known use where this case would be useful.  */
              if (dump_enabled_p ())
                report_vect_op (MSG_NOTE, def_stmt,
                                "detected reduction: cannot currently swap "
                                "operands for cond_expr");
              return NULL;
            }

          /* Swap operands (just for simplicity - so that the rest of the code
             can assume that the reduction variable is always the last (second)
             argument).  */
          if (dump_enabled_p ())
            report_vect_op (MSG_NOTE, def_stmt,
                            "detected reduction: need to swap operands: ");

          swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
                             gimple_assign_rhs2_ptr (def_stmt));

          if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
            LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
        }
      else
        {
          if (dump_enabled_p ())
            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
        }

      return def_stmt;
    }
  /* Try to find SLP reduction chain.  */
  if (check_reduction && code != COND_EXPR
      && vect_is_slp_reduction (loop_info, phi, def_stmt))
    {
      if (dump_enabled_p ())
        report_vect_op (MSG_NOTE, def_stmt,
                        "reduction: detected reduction chain: ");
      return def_stmt;
    }

  if (dump_enabled_p ())
    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                    "reduction: unknown pattern: ");

  return NULL;
}
/* Wrapper around vect_is_simple_reduction_1, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */

gimple *
vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
                             bool check_reduction, bool *double_reduc,
                             bool need_wrapping_integral_overflow)
{
  enum vect_reduction_type v_reduc_type;
  return vect_is_simple_reduction (loop_info, phi, check_reduction,
                                   double_reduc,
                                   need_wrapping_integral_overflow,
                                   &v_reduc_type);
}
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
int
vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
                             int *peel_iters_epilogue,
                             stmt_vector_for_cost *scalar_cost_vec,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *epilogue_cost_vec)
{
  int retval = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      *peel_iters_epilogue = vf / 2;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cost model: epilogue peel iters set to vf/2 "
                         "because loop iterations are unknown.\n");

      /* If peeled iterations are known but number of scalar loop
         iterations are unknown, count a taken branch per peeled loop.  */
      retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
                                 NULL, 0, vect_prologue);
      retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
                                 NULL, 0, vect_epilogue);
    }
  else
    {
      int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
      peel_iters_prologue = niters < peel_iters_prologue
                            ? niters : peel_iters_prologue;
      *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
      /* If we need to peel for gaps, but no peeling is required, we have to
         peel VF iterations.  */
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
        *peel_iters_epilogue = vf;
    }

  stmt_info_for_cost *si;
  int j;
  if (peel_iters_prologue)
    FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
      retval += record_stmt_cost (prologue_cost_vec,
                                  si->count * peel_iters_prologue,
                                  si->kind, NULL, si->misalign,
                                  vect_prologue);
  if (*peel_iters_epilogue)
    FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
      retval += record_stmt_cost (epilogue_cost_vec,
                                  si->count * *peel_iters_epilogue,
                                  si->kind, NULL, si->misalign,
                                  vect_epilogue);

  return retval;
}
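
/* Worked example (illustrative numbers): for a loop with known
   niters = 100, vf = 4 and peel_iters_prologue = 3, the code above
   computes *peel_iters_epilogue = (100 - 3) % 4 = 1, so each entry of
   the scalar cost vector is charged 3 times into the prologue cost
   vector and once into the epilogue cost vector.  */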
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.  */

static void
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
                                    int *ret_min_profitable_niters,
                                    int *ret_min_profitable_estimate)
{
  int min_profitable_iters;
  int min_profitable_estimate;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  unsigned vec_inside_cost = 0;
  int vec_outside_cost = 0;
  unsigned vec_prologue_cost = 0;
  unsigned vec_epilogue_cost = 0;
  int scalar_single_iter_cost = 0;
  int scalar_outside_cost = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  /* Cost model disabled.  */
  if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    {
      dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
      *ret_min_profitable_niters = 0;
      *ret_min_profitable_estimate = 0;
      return;
    }
  /* Requires loop versioning tests to handle misalignment.  */
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
      (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
                            vect_prologue);
      dump_printf (MSG_NOTE,
                   "cost model: Adding cost of checks for loop "
                   "versioning to treat misalignment.\n");
    }

  /* Requires loop versioning with alias checks.  */
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
      (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
                            vect_prologue);
      dump_printf (MSG_NOTE,
                   "cost model: Adding cost of checks for loop "
                   "versioning aliasing.\n");
    }

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
                          vect_prologue);
  /* Count statements in scalar loop.  Using this as scalar cost for a single
     iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  scalar_single_iter_cost
    = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
  /* Add additional cost for the peeled instructions in prologue and epilogue
     loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  if (npeel < 0)
    {
      peel_iters_prologue = vf / 2;
      dump_printf (MSG_NOTE, "cost model: "
                   "prologue peel iters set to vf/2.\n");

      /* If peeling for alignment is unknown, loop bound of main loop becomes
         unknown.  */
      peel_iters_epilogue = vf / 2;
      dump_printf (MSG_NOTE, "cost model: "
                   "epilogue peel iters set to vf/2 because "
                   "peeling for alignment is unknown.\n");

      /* If peeled iterations are unknown, count a taken branch and a not taken
         branch per peeled loop.  Even if scalar loop iterations are known,
         vector iterations are not known since peeled prologue iterations are
         not known.  Hence guards remain the same.  */
      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
                            NULL, 0, vect_prologue);
      (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
                            NULL, 0, vect_prologue);
      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
                            NULL, 0, vect_epilogue);
      (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
                            NULL, 0, vect_epilogue);
      stmt_info_for_cost *si;
      int j;
      FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (target_cost_data,
                                si->count * peel_iters_prologue,
                                si->kind, stmt_info, si->misalign,
                                vect_prologue);
          (void) add_stmt_cost (target_cost_data,
                                si->count * peel_iters_epilogue,
                                si->kind, stmt_info, si->misalign,
                                vect_epilogue);
        }
    }
  else
    {
      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
      stmt_info_for_cost *si;
      int j;
      void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);

      prologue_cost_vec.create (2);
      epilogue_cost_vec.create (2);
      peel_iters_prologue = npeel;

      (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
                                          &peel_iters_epilogue,
                                          &LOOP_VINFO_SCALAR_ITERATION_COST
                                            (loop_vinfo),
                                          &prologue_cost_vec,
                                          &epilogue_cost_vec);

      FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_epilogue);
        }

      prologue_cost_vec.release ();
      epilogue_cost_vec.release ();
    }
  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDED with the versioning condition.  Hence scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
         jmp to vector code

     Hence run-time scalar is incremented by not-taken branch cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       if (cost <= th)
         prologue = scalar_iters
       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during prologue check, it
     has to be done with the epilogue check.

       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit
       vector code:
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
           jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBS's differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */
  /* If the number of iterations is known and we do not do versioning, we can
     decide whether to vectorize at compile time.  Hence the scalar version
     does not carry cost model guard costs.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      /* Cost model check occurs at versioning.  */
      if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
          || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
        scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
      else
        {
          /* Cost model check occurs at prologue generation.  */
          if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
            scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
              + vect_get_stmt_cost (cond_branch_not_taken);
          /* Cost model check occurs at epilogue generation.  */
          else
            scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
        }
    }
  /* Complete the target-specific cost calculations.  */
  finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
                   vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
                   vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
                   scalar_single_iter_cost);
      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
                   scalar_outside_cost);
      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
                   vec_outside_cost);
      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
                   peel_iters_prologue);
      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
                   peel_iters_epilogue);
    }
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:
     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
     where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost, VF = vectorization factor,
     PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
     SOC = scalar outside cost for run time cost model check.  */
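
  /* Worked instance of the inequality (illustrative costs): with SIC = 1,
     VIC = 2, VF = 4, VOC = 14, SOC = 0 and no peeling, the quotient below
     is (14 * 4) / (1 * 4 - 2) = 28; since 1 * 4 * 28 = 112 equals
     2 * 28 + 14 * 4 = 112, the tie-break check increments the result
     to 29.  */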
  if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
    {
      if (vec_outside_cost <= 0)
        min_profitable_iters = 1;
      else
        {
          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
                                  - vec_inside_cost * peel_iters_prologue
                                  - vec_inside_cost * peel_iters_epilogue)
                                 / ((scalar_single_iter_cost * vf)
                                    - vec_inside_cost);

          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= (((int) vec_inside_cost * min_profitable_iters)
                  + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
            min_profitable_iters++;
        }
    }
  /* vector version will never be profitable.  */
  else
    {
      if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
        warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
                    "did not happen for a simd loop");

      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cost model: the vector iteration cost = %d "
                         "divided by the scalar iteration cost = %d "
                         "is greater or equal to the vectorization factor = %d"
                         ".\n",
                         vec_inside_cost, scalar_single_iter_cost, vf);
      *ret_min_profitable_niters = -1;
      *ret_min_profitable_estimate = -1;
      return;
    }
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "  Calculated minimum iters for profitability: %d\n",
                 min_profitable_iters);

  min_profitable_iters =
        min_profitable_iters < vf ? vf : min_profitable_iters;

  /* Because the condition we create is:
     if (niters <= min_profitable_iters)
       then skip the vectorized loop.  */
  min_profitable_iters--;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "  Runtime profitability threshold = %d\n",
                     min_profitable_iters);

  *ret_min_profitable_niters = min_profitable_iters;
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.

     Non-vectorized variant is SIC * niters and it must win over vector
     variant on the expected loop trip count.  The following condition
     must hold true:
     SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */

  if (vec_outside_cost <= 0)
    min_profitable_estimate = 1;
  else
    {
      min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
                                 - vec_inside_cost * peel_iters_prologue
                                 - vec_inside_cost * peel_iters_epilogue)
                                / ((scalar_single_iter_cost * vf)
                                   - vec_inside_cost);
    }
  min_profitable_estimate--;
  min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "  Static estimate profitability threshold = %d\n",
                     min_profitable_estimate);

  *ret_min_profitable_estimate = min_profitable_estimate;
}
/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
   vector elements (not bits) for a vector of mode MODE.  */

static void
calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
                              unsigned char *sel)
{
  unsigned int i, nelt = GET_MODE_NUNITS (mode);

  for (i = 0; i < nelt; i++)
    sel[i] = (i + offset) & (2 * nelt - 1);
}
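
/* Example: for a mode with nelt = 8 and OFFSET = 2 this produces
   sel = { 2, 3, 4, 5, 6, 7, 8, 9 }.  Indices 8 and 9 select from the
   second vec_perm input, and the masking by 2*nelt - 1 keeps every index
   inside the double-width index space, so the mask expresses a
   whole-vector shift by two elements.  */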
/* Checks whether the target supports whole-vector shifts for vectors of mode
   MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
   it supports vec_perm_const with masks for all necessary shift amounts.  */

static bool
have_whole_vector_shift (enum machine_mode mode)
{
  if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
    return true;

  if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
    return false;

  unsigned int i, nelt = GET_MODE_NUNITS (mode);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  for (i = nelt / 2; i >= 1; i /= 2)
    {
      calc_vec_perm_mask_for_shift (mode, i, sel);
      if (!can_vec_perm_p (mode, false, sel))
        return false;
    }
  return true;
}
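
/* Example: for an eight-element mode the loop above queries masks for
   shifts by 4, 2 and 1 elements - exactly the amounts the reduction
   epilogue needs to halve the vector log2(8) = 3 times.  */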
/* Return the reduction operand (with index REDUC_INDEX) of STMT.  */

static tree
get_reduction_op (gimple *stmt, int reduc_index)
{
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
    case GIMPLE_UNARY_RHS:
      return gimple_assign_rhs1 (stmt);
    case GIMPLE_BINARY_RHS:
      return (reduc_index
              ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
    case GIMPLE_TERNARY_RHS:
      return gimple_op (stmt, reduc_index + 1);
    default:
      gcc_unreachable ();
    }
}
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static bool
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies, int reduc_index)
{
  int prologue_cost = 0, epilogue_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  gimple *stmt, *orig_stmt;
  tree reduction_op;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  void *target_cost_data;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
    }
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));

  /* Condition reductions generate two reductions in the loop.  */
  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
    ncopies *= 2;

  /* Cost of reduction op inside loop.  */
  unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
                                        stmt_info, 0, vect_body);
  stmt = STMT_VINFO_STMT (stmt_info);

  reduction_op = get_reduction_op (stmt, reduc_index);

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (reduction_op));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);
  /* Add in cost for initial definition.
     For cond reduction we have four vectors: initial index, step, initial
     result of the data reduction, initial value of the index reduction.  */
  int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
                       == COND_REDUCTION ? 4 : 1;
  prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
                                  scalar_to_vec, stmt_info, 0,
                                  vect_prologue);
  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_code != ERROR_MARK)
        {
          if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
            {
              /* An EQ stmt and a COND_EXPR stmt.  */
              epilogue_cost += add_stmt_cost (target_cost_data, 2,
                                              vector_stmt, stmt_info, 0,
                                              vect_epilogue);
              /* Reduction of the max index and a reduction of the found
                 values.  */
              epilogue_cost += add_stmt_cost (target_cost_data, 2,
                                              vec_to_scalar, stmt_info, 0,
                                              vect_epilogue);
              /* A broadcast of the max value.  */
              epilogue_cost += add_stmt_cost (target_cost_data, 1,
                                              scalar_to_vec, stmt_info, 0,
                                              vect_epilogue);
            }
          else
            {
              epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                              stmt_info, 0, vect_epilogue);
              epilogue_cost += add_stmt_cost (target_cost_data, 1,
                                              vec_to_scalar, stmt_info, 0,
                                              vect_epilogue);
            }
        }
      else
        {
          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
          int element_bitsize = tree_to_uhwi (bitsize);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype, optab_default);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode) != CODE_FOR_nothing
              && have_whole_vector_shift (mode))
            {
              /* Final reduction via vector shifts and the reduction operator.
                 Also requires scalar extract.  */
              epilogue_cost += add_stmt_cost (target_cost_data,
                                              exact_log2 (nelements) * 2,
                                              vector_stmt, stmt_info, 0,
                                              vect_epilogue);
              epilogue_cost += add_stmt_cost (target_cost_data, 1,
                                              vec_to_scalar, stmt_info, 0,
                                              vect_epilogue);
            }
          else
            /* Use extracts and reduction op for final reduction.  For N
               elements, we have N extracts and N-1 reduction ops.  */
            epilogue_cost += add_stmt_cost (target_cost_data,
                                            nelements + nelements - 1,
                                            vector_stmt, stmt_info, 0,
                                            vect_epilogue);
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "vect_model_reduction_cost: inside_cost = %d, "
                 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
                 prologue_cost, epilogue_cost);

  return true;
}
/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  unsigned inside_cost, prologue_cost;

  /* loop cost for vec_loop.  */
  inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
                               stmt_info, 0, vect_body);

  /* prologue cost for vec_init and vec_step.  */
  prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
                                 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_induction_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function get_initial_def_for_induction

   Input:
   STMT - a stmt that performs an induction operation in the loop.
   IV_PHI - the initial value of the induction variable

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
static tree
get_initial_def_for_induction (gimple *iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype;
  int nunits;
  edge pe = loop_preheader_edge (loop);
  struct loop *iv_loop;
  basic_block new_bb;
  tree new_vec, vec_init, vec_step, t;
  tree new_name;
  gimple *new_stmt;
  gphi *induction_phi;
  tree induc_def, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  int ncopies;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  bool nested_in_vect_loop = false;
  gimple_seq stmts;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  gimple *exit_phi;
  edge latch_e;
  tree loop_arg;
  gimple_stmt_iterator si;
  basic_block bb = gimple_bb (iv_phi);
  tree stepvectype;
  tree resvectype;
  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
  if (nested_in_vect_loop_p (loop, iv_phi))
    {
      nested_in_vect_loop = true;
      iv_loop = loop->inner;
    }
  else
    iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);

  latch_e = loop_latch_edge (iv_loop);
  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);

  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
  gcc_assert (step_expr != NULL_TREE);

  pe = loop_preheader_edge (iv_loop);
  init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                     loop_preheader_edge (iv_loop));

  vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
  resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  ncopies = vf / nunits;

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Convert the step to the desired type.  */
  stmts = NULL;
  step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
  if (stmts)
    {
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  /* Find the first insertion point in the BB.  */
  si = gsi_after_labels (bb);
  /* Create the vector that holds the initial_value of the induction.  */
  if (nested_in_vect_loop)
    {
      /* iv_loop is nested in the loop to be vectorized.  init_expr had already
         been created during vectorization of previous stmts.  We obtain it
         from the STMT_VINFO_VEC_STMT of the defining stmt.  */
      vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
      /* If the initial value is not of proper type, convert it.  */
      if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
        {
          new_stmt
            = gimple_build_assign (vect_get_new_ssa_name (vectype,
                                                          vect_simple_var,
                                                          "vec_iv_"),
                                   VIEW_CONVERT_EXPR,
                                   build1 (VIEW_CONVERT_EXPR, vectype,
                                           vec_init));
          vec_init = gimple_assign_lhs (new_stmt);
          new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
                                                 new_stmt);
          gcc_assert (!new_bb);
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo));
        }
    }
  else
    {
      vec<constructor_elt, va_gc> *v;

      /* iv_loop is the loop to be vectorized.  Create:
         vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
      stmts = NULL;
      new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);

      vec_alloc (v, nunits);
      bool constant_p = is_gimple_min_invariant (new_name);
      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
      for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr  */
          new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
                                   new_name, step_expr);
          if (!is_gimple_min_invariant (new_name))
            constant_p = false;
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
        }
      if (stmts)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
      if (constant_p)
        new_vec = build_vector_from_ctor (vectype, v);
      else
        new_vec = build_constructor (vectype, v);
      vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
    }
  /* Create the vector that holds the step of the induction.  */
  if (nested_in_vect_loop)
    /* iv_loop is nested in the loop to be vectorized.  Generate:
       vec_step = [S, S, S, S]  */
    new_name = step_expr;
  else
    {
      /* iv_loop is the loop to be vectorized.  Generate:
         vec_step = [VF*S, VF*S, VF*S, VF*S]  */
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
        {
          expr = build_int_cst (integer_type_node, vf);
          expr = fold_convert (TREE_TYPE (step_expr), expr);
        }
      else
        expr = build_int_cst (TREE_TYPE (step_expr), vf);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      if (TREE_CODE (step_expr) == SSA_NAME)
        new_name = vect_init_vector (iv_phi, new_name,
                                     TREE_TYPE (step_expr), NULL);
    }

  t = unshare_expr (new_name);
  gcc_assert (CONSTANT_CLASS_P (new_name)
              || TREE_CODE (new_name) == SSA_NAME);
  stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
  gcc_assert (stepvectype);
  new_vec = build_vector_from_val (stepvectype, t);
  vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
  /* Create the following def-use cycle:
     loop prolog:
         vec_init = ...
         vec_step = ...
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  set_vinfo_for_stmt (induction_phi,
                      new_stmt_vec_info (induction_phi, loop_vinfo));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop  */
  new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
  vec_def = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, vec_def);
  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
               UNKNOWN_LOCATION);
  /* In case that vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;
      /* FORNOW.  This restriction should be relaxed.  */
      gcc_assert (!nested_in_vect_loop);

      /* Create the vector that holds the step of the induction.  */
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
        {
          expr = build_int_cst (integer_type_node, nunits);
          expr = fold_convert (TREE_TYPE (step_expr), expr);
        }
      else
        expr = build_int_cst (TREE_TYPE (step_expr), nunits);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      if (TREE_CODE (step_expr) == SSA_NAME)
        new_name = vect_init_vector (iv_phi, new_name,
                                     TREE_TYPE (step_expr), NULL);
      t = unshare_expr (new_name);
      gcc_assert (CONSTANT_CLASS_P (new_name)
                  || TREE_CODE (new_name) == SSA_NAME);
      new_vec = build_vector_from_val (stepvectype, t);
      vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          /* vec_i = vec_prev + vec_step  */
          new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
                                          vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);

          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          if (!useless_type_conversion_p (resvectype, vectype))
            {
              new_stmt
                = gimple_build_assign
                    (vect_get_new_vect_var (resvectype, vect_simple_var,
                                            "vec_iv_"),
                     VIEW_CONVERT_EXPR,
                     build1 (VIEW_CONVERT_EXPR, resvectype,
                             gimple_assign_lhs (new_stmt)));
              gimple_assign_set_lhs (new_stmt,
                                     make_ssa_name
                                       (gimple_assign_lhs (new_stmt), new_stmt));
              gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
            }
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo));
          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }
  if (nested_in_vect_loop)
    {
      /* Find the loop-closed exit-phi of the induction, and record
         the final vector of induction results:  */
      exit_phi = NULL;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
            {
              exit_phi = use_stmt;
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          /* FORNOW. Currently not supporting the case that an inner-loop induction
             is not used in the outer-loop (i.e. only outside the outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "vector of inductions after inner-loop:");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }
        }
    }
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "transform induction: created def-use cycle: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
      dump_printf (MSG_NOTE, "\n");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                        SSA_NAME_DEF_STMT (vec_def), 0);
      dump_printf (MSG_NOTE, "\n");
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  if (!useless_type_conversion_p (resvectype, vectype))
    {
      new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
                                                             vect_simple_var,
                                                             "vec_iv_"),
                                      VIEW_CONVERT_EXPR,
                                      build1 (VIEW_CONVERT_EXPR, resvectype,
                                              induc_def));
      induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
      gimple_assign_set_lhs (new_stmt, induc_def);
      si = gsi_after_labels (bb);
      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
      set_vinfo_for_stmt (new_stmt,
                          new_stmt_vec_info (new_stmt, loop_vinfo));
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
        = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
    }

  return induc_def;
}
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries), if
   ADJUSTMENT_DEF is not NULL, and Option2 otherwise.

   A cost model should help decide between these two schemes.  */
tree
get_initial_def_for_reduction (gimple *stmt, tree init_val,
                               tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree def_for_init;
  tree init_def;
  tree *elts;
  int i;
  bool nested_in_vect_loop = false;
  tree init_value;
  REAL_VALUE_TYPE real_init_val = dconst0;
  int int_init_val = 0;
  gimple *def_stmt = NULL;

  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
              || SCALAR_FLOAT_TYPE_P (scalar_type));

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);
  /* In case of double reduction we only create a vector variable to be put
     in the reduction phi node.  The actual statement creation is done in
     vect_create_epilog_for_reduction.  */
  if (adjustment_def && nested_in_vect_loop
      && TREE_CODE (init_val) == SSA_NAME
      && (def_stmt = SSA_NAME_DEF_STMT (init_val))
      && gimple_code (def_stmt) == GIMPLE_PHI
      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      && vinfo_for_stmt (def_stmt)
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
           == vect_double_reduction_def)
    {
      *adjustment_def = NULL;
      return vect_create_destination_var (init_val, vectype);
    }

  if (TREE_CONSTANT (init_val))
    {
      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
      else
        init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
    }
  else
    init_value = init_val;
  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case SAD_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
      /* ADJUSTMENT_DEF is NULL when called from
         vect_create_epilog_for_reduction to vectorize double reduction.  */
      if (adjustment_def)
        {
          if (nested_in_vect_loop)
            *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
          else
            *adjustment_def = init_val;
        }

      if (code == MULT_EXPR)
        {
          real_init_val = dconst1;
          int_init_val = 1;
        }

      if (code == BIT_AND_EXPR)
        int_init_val = 1;

      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        def_for_init = build_real (scalar_type, real_init_val);
      else
        def_for_init = build_int_cst (scalar_type, int_init_val);

      /* Create a vector of '0' or '1' except the first element.  */
      elts = XALLOCAVEC (tree, nunits);
      for (i = nunits - 2; i >= 0; --i)
        elts[i + 1] = def_for_init;

      if (adjustment_def)
        {
          /* Option1: the first element is '0' or '1' as well.  */
          elts[0] = def_for_init;
          init_def = build_vector (vectype, elts);
          break;
        }

      /* Option2: the first element is INIT_VAL.  */
      elts[0] = init_val;
      if (TREE_CONSTANT (init_val))
        init_def = build_vector (vectype, elts);
      else
        {
          vec<constructor_elt, va_gc> *v;
          vec_alloc (v, nunits);
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
          for (i = 1; i < nunits; ++i)
            CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
          init_def = build_constructor (vectype, v);
        }
      break;

    case MIN_EXPR:
    case MAX_EXPR:
    case COND_EXPR:
      if (adjustment_def)
        {
          *adjustment_def = NULL_TREE;
          if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
            {
              init_def = vect_get_vec_def_for_operand (init_val, stmt);
              break;
            }
        }
      init_def = build_vector_from_val (vectype, init_value);
      break;

    default:
      gcc_unreachable ();
    }

  return init_def;
}
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
     reduction statements.
   STMT is the scalar reduction stmt that is being vectorized.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.
   REDUC_CODE is the tree-code for the epilog reduction.
   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
     computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
   SLP_NODE is an SLP node containing a group of reduction statements.  The
     first one in this group is STMT.
   INDUCTION_INDEX is the index of the loop for condition reductions.
     Otherwise it is undefined.

   This function:
   1. Creates the reduction def-use cycles: sets the arguments for
      REDUCTION_PHIS:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is taken from VECT_DEFS - the vector of partial
      sums.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */
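/* A small worked example of the flow above (the numbers are hypothetical):
   summing 1..8 with a 4-element vector leaves the final vector of partial
   sums VECT_DEF = {1+5, 2+6, 3+7, 4+8} = {6, 8, 10, 12}.  Then

     v_out1 = phi <VECT_DEF>             = {6, 8, 10, 12}
     v_out2 = reduce <v_out1>            = 36 (in lane 0)
     s_out3 = extract_field <v_out2, 0>  = 36
     s_out4 = adjust_result <s_out3>     = 36 + init_val

   so the scalar uses after the loop see the same value the scalar loop
   would have produced.  */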
static void
vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
                                  int ncopies, enum tree_code reduc_code,
                                  vec<gimple *> reduction_phis,
                                  int reduc_index, bool double_reduc,
                                  slp_tree slp_node, tree induction_index)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple *new_phi = NULL, *phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
  gimple *epilog_stmt = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple *exit_phi;
  tree bitsize;
  tree adjustment_def = NULL;
  tree vec_initial_def = NULL;
  tree reduction_op, expr, def, initial_def = NULL;
  tree orig_name, scalar_result;
  imm_use_iterator imm_iter, phi_imm_iter;
  use_operand_p use_p, phi_use_p;
  gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
  bool nested_in_vect_loop = false;
  auto_vec<gimple *> new_phis;
  auto_vec<gimple *> inner_phis;
  enum vect_def_type dt = vect_unknown_def_type;
  int j, i;
  auto_vec<tree> scalar_results;
  unsigned int group_size = 1, k, ratio;
  auto_vec<tree> vec_initial_defs;
  auto_vec<gimple *> phis;
  bool slp_reduc = false;
  tree new_phi_result;
  gimple *inner_phi = NULL;

  if (slp_node)
    group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_in_vect_loop = true;
      gcc_assert (!slp_node);
    }

  reduction_op = get_reduction_op (stmt, reduc_index);

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);
  /* 1. Create the reduction def-use cycle:
     Set the arguments of REDUCTION_PHIS, i.e., transform

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT

     into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT

     (in case of SLP, do it for all the phis).  */

  /* Get the loop-entry arguments.  */
  if (slp_node)
    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
                       NULL, slp_node, reduc_index);
  else
    {
      /* Get at the scalar def before the loop, that defines the initial value
         of the reduction variable.  */
      gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
      initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                           loop_preheader_edge (loop));
      vec_initial_defs.create (1);
      vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
                                                       &adjustment_def);
      vec_initial_defs.quick_push (vec_initial_def);
    }
  /* Set phi nodes arguments.  */
  FOR_EACH_VEC_ELT (reduction_phis, i, phi)
    {
      tree vec_init_def, def;
      gimple_seq stmts;
      vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
                                           true, NULL_TREE);
      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      def = vect_defs[i];
      for (j = 0; j < ncopies; j++)
        {
          /* Set the loop-entry arg of the reduction-phi.  */
          if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
              == INTEGER_INDUC_COND_REDUCTION)
            {
              /* Initialise the reduction phi to zero.  This prevents initial
                 values of non-zero interfering with the reduction op.  */
              gcc_assert (ncopies == 1);
              gcc_assert (i == 0);

              tree vec_init_def_type = TREE_TYPE (vec_init_def);
              tree zero_vec = build_zero_cst (vec_init_def_type);

              add_phi_arg (as_a <gphi *> (phi), zero_vec,
                           loop_preheader_edge (loop), UNKNOWN_LOCATION);
            }
          else
            add_phi_arg (as_a <gphi *> (phi), vec_init_def,
                         loop_preheader_edge (loop), UNKNOWN_LOCATION);

          /* Set the loop-latch arg for the reduction-phi.  */
          if (j > 0)
            def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);

          add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
                       UNKNOWN_LOCATION);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "transform reduction: created def-use cycle: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def),
                                0);
              dump_printf (MSG_NOTE, "\n");
            }

          phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
        }
    }
  /* 2. Create epilog code.
        The reduction epilog code operates across the elements of the vector
        of partial results computed by the vectorized loop.
        The reduction epilog code consists of:

        step 1: compute the scalar result in a vector (v_out2)
        step 2: extract the scalar result (s_out3) from the vector (v_out2)
        step 3: adjust the scalar result (s_out3) if needed.

        Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

        The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

        (step 3 is optional, and steps 1 and 2 may be combined).
        Lastly, the uses of s_out0 are replaced by s_out4.  */
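  /* For instance, reducing the four partial sums {6, 8, 10, 12} from the
     example above:
       (scheme 1) emits a single v_out2 = reduc_expr <v_out1>;
       (scheme 2) shifts the vector down by 2 lanes and adds, so lanes 0-1
                  hold {16, 20}; then shifts by 1 and adds, so lane 0 holds
                  36, which is extracted;
       (scheme 3) extracts all four lanes and adds them with scalar code:
                  ((6 + 8) + 10) + 12 = 36.
     All three produce 36; which one is used depends on target support.  */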
  /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
        v_out1 = phi <VECT_DEF>
        Store them in NEW_PHIS.  */

  exit_bb = single_exit (loop)->dest;
  prev_phi_info = NULL;
  new_phis.create (vect_defs.length ());
  FOR_EACH_VEC_ELT (vect_defs, i, def)
    {
      for (j = 0; j < ncopies; j++)
        {
          tree new_def = copy_ssa_name (def);
          phi = create_phi_node (new_def, exit_bb);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
          if (j == 0)
            new_phis.quick_push (phi);
          else
            {
              def = vect_get_vec_def_for_stmt_copy (dt, def);
              STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
            }

          SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
          prev_phi_info = vinfo_for_stmt (phi);
        }
    }
  /* The epilogue is created for the outer-loop, i.e., for the loop being
     vectorized.  Create exit phis for the outer loop.  */
  if (double_reduc)
    {
      loop = outer_loop;
      exit_bb = single_exit (loop)->dest;
      inner_phis.create (vect_defs.length ());
      FOR_EACH_VEC_ELT (new_phis, i, phi)
        {
          tree new_result = copy_ssa_name (PHI_RESULT (phi));
          gphi *outer_phi = create_phi_node (new_result, exit_bb);
          SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                           PHI_RESULT (phi));
          set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                            loop_vinfo));
          inner_phis.quick_push (phi);
          new_phis[i] = outer_phi;
          prev_phi_info = vinfo_for_stmt (outer_phi);
          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
            {
              phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
              new_result = copy_ssa_name (PHI_RESULT (phi));
              outer_phi = create_phi_node (new_result, exit_bb);
              SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                               PHI_RESULT (phi));
              set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                                loop_vinfo));
              STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
              prev_phi_info = vinfo_for_stmt (outer_phi);
            }
        }
    }

  exit_gsi = gsi_after_labels (exit_bb);
  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }

  code = gimple_assign_rhs_code (orig_stmt);
  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
     partial results are added and not subtracted.  */
  if (code == MINUS_EXPR)
    code = PLUS_EXPR;

  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  scalar_results.create (group_size);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is double reduction, i.e., the use of reduction is
     outside the outer-loop).  The final vector of partial results will be used
     in the vectorized outer-loop, or reduced to a scalar result at the end of
     the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;
  /* SLP reduction without reduction chain, e.g.,
     for (i = 0; i < n; i++)
       {
         a2 = operation (a1);
         b2 = operation (b1);
       }  */
  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
  /* In case of reduction chain, e.g.,
     # a1 = phi <a3, a0>
     a2 = operation (a1),
     a3 = operation (a2),

     we may end up with more than one vector result.  Here we reduce them to
     one vector.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      tree first_vect = PHI_RESULT (new_phis[0]);
      tree tmp;
      gassign *new_vec_stmt = NULL;

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      for (k = 1; k < new_phis.length (); k++)
        {
          gimple *next_phi = new_phis[k];
          tree second_vect = PHI_RESULT (next_phi);

          tmp = build2 (code, vectype, first_vect, second_vect);
          new_vec_stmt = gimple_build_assign (vec_dest, tmp);
          first_vect = make_ssa_name (vec_dest, new_vec_stmt);
          gimple_assign_set_lhs (new_vec_stmt, first_vect);
          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
        }

      new_phi_result = first_vect;
      if (new_vec_stmt)
        {
          new_phis.truncate (0);
          new_phis.safe_push (new_vec_stmt);
        }
    }
  else
    new_phi_result = PHI_RESULT (new_phis[0]);
  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
    {
      /* For condition reductions, we have a vector (NEW_PHI_RESULT)
         containing various data values where the condition matched and
         another vector (INDUCTION_INDEX) containing all the indexes of
         those matches.  We need to extract the last matching index (which
         will be the index with highest value) and use this to index into
         the data vector.
         For the case where there were no matches, the data vector will
         contain all default values and the index vector will be all
         zeros.  */
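      /* A hypothetical example of the vectors involved: for
           for (i = 0; i < N; i++) if (a[i] < value) last = i;
         suppose only lanes 0 and 2 ever matched, with the later match in
         lane 2, so that at loop exit
           NEW_PHI_RESULT  = {d0, default, d2, default}
           INDUCTION_INDEX = {1, 0, 3, 0}
         (indexes are biased so that 0 can mean "no match" - see the check
         on the zero slot in vectorizable_reduction).  The max index is 3,
         which selects lane 2, i.e. the data value of the last match.  */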
      /* Get various versions of the type of the vector of indexes.  */
      tree index_vec_type = TREE_TYPE (induction_index);
      gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
      tree index_scalar_type = TREE_TYPE (index_vec_type);
      tree index_vec_cmp_type = build_same_sized_truth_vector_type
        (index_vec_type);

      /* Get an unsigned integer version of the type of the data vector.  */
      int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
      tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
      tree vectype_unsigned = build_vector_type
        (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));

      /* First we need to create a vector (ZERO_VEC) of zeros and another
         vector (MAX_INDEX_VEC) filled with the last matching index, which we
         can create using a MAX reduction and then expanding.
         In the case where the loop never made any matches, the max index will
         be zero.  */

      /* Vector of {0, 0, 0,...}.  */
      tree zero_vec = make_ssa_name (vectype);
      tree zero_vec_rhs = build_zero_cst (vectype);
      gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
      gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);

      /* Find maximum value from the vector of found indexes.  */
      tree max_index = make_ssa_name (index_scalar_type);
      gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
                                                    induction_index);
      gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);

      /* Vector of {max_index, max_index, max_index,...}.  */
      tree max_index_vec = make_ssa_name (index_vec_type);
      tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
                                                      max_index);
      gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
                                                        max_index_vec_rhs);
      gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);

      /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
         with the vector (INDUCTION_INDEX) of found indexes, choosing values
         from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
         otherwise.  Only one value should match, resulting in a vector
         (VEC_COND) with one data value and the rest zeros.
         In the case where the loop never made any matches, every index will
         match, resulting in a vector with all data values (which will all be
         the default value).  */

      /* Compare the max index vector to the vector of found indexes to find
         the position of the max value.  */
      tree vec_compare = make_ssa_name (index_vec_cmp_type);
      gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
                                                      induction_index,
                                                      max_index_vec);
      gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);

      /* Use the compare to choose either values from the data vector or
         zeros.  */
      tree vec_cond = make_ssa_name (vectype);
      gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
                                                   vec_compare, new_phi_result,
                                                   zero_vec);
      gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);

      /* Finally we need to extract the data value from the vector (VEC_COND)
         into a scalar (MATCHED_DATA_REDUC).  Logically we want to do an OR
         reduction, but because this doesn't exist, we can use a MAX reduction
         instead.  The data value might be signed or a float so we need to
         cast it first.
         In the case where the loop never made any matches, the data values
         are all identical, and so will reduce down correctly.  */

      /* Make the matched data values unsigned.  */
      tree vec_cond_cast = make_ssa_name (vectype_unsigned);
      tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
                                       vec_cond);
      gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
                                                        vec_cond_cast_rhs);
      gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);

      /* Reduce down to a scalar value.  */
      tree data_reduc = make_ssa_name (scalar_type_unsigned);
      optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
                                      optab_default);
      gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
                  != CODE_FOR_nothing);
      gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
                                                     REDUC_MAX_EXPR,
                                                     vec_cond_cast);
      gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);

      /* Convert the reduced value back to the result type and set as the
         result.  */
      tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
                                     data_reduc);
      epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      scalar_results.safe_push (new_temp);
    }
  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  In SLP we simply need to extract all the elements from the
         vector (without reducing them), so we use scalar shifts.  */
  else if (reduc_code != ERROR_MARK && !slp_reduc)
    {
      tree tmp;
      tree vec_elem_type;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Reduce using direct vector reduction.\n");

      vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
      if (!useless_type_conversion_p (scalar_type, vec_elem_type))
        {
          tree tmp_dest =
            vect_create_destination_var (scalar_dest, vec_elem_type);
          tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
          epilog_stmt = gimple_build_assign (tmp_dest, tmp);
          new_temp = make_ssa_name (tmp_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

          tmp = build1 (NOP_EXPR, scalar_type, new_temp);
        }
      else
        tmp = build1 (reduc_code, scalar_type, new_phi_result);

      epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
          == INTEGER_INDUC_COND_REDUCTION)
        {
          /* Earlier we set the initial value to be zero.  Check the result
             and if it is zero then replace with the original initial
             value.  */
          tree zero = build_zero_cst (scalar_type);
          tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);

          tmp = make_ssa_name (new_scalar_dest);
          epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
                                             initial_def, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
          new_temp = tmp;
        }

      scalar_results.safe_push (new_temp);
    }
  else
    {
      bool reduce_with_shift = have_whole_vector_shift (mode);
      int element_bitsize = tree_to_uhwi (bitsize);
      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
      tree vec_temp;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        reduce_with_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode) == CODE_FOR_nothing)
            reduce_with_shift = false;
        }

      if (reduce_with_shift && !slp_reduc)
        {
          int nelements = vec_size_in_bits / element_bitsize;
          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
          int elt_offset;

          tree zero_vec = build_zero_cst (vectype);
          /*** Case 2: Create:
             for (offset = nelements/2; offset >= 1; offset/=2)
               {
                 Create:  va' = vec_shift <va, offset>
                 Create:  va = vop <va, va'>
               }  */
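          /* E.g. for nelements == 4 and an add reduction, starting from
             va = {a0, a1, a2, a3}:
               offset 2:  va' = {a2, a3, 0, 0},  va = {a0+a2, a1+a3, .., ..}
               offset 1:  va' = {a1+a3, .., .., 0},  va = {a0+a1+a2+a3, ...}
             after which lane 0 holds the full reduction (the remaining
             lanes hold don't-care values and are ignored).  */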
          tree rhs;

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using vector shifts\n");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = new_phi_result;
          for (elt_offset = nelements / 2;
               elt_offset >= 1;
               elt_offset /= 2)
            {
              calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
              tree mask = vect_gen_perm_mask_any (vectype, sel);
              epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
                                                 new_temp, zero_vec, mask);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
                                                 new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          /* 2.4  Extract the final scalar result.  Create:
             s_out3 = extract_field <v_out2, bitpos>  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "extract scalar result\n");

          rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
                        bitsize, bitsize_zero_node);
          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
          scalar_results.safe_push (new_temp);
        }
      else
        {
          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>  // For non SLP cases
               }  */
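          /* E.g. for a 4-element vector v_out2 = {e0, e1, e2, e3} and an
             add reduction this emits:
               s  = extract_field <v_out2, 0>
               s' = extract_field <v_out2, element_size>;    s = s + s'
               s' = extract_field <v_out2, 2*element_size>;  s = s + s'
               s' = extract_field <v_out2, 3*element_size>;  s = s + s'
             i.e. s = ((e0 + e1) + e2) + e3.  In the SLP case the extracted
             values are kept separate instead of being combined.  */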
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using scalar code.\n");

          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          FOR_EACH_VEC_ELT (new_phis, i, new_phi)
            {
              int bit_offset;
              if (gimple_code (new_phi) == GIMPLE_PHI)
                vec_temp = PHI_RESULT (new_phi);
              else
                vec_temp = gimple_assign_lhs (new_phi);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitsize_zero_node);
              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              /* In SLP we don't need to apply reduction operation, so we just
                 collect s' values in SCALAR_RESULTS.  */
              if (slp_reduc)
                scalar_results.safe_push (new_temp);

              for (bit_offset = element_bitsize;
                   bit_offset < vec_size_in_bits;
                   bit_offset += element_bitsize)
                {
                  tree bitpos = bitsize_int (bit_offset);
                  tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
                                     bitsize, bitpos);

                  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
                  new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
                  gimple_assign_set_lhs (epilog_stmt, new_name);
                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

                  if (slp_reduc)
                    {
                      /* In SLP we don't need to apply reduction operation, so
                         we just collect s' values in SCALAR_RESULTS.  */
                      new_temp = new_name;
                      scalar_results.safe_push (new_name);
                    }
                  else
                    {
                      epilog_stmt = gimple_build_assign (new_scalar_dest, code,
                                                         new_name, new_temp);
                      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
                      gimple_assign_set_lhs (epilog_stmt, new_temp);
                      gsi_insert_before (&exit_gsi, epilog_stmt,
                                         GSI_SAME_STMT);
                    }
                }
            }

          /* The only case where we need to reduce scalar results in SLP, is
             unrolling.  If the size of SCALAR_RESULTS is greater than
             GROUP_SIZE, we reduce them combining elements modulo
             GROUP_SIZE.  */
          if (slp_reduc)
            {
              tree res, first_res, new_res;
              gimple *new_stmt;

              /* Reduce multiple scalar results in case of SLP unrolling.  */
              for (j = group_size; scalar_results.iterate (j, &res);
                   j++)
                {
                  first_res = scalar_results[j % group_size];
                  new_stmt = gimple_build_assign (new_scalar_dest, code,
                                                  first_res, res);
                  new_res = make_ssa_name (new_scalar_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_res);
                  gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
                  scalar_results[j % group_size] = new_res;
                }
            }
          else
            /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
            scalar_results.safe_push (new_temp);
        }
    }
vect_finalize_reduction:

  if (double_reduc)
    loop = loop->inner;

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */
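  /* E.g. if the initial vector was built by Option1 of
     get_initial_def_for_reduction ({0,0,...,0} for a sum whose scalar
     init_val was 5), then at this point the reduced result covers the loop
     elements only, and the stmt emitted here adds the 5 back.  */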
  if (adjustment_def)
    {
      gcc_assert (!slp_reduc);
      if (nested_in_vect_loop)
        {
          new_phi = new_phis[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          new_temp = scalar_results[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      if (nested_in_vect_loop)
        {
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo));
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt))
            = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc)
            scalar_results.quick_push (new_temp);
          else
            scalar_results[0] = new_temp;
        }
      else
        scalar_results[0] = new_temp;

      new_phis[0] = epilog_stmt;
    }
  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
          phis with new adjusted scalar results, i.e., replace use <s_out0>
          with use <s_out4>.

     Transform:
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out0>
          use <s_out0>

     into:

        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */
  /* In SLP reduction chain we reduce vector results into one vector if
     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
     the last stmt in the reduction chain, since we are looking for the loop
     exit phi node.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
      /* Handle reduction patterns.  */
      if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
        dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));

      scalar_dest = gimple_assign_lhs (dest_stmt);
      group_size = 1;
    }
  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
     case that GROUP_SIZE is greater than vectorization factor).  Therefore,
     we need to match SCALAR_RESULTS with corresponding statements.  The first
     (GROUP_SIZE / number of new vector stmts) scalar results correspond to
     the first vector stmt, etc.
     (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
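  /* E.g. with GROUP_SIZE == 4 and two new vector stmts, RATIO == 2: scalar
     results 0-1 are matched with the first vector stmt and scalar results
     2-3 with the second.  */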
  if (group_size > new_phis.length ())
    {
      ratio = group_size / new_phis.length ();
      gcc_assert (!(group_size % new_phis.length ()));
    }
  else
    ratio = 1;

  for (k = 0; k < group_size; k++)
    {
      if (k % ratio == 0)
        {
          epilog_stmt = new_phis[k / ratio];
          reduction_phi = reduction_phis[k / ratio];
          if (double_reduc)
            inner_phi = inner_phis[k / ratio];
        }

      if (slp_reduc)
        {
          gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];

          orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
          /* SLP statements can't participate in patterns.  */
          gcc_assert (!orig_stmt);
          scalar_dest = gimple_assign_lhs (current_stmt);
        }
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate
         uses - one at the latch block, and one at the loop exit).  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
            && !is_gimple_debug (USE_STMT (use_p)))
          phis.safe_push (USE_STMT (use_p));

      /* While we expect to have found an exit_phi because of loop-closed-ssa
         form we can end up without one if the scalar cycle is dead.  */
      FOR_EACH_VEC_ELT (phis, i, exit_phi)
        {
          if (outer_loop)
            {
              stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
              gphi *vect_phi;

              /* FORNOW. Currently not supporting the case that an inner-loop
                 reduction is not used in the outer-loop (but only outside the
                 outer-loop), unless it is double reduction.  */
              gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                           && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
                          || double_reduc);

              if (double_reduc)
                STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
              else
                STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
              if (!double_reduc
                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
                     != vect_double_reduction_def)
                continue;
              /* Handle double reduction:

                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
                 stmt2: s3 = phi <s1, s4>  - (regular) reduc phi (inner loop)
                 stmt3: s4 = use (s3)      - (regular) reduc stmt (inner loop)
                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)

                 At that point the regular reduction (stmt2 and stmt3) is
                 already vectorized, as well as the exit phi node, stmt4.
                 Here we vectorize the phi node of double reduction, stmt1,
                 and update all relevant statements.  */
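              /* In source form the stmt1..stmt4 cycle above corresponds to
                 a reduction carried through a loop nest (illustrative
                 only):

                   s = init;                    // s0
                   for (i = 0; i < N; i++)      // s1 = phi <s0, s2>
                     for (j = 0; j < M; j++)    // s3 = phi <s1, s4>
                       s = s + a[i][j];         // s4 = use (s3)
                                                // s2 = phi <s4>
              */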
              /* Go through all the uses of s2 to find double reduction phi
                 node, i.e., stmt1 above.  */
              orig_name = PHI_RESULT (exit_phi);
              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
                {
                  stmt_vec_info use_stmt_vinfo;
                  stmt_vec_info new_phi_vinfo;
                  tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
                  basic_block bb = gimple_bb (use_stmt);
                  gimple *use;

                  /* Check that USE_STMT is really double reduction phi
                     node.  */
                  if (gimple_code (use_stmt) != GIMPLE_PHI
                      || gimple_phi_num_args (use_stmt) != 2
                      || bb->loop_father != outer_loop)
                    continue;
                  use_stmt_vinfo = vinfo_for_stmt (use_stmt);
                  if (!use_stmt_vinfo
                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
                         != vect_double_reduction_def)
                    continue;

                  /* Create vector phi node for double reduction:
                     vs1 = phi <vs0, vs2>
                     vs1 was created previously in this function by a call to
                       vect_get_vec_def_for_operand and is stored in
                       VEC_INITIAL_DEF;
                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                     vs0 is created here.  */

                  /* Create vector phi node.  */
                  vect_phi = create_phi_node (vec_initial_def, bb);
                  new_phi_vinfo = new_stmt_vec_info (vect_phi,
                                    loop_vec_info_for_loop (outer_loop));
                  set_vinfo_for_stmt (vect_phi, new_phi_vinfo);

                  /* Create vs0 - initial def of the double reduction phi.  */
                  preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
                                         loop_preheader_edge (outer_loop));
                  init_def = get_initial_def_for_reduction (stmt,
                                                            preheader_arg,
                                                            NULL);
                  vect_phi_init = vect_init_vector (use_stmt, init_def,
                                                    vectype, NULL);

                  /* Update phi node arguments with vs0 and vs2.  */
                  add_phi_arg (vect_phi, vect_phi_init,
                               loop_preheader_edge (outer_loop),
                               UNKNOWN_LOCATION);
                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                               loop_latch_edge (outer_loop),
                               UNKNOWN_LOCATION);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "created double reduction phi node: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }

                  vect_phi_res = PHI_RESULT (vect_phi);

                  /* Replace the use, i.e., set the correct vs1 in the regular
                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                     loop is redundant.  */
                  use = reduction_phi;
                  for (j = 0; j < ncopies; j++)
                    {
                      edge pr_edge = loop_preheader_edge (loop);
                      SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
                      use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
                    }
                }
            }
        }

      phis.truncate (0);
      if (nested_in_vect_loop)
        {
          if (double_reduc)
            loop = outer_loop;
          else
            continue;
        }

      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate
         uses, one at the latch block, and one at the loop exit).  For double
         reductions we are looking for exit phis of the outer loop.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
            {
              if (!is_gimple_debug (USE_STMT (use_p)))
                phis.safe_push (USE_STMT (use_p));
            }
          else
            {
              if (double_reduc
                  && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
                {
                  tree phi_res = PHI_RESULT (USE_STMT (use_p));

                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
                    {
                      if (!flow_bb_inside_loop_p (loop,
                                           gimple_bb (USE_STMT (phi_use_p)))
                          && !is_gimple_debug (USE_STMT (phi_use_p)))
                        phis.safe_push (USE_STMT (phi_use_p));
                    }
                }
            }
        }

      FOR_EACH_VEC_ELT (phis, i, exit_phi)
        {
          /* Replace the uses:  */
          orig_name = PHI_RESULT (exit_phi);
          scalar_result = scalar_results[k];
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, scalar_result);
        }

      phis.truncate (0);
    }
}
/* Function is_nonwrapping_integer_induction.

   Check if STMT (which is part of loop LOOP) both increments and
   does not cause overflow.  */
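/* For example (hypothetical values): an induction with INTEGER_CST base 0
   and step 4 in a loop known to execute at most 100 times reaches at most
   400, which easily fits the precision of a 32-bit result type, so it is
   accepted.  A negative step, an unknown trip count, or a base + step *
   niters value that could exceed the type's precision is rejected.  */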
static bool
is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
  tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
  tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
  widest_int ni, max_loop_value, lhs_max;
  bool overflow = false;

  /* Make sure the loop is integer based.  */
  if (TREE_CODE (base) != INTEGER_CST
      || TREE_CODE (step) != INTEGER_CST)
    return false;

  /* Check that the induction increments.  */
  if (tree_int_cst_sgn (step) == -1)
    return false;

  /* Check that the max size of the loop will not wrap.  */

  if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
    return true;

  if (! max_stmt_executions (loop, &ni))
    return false;

  max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
                            &overflow);
  if (overflow)
    return false;

  max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
                            TYPE_SIGN (lhs_type), &overflow);
  if (overflow)
    return false;

  return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
          <= TYPE_PRECISION (lhs_type));
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   This function also handles reduction of condition expressions, for example:
     for (int i = 0; i < N; i++)
       if (a[i] < value)
         last = i;
   This is handled by vectorising the loop and creating an additional vector
   containing the loop indexes for which "a[i] < value" was true.  In the
   function epilogue this is reduced to a single max value and then used to
   index into the vector of results.

   In some cases of reduction patterns, the type of the reduction variable X
   is different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a
   vector stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in
   parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the
   result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should
   *NOT* be used to create the vectorized stmt.  The right vectype for the
   vectorized stmt is obtained from the type of the result X:
     get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
     STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
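/* Concretely (an illustrative assumption about vector widths, not a
   statement about any particular ISA): for the widen_sum pattern above,

     short operands: 8 per vector (V8HI);  int result: 4 per vector (V4SI)

   STMT_VINFO_VECTYPE of the pattern stmt is V8HI, so the vectorization
   factor is derived from 8 elements, while the vectorized stmt itself is
   built with get_vectype_for_scalar_type (int) == V4SI.  */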
bool
vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
                        gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype_in = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code;
  machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  gimple *def_stmt;
  enum vect_def_type dt;
  gphi *new_phi = NULL;
  tree scalar_type;
  bool is_simple_use;
  gimple *orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int ncopies;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  bool single_defuse_cycle = false;
  tree reduc_def = NULL_TREE;
  gimple *new_stmt = NULL;
  int j;
  tree ops[3];
  bool nested_cycle = false, found_nested_cycle_def = false;
  gimple *reduc_def_stmt = NULL;
  bool double_reduc = false, dummy;
  basic_block def_bb;
  struct loop * def_stmt_loop, *outer_loop = NULL;
  tree def_arg;
  gimple *def_arg_stmt;
  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vect_defs;
  auto_vec<gimple *> phis;
  int vec_num;
  tree def0, def1, tem, op0, op1 = NULL_TREE;
  bool first_p = true;
  tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
  gimple *cond_expr_induction_def_stmt = NULL;
  /* In case of reduction chain we switch to the first stmt in the chain, but
     we don't update STMT_INFO, since only the last stmt is marked as
     reduction and has reduction properties.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
    {
      stmt = GROUP_FIRST_ELEMENT (stmt_info);
      first_p = false;
    }

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_cycle = true;
    }
  /* 1. Is vectorizable reduction?  */
  /* Not supportable if the reduction variable is used in the loop, unless
     it's a reduction chain.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
      && !GROUP_FIRST_ELEMENT (stmt_info))
    return false;

  /* Reductions that are not used even in an enclosing outer-loop,
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
    return false;
  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }
  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (is_gimple_assign (stmt));

  /* Flatten RHS.  */
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_TERNARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == ternary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      ops[2] = gimple_assign_rhs3 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }
  /* The default is that the reduction variable is the last in statement.  */
  int reduc_index = op_type - 1;
  if (code == MINUS_EXPR)
    reduc_index = 0;

  if (code == COND_EXPR && slp_node)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* Do not try to vectorize bit-precision reductions.  */
  if ((TYPE_PRECISION (scalar_type)
       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
    return false;
  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  In case of nested cycle this
     assumption is not true: we use reduc_index to record the index of the
     reduction variable.  */
  for (i = 0; i < op_type; i++)
    {
      if (i == reduc_index)
        continue;

      /* The condition of COND_EXPR is checked in
         vectorizable_condition().  */
      if (i == 0 && code == COND_EXPR)
        continue;

      is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
                                          &def_stmt, &dt, &tem);
      if (!vectype_in)
        vectype_in = tem;
      gcc_assert (is_simple_use);

      if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
          && dt != vect_induction_def
          && !(dt == vect_nested_cycle && nested_cycle))
        return false;

      if (dt == vect_nested_cycle)
        {
          found_nested_cycle_def = true;
          reduc_def_stmt = def_stmt;
          reduc_index = i;
        }

      if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
        cond_expr_induction_def_stmt = def_stmt;
    }

  is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
                                      &def_stmt, &dt, &tem);
  if (!vectype_in)
    vectype_in = tem;
  gcc_assert (is_simple_use);
  if (!found_nested_cycle_def)
    reduc_def_stmt = def_stmt;
  if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
    return false;

  if (!(dt == vect_reduction_def
        || dt == vect_nested_cycle
        || ((dt == vect_internal_def || dt == vect_external_def
             || dt == vect_constant_def || dt == vect_induction_def)
            && nested_cycle && found_nested_cycle_def)))
    {
      /* For pattern recognized stmts, orig_stmt might be a reduction,
         but some helper statements for the pattern might not, or
         might be COND_EXPRs with reduction uses in the condition.  */
      gcc_assert (orig_stmt);
      return false;
    }
  enum vect_reduction_type v_reduc_type;
  gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
                                          !nested_cycle, &dummy, false,
                                          &v_reduc_type);

  /* If we have a condition reduction, see if we can simplify it further.  */
  if (v_reduc_type == COND_REDUCTION
      && cond_expr_induction_def_stmt != NULL
      && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt,
                                           loop))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "condition expression based on integer "
                         "induction.\n");
      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
        = INTEGER_INDUC_COND_REDUCTION;
    }
  else
    STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;

  if (orig_stmt)
    gcc_assert (tmp == orig_stmt
                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
  else
    /* We changed STMT to be the first stmt in reduction chain, hence we
       check that in this case the first element in the chain is STMT.  */
    gcc_assert (stmt == tmp
                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
    return false;
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
               / TYPE_VECTOR_SUBPARTS (vectype_in));

  gcc_assert (ncopies >= 1);

  vec_mode = TYPE_MODE (vectype_in);
5490 /* Only call during the analysis stage, otherwise we'll lose
5492 if (!vec_stmt
&& !vectorizable_condition (stmt
, gsi
, NULL
,
5493 ops
[reduc_index
], 0, NULL
))
5495 if (dump_enabled_p ())
5496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5497 "unsupported condition in reduction\n");
      /* 4. Supportable by target?  */

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
          || code == LROTATE_EXPR || code == RROTATE_EXPR)
        {
          /* Shifts and rotates are only supported by vectorizable_shifts,
             not vectorizable_reduction.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported shift or rotation.\n");
          return false;
        }
      /* 4.1. check support for the operation in the loop  */
      optab = optab_for_tree_code (code, vectype_in, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");

          return false;
        }

      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "op not supported by target.\n");

          if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
              || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                 < vect_min_worthwhile_factor (code))
            return false;

          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "proceeding using word mode.\n");
        }

      /* Worthwhile without SIMD support?  */
      if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not worthwhile without SIMD support.\n");

          return false;
        }
    }
  /* 4.2. Check support for the epilog operation.

        If STMT represents a reduction pattern, then the type of the
        reduction variable may be different than the type of the rest
        of the arguments.  For example, consider the case of accumulation
        of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

        was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

        This means that:
        1. The tree-code that is used to create the vector operation in the
           epilog code (that reduces the partial results) is not the
           tree-code of STMT, but is rather the tree-code of the original
           stmt from the pattern that STMT is replacing.  I.e, in the example
           above we want to use 'widen_sum' in the loop, but 'plus' in the
           epilog.
        2. The type (mode) we use to check available target support
           for the vector operation to be created in the *epilog*, is
           determined by the type of the reduction variable (in the example
           above we'd check this: optab_handler (plus_optab,
           vect_int_mode])).
           However the type (mode) we use to check available target support
           for the vector operation to be created *inside the loop*, is
           determined by the type of the other arguments to STMT (in the
           example we'd check this: optab_handler (widen_sum_optab,
           vect_short_mode)).

        This is contrary to "regular" reductions, in which the types of all
        the arguments are the same as the type of the reduction variable.
        For "regular" reductions we can therefore use the same vector type
        (and also the same tree-code) when generating the epilog code and
        when generating the code inside the loop.  */
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
                  == TREE_CODE_REDUCTION);
      orig_code = gimple_assign_rhs_code (orig_stmt);
      gcc_assert (vectype_out);
      vec_mode = TYPE_MODE (vectype_out);
    }
  else
    {
      /* Regular reduction: the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;

      if (code == MINUS_EXPR)
        orig_code = PLUS_EXPR;

      /* For simple condition reductions, replace with the actual expression
         we want to base our reduction around.  */
      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
          == INTEGER_INDUC_COND_REDUCTION)
        orig_code = MAX_EXPR;
    }
  if (nested_cycle)
    {
      def_bb = gimple_bb (reduc_def_stmt);
      def_stmt_loop = def_bb->loop_father;
      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                       loop_preheader_edge (def_stmt_loop));
      if (TREE_CODE (def_arg) == SSA_NAME
          && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
          && gimple_code (def_arg_stmt) == GIMPLE_PHI
          && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
          && vinfo_for_stmt (def_arg_stmt)
          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
             == vect_double_reduction_def)
        double_reduc = true;
    }
  epilog_reduc_code = ERROR_MARK;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
         == INTEGER_INDUC_COND_REDUCTION)
    {
      if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
        {
          reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
                                             optab_default);
          if (!reduc_optab)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "no optab for reduction.\n");

              epilog_reduc_code = ERROR_MARK;
            }
          else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
            {
              optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
              if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "reduc op not supported by target.\n");

                  epilog_reduc_code = ERROR_MARK;
                }
            }

          /* When epilog_reduc_code is ERROR_MARK then a reduction will be
             generated in the epilog using multiple expressions.  This does
             not work for condition reductions.  */
          if (epilog_reduc_code == ERROR_MARK
              && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
                 == INTEGER_INDUC_COND_REDUCTION)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "no reduc code for scalar code.\n");
              return false;
            }
        }
      else
        {
          if (!nested_cycle || double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "no reduc code for scalar code.\n");

              return false;
            }
        }
    }
  else
    {
      int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
      cr_index_scalar_type = make_unsigned_type (scalar_precision);
      cr_index_vector_type = build_vector_type
        (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));

      epilog_reduc_code = REDUC_MAX_EXPR;
      optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
                                   optab_default);
      if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
          == CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc max op not supported by target.\n");
          return false;
        }
    }
  if ((double_reduc
       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
          == INTEGER_INDUC_COND_REDUCTION)
      && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in double reduction or condition "
                         "reduction.\n");
      return false;
    }
  /* In case of widening multiplication by a constant, we update the type
     of the constant to be the type of the other operand.  We check that the
     constant fits the type in the pattern recognition pass.  */
  if (code == DOT_PROD_EXPR
      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
    {
      if (TREE_CODE (ops[0]) == INTEGER_CST)
        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
      else if (TREE_CODE (ops[1]) == INTEGER_CST)
        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "invalid types in dot-prod\n");

          return false;
        }
    }
  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
    {
      widest_int ni;

      if (! max_loop_iterations (loop, &ni))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "loop count not known, cannot create cond "
                             "reduction.\n");
          return false;
        }
      /* Convert backedges to iterations.  */
      ni += 1;

      /* The additional index will be the same type as the condition.  Check
         that the loop can fit into this less one (because we'll use up the
         zero slot for when there are no matches).  */
      tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
      if (wi::geu_p (ni, wi::to_widest (max_index)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "loop size is greater than data size.\n");
          return false;
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      if (first_p
          && !vect_model_reduction_cost (stmt_info, epilog_reduc_code,
                                         ncopies, reduc_index))
        return false;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
  /* FORNOW: Multiple types are not supported for condition.  */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
5796 /* If the reduction is used in an outer loop we need to generate
5797 VF intermediate results, like so (e.g. for ncopies=2):
5802 (i.e. we generate VF results in 2 registers).
5803 In this case we have a separate def-use cycle for each copy, and therefore
5804 for each copy we get the vector def for the reduction variable from the
5805 respective phi node created for this copy.
5807 Otherwise (the reduction is unused in the loop nest), we can combine
5808 together intermediate results, like so (e.g. for ncopies=2):
5812 (i.e. we generate VF/2 results in a single register).
5813 In this case for each copy we get the vector def for the reduction variable
5814 from the vectorized reduction operation generated in the previous iteration.
5817 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_unused_in_scope
)
5819 single_defuse_cycle
= true;
  epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  vec_oprnds0.create (1);
  if (op_type == ternary_op)
    vec_oprnds1.create (1);

  phis.create (vec_num);
  vect_defs.create (vec_num);
  vect_defs.quick_push (NULL_TREE);
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
	for (i = 0; i < vec_num; i++)
	  {
	    /* Create the reduction-phi that defines the reduction
	       operand.  */
	    new_phi = create_phi_node (vec_dest, loop->header);
	    set_vinfo_for_stmt (new_phi,
				new_stmt_vec_info (new_phi, loop_vinfo));
	    if (j == 0 || slp_node)
	      phis.quick_push (new_phi);
	  }
      if (code == COND_EXPR)
	{
	  gcc_assert (!slp_node);
	  vectorizable_condition (stmt, gsi, vec_stmt,
				  PHI_RESULT (phis[0]),
				  reduc_index, NULL);
	  /* Multiple types are not supported for condition.  */
	}
      op0 = ops[!reduc_index];
      if (op_type == ternary_op)
	  if (reduc_index == 0)

	  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,

	  loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
							stmt);
	  vec_oprnds0.quick_push (loop_vec_def0);
	  if (op_type == ternary_op)
	    {
	      loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
	      vec_oprnds1.quick_push (loop_vec_def1);
	    }
	  enum vect_def_type dt;

	  vect_is_simple_use (ops[!reduc_index], loop_vinfo,
			      &dummy_stmt, &dt);
	  loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
							  loop_vec_def0);
	  vec_oprnds0[0] = loop_vec_def0;
	  if (op_type == ternary_op)
	    {
	      vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
	      loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
							      loop_vec_def1);
	      vec_oprnds1[0] = loop_vec_def1;
	    }
      if (single_defuse_cycle)
	reduc_def = gimple_assign_lhs (new_stmt);

      STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;

      FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
	  reduc_def = PHI_RESULT (phis[i]);

	  if (!single_defuse_cycle || j == 0)
	    reduc_def = PHI_RESULT (new_phi);
	  def1 = ((op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL);
	  if (op_type == binary_op)
	    {
	      if (reduc_index == 0)
		expr = build2 (code, vectype_out, reduc_def, def0);
	      else
		expr = build2 (code, vectype_out, def0, reduc_def);
	    }
	  else
	    {
	      if (reduc_index == 0)
		expr = build3 (code, vectype_out, reduc_def, def0, def1);
	      else if (reduc_index == 1)
		expr = build3 (code, vectype_out, def0, reduc_def, def1);
	      else
		expr = build3 (code, vectype_out, def0, def1, reduc_def);
	    }
	  new_stmt = gimple_build_assign (vec_dest, expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (slp_node)
	    {
	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      vect_defs.quick_push (new_temp);
	    }
	  else
	    vect_defs[0] = new_temp;
      STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  tree indx_before_incr, indx_after_incr, cond_name = NULL;
  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
    {
      new_temp = gimple_assign_lhs (*vec_stmt);
      vect_defs[0] = new_temp;
    }
  /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
     which is updated with the current index of the loop for every match of
     the original loop's cond_expr (VEC_STMT).  This results in a vector
     containing the last time the condition passed for that vector lane.
     The first match will be a 1 to allow 0 to be used for non-matching
     indexes.  If there are no matches at all then the vector will be all
     zeroes.  */
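  /* A worked example, assuming four lanes per vector: the index vector
     starts as {0,0,0,0}; if lane 1 matches only in the second vector
     iteration, where the induction vector holds {5,6,7,8}, the final
     vector is {0,6,0,0}: lane 1 recorded 1-based iteration 6 and the
     other lanes never matched.  */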
  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
    {
      int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

      gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
      /* First we create a simple vector induction variable which starts
	 with the values {1,2,3,...} (SERIES_VECT) and increments by the
	 vector size (STEP).  */

      /* Create a {1,2,3,...} vector.  */
      tree *vtemp = XALLOCAVEC (tree, nunits_out);
      for (k = 0; k < nunits_out; ++k)
	vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
      tree series_vect = build_vector (cr_index_vector_type, vtemp);

      /* Create a vector of the step value.  */
      tree step = build_int_cst (cr_index_scalar_type, nunits_out);
      tree vec_step = build_vector_from_val (cr_index_vector_type, step);
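      /* e.g., assuming nunits_out == 4: series_vect is {1, 2, 3, 4} and
	 vec_step is {4, 4, 4, 4}, so in each vector iteration the IV
	 lanes hold the 1-based numbers of the scalar iterations they
	 correspond to.  */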
      /* Create an induction variable.  */
      gimple_stmt_iterator incr_gsi;
      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
      create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
		 insert_after, &indx_before_incr, &indx_after_incr);
      /* Next create a new phi node vector (NEW_PHI_TREE) which starts
	 filled with zeros (VEC_ZERO).  */

      /* Create a vector of 0s.  */
      tree zero = build_zero_cst (cr_index_scalar_type);
      tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);

      /* Create a vector phi node.  */
      tree new_phi_tree = make_ssa_name (cr_index_vector_type);
      new_phi = create_phi_node (new_phi_tree, loop->header);
      set_vinfo_for_stmt (new_phi,
			  new_stmt_vec_info (new_phi, loop_vinfo));
      add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
		   UNKNOWN_LOCATION);
      /* Now take the condition from the loop's original cond_expr
	 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
	 every match uses values from the induction variable
	 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
	 (NEW_PHI_TREE).
	 Finally, we update the phi (NEW_PHI_TREE) to take the value of
	 the new cond_expr (INDEX_COND_EXPR).  */

      /* Turn the condition from vec_stmt into an ssa name.  */
      gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
      tree ccompare = gimple_assign_rhs1 (*vec_stmt);
      tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
      gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
						   ccompare);
      gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
      gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
      update_stmt (*vec_stmt);
      /* Create a conditional, where the condition is taken from vec_stmt
	 (CCOMPARE_NAME), then is the induction index (INDEX_BEFORE_INCR)
	 and else is the phi (NEW_PHI_TREE).  */
      tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
				     ccompare_name, indx_before_incr,
				     new_phi_tree);
      cond_name = make_ssa_name (cr_index_vector_type);
      gimple *index_condition = gimple_build_assign (cond_name,
						     index_cond_expr);
      gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
      stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
							loop_vinfo);
      STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
      set_vinfo_for_stmt (index_condition, index_vec_info);
      /* Update the phi with the vec cond.  */
      add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
		   UNKNOWN_LOCATION);
    }

  vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
				    epilog_reduc_code, phis, reduc_index,
				    double_reduc, slp_node, cond_name);
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */

int
vect_min_worthwhile_factor (enum tree_code code)
/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a
   vectorized phi to replace it, put it in VEC_STMT, and add it to the same
   basic block.  Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_induction (gimple *phi,
			gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
			gimple **vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
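  /* Illustrative numbers: with a vectorization factor of 8 and a vectype
     holding 4 elements (nunits == 4), ncopies == 2, i.e. two vector stmts
     are created for the scalar induction PHI.  */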
  /* FORNOW.  These restrictions should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi))
    {
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");

      latch_e = loop_latch_edge (loop->inner);
      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;

	  if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
	    {
	      exit_phi = use_stmt;
	      break;
	    }
	}

      if (exit_phi)
	{
	  stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "inner-loop induction only used outside "
				 "of the outer vectorized loop.\n");
	      return false;
	    }
	}
    }
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_induction ===\n");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
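  /* For illustration (assumed IV, not from this function): for the scalar
     induction i = {0, +, 1} and a vectorization factor of 4,
     get_initial_def_for_induction builds a vector IV with initial value
     {0, 1, 2, 3} stepping by {4, 4, 4, 4} per vector iteration.  */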
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

bool
vectorizable_live_operation (gimple *stmt,
			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
			     gimple **vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  if (!is_gimple_assign (stmt))
    {
      if (gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	  && gimple_call_lhs (stmt)
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && loop->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  edge e = single_exit (loop);
	  basic_block merge_bb = e->dest;
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  tree lhs = gimple_call_lhs (stmt);

	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      gimple *use_stmt = USE_STMT (use_p);
	      if (gimple_code (use_stmt) == GIMPLE_PHI
		  && gimple_bb (use_stmt) == merge_bb)
		{
		  tree vfm1
		    = build_int_cst (unsigned_type_node,
				     loop_vinfo->vectorization_factor - 1);
		  SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
		}
	    }
	}
    }
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* FORNOW.  CHECKME.  */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}
/* Kill any debug uses outside LOOP of SSA names defined in STMT.  */

static void
vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
{
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  def_operand_p def_p;
  gimple *ustmt;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
	{
	  if (!is_gimple_debug (ustmt))
	    continue;

	  basic_block bb = gimple_bb (ustmt);

	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (gimple_debug_bind_p (ustmt))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "killing debug use\n");

		  gimple_debug_bind_reset_value (ustmt);
		  update_stmt (ustmt);
		}
	    }
	}
    }
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* This function generates the following statements:

   ni_name = number of iterations loop executes
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf

   and places them on the loop preheader edge.  */
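/* Worked example (illustrative): with vf == 4 and ni_name == 103, the
   emitted statements compute ratio == 25 and ratio_mult_vf_name == 100,
   leaving 103 - 100 == 3 iterations to the scalar epilog loop.  */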
void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
				 tree ni_name,
				 tree *ratio_mult_vf_name_ptr,
				 tree *ratio_name_ptr)
{
  tree ni_minus_gap_name;
  tree ratio_mult_vf_name;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

  log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
  /* If an epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
				       ni_name,
				       build_one_cst (TREE_TYPE (ni_name)));
      if (!is_gimple_val (ni_minus_gap_name))
	{
	  var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
	  gimple *stmts = NULL;
	  ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
						    true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap_name = ni_name;
  /* Create: ratio = ni >> log2(vf).  */
  /* ??? As we have ni == number of latch executions + 1, ni could
     have overflown to zero.  So avoid computing ratio based on ni
     but compute it using the fact that we know ratio will be at least
     one, thus via (ni - vf) >> log2(vf) + 1.  */
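  /* Checking the formula on small numbers (illustrative): for vf == 4
     (log_vf == 2) and ni == 10, ((10 - 4) >> 2) + 1 == 2, matching
     10 / 4 rounded down.  If ni instead wrapped around to 0 in an N-bit
     unsigned type, (0 - 4) >> 2 == 2^(N-2) - 1, so adding 1 still gives
     the correct ratio of 2^(N-2).  */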
  ratio_name
    = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
		   fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
				fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
					     ni_minus_gap_name,
					     build_int_cst
					       (TREE_TYPE (ni_name), vf)),
				log_vf),
		   build_int_cst (TREE_TYPE (ni_name), 1));
  if (!is_gimple_val (ratio_name))
    {
      var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
      gimple *stmts = NULL;
      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *ratio_name_ptr = ratio_name;
  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  if (ratio_mult_vf_name_ptr)
    {
      ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
					ratio_name, log_vf);
      if (!is_gimple_val (ratio_mult_vf_name))
	{
	  var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
	  gimple *stmts = NULL;
	  ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name,
						     &stmts, true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
      *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
    }
}
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop: create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool slp_scheduled = false;
  gimple *stmt, *pattern_stmt;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool transform_pattern_stmt = false;
  bool check_profitability = false;
  /* Record the number of iterations before we start tampering with
     the profile.  */
  gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");

  /* If the profile is imprecise, this is a chance to fix it up.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
  /* Use the more conservative vectorization threshold.  If the number
     of iterations is constant, assume the cost check has been performed
     by our caller.  If the threshold makes all loops profitable that
     run at least the vectorization factor number of times, checking
     is pointless, too.  */
  th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Profitability threshold is %d loop iterations.\n",
			 th);
      check_profitability = true;
    }
  /* Version the loop first, if required, so the profitability check
     comes first.  */

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      vect_loop_versioning (loop_vinfo, th, check_profitability);
      check_profitability = false;
    }
  tree ni_name = vect_build_loop_niters (loop_vinfo);
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;

  /* Peel the loop if there are data refs with unknown alignment.
     Only one store data ref with unknown alignment is allowed.  */

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      vect_do_peeling_for_alignment (loop_vinfo, ni_name,
				     th, check_profitability);
      check_profitability = false;
      /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
	 be re-computed.  */
      ni_name = NULL_TREE;
    }
  /* If the loop has a symbolic number of iterations 'n' (i.e. it is not a
     compile-time constant), or it is a constant that is not divisible by
     the vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF)
     iterations.  (VF is the vectorization factor.)  */
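  /* e.g. (illustrative): for n == 103 and VF == 8, the vectorized loop
     executes 12 iterations covering 96 elements, and the scalar epilog
     loop computes the remaining 7.  */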
  if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
      || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_name = vect_build_loop_niters (loop_vinfo);
      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
				       &ratio);
      vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
				      th, check_profitability);
    }
  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
			   LOOP_VINFO_INT_NITERS (loop_vinfo)
			   / vectorization_factor);
  else
    {
      ni_name = vect_build_loop_niters (loop_vinfo);
      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
    }
  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists of one
     basic block (header + empty latch).  When the vectorizer supports more
     involved loop forms, the order in which the BBs are traversed will
     need to be reconsidered.  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "------>vectorizing phi: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }
	  stmt_info = vinfo_for_stmt (phi);

	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
	    vect_loop_kill_debug_uses (loop, phi);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    continue;

	  if (STMT_VINFO_VECTYPE (stmt_info)
	      && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
		  != (unsigned HOST_WIDE_INT) vectorization_factor)
	      && dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");

	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
	    }
	}
      pattern_stmt = NULL;
      for (gimple_stmt_iterator si = gsi_start_bb (bb);
	   !gsi_end_p (si) || transform_pattern_stmt;)
	{
	  if (transform_pattern_stmt)
	    stmt = pattern_stmt;
	  else
	    {
	      stmt = gsi_stmt (si);
	      /* During vectorization remove existing clobber stmts.  */
	      if (gimple_clobber_p (stmt))
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&si, true);
		  release_defs (stmt);
		}
	    }

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "------>vectorizing statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }
	  stmt_info = vinfo_for_stmt (stmt);

	  /* vector stmts created in the outer-loop during vectorization of
	     stmts in an inner-loop may not have a stmt_info, and do not
	     need to be vectorized.  */

	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
	    vect_loop_kill_debug_uses (loop, stmt);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
		  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
		  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
		      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
		{
		  stmt = pattern_stmt;
		  stmt_info = vinfo_for_stmt (stmt);
		}
	    }
	  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
		   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
		   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
		       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	    transform_pattern_stmt = true;
	  /* If the pattern statement has def stmts, vectorize them too.  */
	  if (is_pattern_stmt_p (stmt_info))
	    {
	      if (pattern_def_seq == NULL)
		{
		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
		  pattern_def_si = gsi_start (pattern_def_seq);
		}
	      else if (!gsi_end_p (pattern_def_si))
		gsi_next (&pattern_def_si);
	      if (pattern_def_seq != NULL)
		{
		  gimple *pattern_def_stmt = NULL;
		  stmt_vec_info pattern_def_stmt_info = NULL;

		  while (!gsi_end_p (pattern_def_si))
		    {
		      pattern_def_stmt = gsi_stmt (pattern_def_si);
		      pattern_def_stmt_info
			= vinfo_for_stmt (pattern_def_stmt);
		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
			break;
		      gsi_next (&pattern_def_si);
		    }

		  if (!gsi_end_p (pattern_def_si))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "==> vectorizing pattern def "
					   "stmt: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
					    pattern_def_stmt, 0);
			  dump_printf (MSG_NOTE, "\n");
			}

		      stmt = pattern_def_stmt;
		      stmt_info = pattern_def_stmt_info;
		    }
		  else
		    {
		      pattern_def_si = gsi_none ();
		      transform_pattern_stmt = false;
		    }
		}
	      else
		transform_pattern_stmt = false;
	    }
	  if (STMT_VINFO_VECTYPE (stmt_info))
	    {
	      unsigned int nunits
		= (unsigned int)
		  TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      if (!STMT_SLP_TYPE (stmt_info)
		  && nunits != (unsigned int) vectorization_factor
		  && dump_enabled_p ())
		/* For SLP the VF is set according to the unrolling factor,
		   and not to the vector size, hence for SLP this print is
		   not valid.  */
		dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
	    }
	  /* SLP.  Schedule all the SLP instances when the first SLP stmt
	     is reached.  */
	  if (STMT_SLP_TYPE (stmt_info))
	    {
	      if (!slp_scheduled)
		{
		  slp_scheduled = true;

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "=== scheduling SLP instances ===\n");

		  vect_schedule_slp (loop_vinfo);
		}
	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
		{
		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
		    pattern_def_seq = NULL;
		}
	    }
	  /* -------- vectorize statement ------------ */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "transform statement.\n");

	  grouped_store = false;
	  is_store = vect_transform_stmt (stmt, &si, &grouped_store,
					  NULL, NULL);
	  if (is_store)
	    {
	      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
		{
		  /* Interleaving.  If IS_STORE is TRUE, the vectorization
		     of the interleaving chain was completed - free all the
		     stores in the chain.  */
		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
		}
	      else
		{
		  /* Free the attached stmt_vec_info and remove the stmt.  */
		  gimple *store = gsi_stmt (si);
		  free_stmt_vec_info (store);
		  unlink_stmt_vdef (store);
		  gsi_remove (&si, true);
		  release_defs (store);
		}

	      /* Stores can only appear at the end of pattern statements.  */
	      gcc_assert (!transform_pattern_stmt);
	      pattern_def_seq = NULL;
	    }
	  else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
	    pattern_def_seq = NULL;
	}
    }
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* Reduce loop iterations by the vectorization factor.  */
  scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
		      expected_iterations / vectorization_factor);
  loop->nb_iterations_upper_bound
    = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      && loop->nb_iterations_upper_bound != 0)
    loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
  if (loop->any_estimate)
    {
      loop->nb_iterations_estimate
	= wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	  && loop->nb_iterations_estimate != 0)
	loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
    }
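  /* Illustrative effect of the adjustments above: an upper bound of 100
     iterations with vectorization_factor == 4 becomes 25; with peeling
     for gaps one more vector iteration is left to the epilog loop,
     hence the extra - 1.  */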
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "LOOP VECTORIZED\n");
      if (loop->inner)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "OUTER LOOP VECTORIZED\n");
      dump_printf (MSG_NOTE, "\n");
    }