/* Loop Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-pass.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it was manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   ================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors, for now will need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
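/* As a concrete illustration of the target-support check mentioned above
   (an illustrative sketch only, not code used by this pass): for the v8hi
   example, the vectorizer would query the optab of the operation at the
   vector mode and give up when no instruction pattern exists:

     machine_mode vmode = TYPE_MODE (vectype);   // e.g. V8HImode
     if (optab_handler (add_optab, vmode) == CODE_FOR_nothing)
       return false;   // target has no V8HI addition; stmt not vectorizable
*/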
static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4byte elements,
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }  */
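/* Worked example (illustrative numbers only): on a target with 16-byte
   vectors, a loop operating on 4-byte ints gets VF = 16 / 4 = 4, so the
   strip-mined loop above steps i by 4 and each a[i:4] access covers exactly
   one vector; for the 2-byte shorts of the earlier example the same vector
   size gives VF = 16 / 2 = 8.  */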
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gphi *phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  unsigned i;
  HOST_WIDE_INT dummy;
  gimple stmt, pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_determine_vectorization_factor ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          phi = si.phi ();
          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type:  ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
                                 nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (gimple_stmt_iterator si = gsi_start_bb (bb);
           !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if ((!STMT_VINFO_RELEVANT_P (stmt_info)
               && !STMT_VINFO_LIVE_P (stmt_info))
              || gimple_clobber_p (stmt))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "==> examining pattern statement: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> examining pattern def stmt: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      analyze_pattern_stmt = false;
                    }
                }
              else
                analyze_pattern_stmt = false;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE
              /* MASK_STORE has no lhs, but is ok.  */
              && (!is_gimple_call (stmt)
                  || !gimple_call_internal_p (stmt)
                  || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
            {
              if (is_gimple_call (stmt))
                {
                  /* Ignore calls with no lhs.  These must be calls to
                     #pragma omp simd functions, and what vectorization factor
                     it really needs can't be determined until
                     vectorizable_simd_clone_call.  */
                  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: irregular stmt.");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                    0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: vector stmt in loop:");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 pattern).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              if (is_gimple_call (stmt)
                  && gimple_call_internal_p (stmt)
                  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
                scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
              else
                scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type:  ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }
            }

          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "get vectype for scalar type:  ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
              dump_printf (MSG_NOTE, "\n");
            }
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vf_vectype)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: different sized vector "
                                   "types in statement, ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vf_vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
              dump_printf (MSG_NOTE, "\n");
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
                     vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported data-type\n");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */
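/* Illustrative example (not part of the function): for the loop

     for (i = 0; i < n; i++)
       p = p + 4;

   the scalar evolution of 'p' is the chrec {p_0, +, 4}_1, so INIT is p_0 and
   STEP is 4, which this function accepts as a simple evolution.  An evolution
   whose step is itself a chrec (a polynomial of degree >= 2) is rejected.  */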
static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ",  init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
          || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
              && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
          || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
                  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
          || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */
static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  auto_vec<gimple, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        {
          STRIP_NOPS (access_fn);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
            }
          STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
            = evolution_part_in_loop_num (access_fn, loop->num);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
          || (LOOP_VINFO_LOOP (loop_vinfo) != loop
              && TREE_CODE (step) != INTEGER_CST))
        {
          worklist.safe_push (phi);
          continue;
        }

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      gcc_assert (!virtual_operand_p (def)
                  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown def-use cycle pattern.\n");
    }
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also to its
   inner-loop, if it exists.
   Examples for scalar cycles:
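   Illustrative examples of such cycles (sketched here for reference):

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */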
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.

   Return the loop exit condition.  */
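/* For instance (illustrative numbers): a loop whose body runs 4 times has 3
   latch executions, so number_of_latch_executions returns 3 and the function
   below sets NUMBER_OF_ITERATIONSM1 = 3 and NUMBER_OF_ITERATIONS = 3 + 1 = 4,
   i.e. the number of times the loop header executes.  */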
static gcond *
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
                      tree *number_of_iterationsm1)
{
  tree niters;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");

  niters = number_of_latch_executions (loop);
  *number_of_iterationsm1 = niters;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niters && !chrec_contains_undetermined (niters))
    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
                          build_int_cst (TREE_TYPE (niters), 1));
  *number_of_iterations = niters;

  return get_loop_exit_condition (loop);
}
/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */
static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the same
     as reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERSM1 (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res).create (3);
  LOOP_VINFO_DATAREFS (res).create (10);
  LOOP_VINFO_DDRS (res).create (10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_GROUPED_STORES (res).create (10);
  LOOP_VINFO_REDUCTIONS (res).create (10);
  LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
  LOOP_VINFO_SLP_INSTANCES (res).create (10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;

  return res;
}
/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */
void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  bool swapped;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = clean_stmts ? loop->num_nodes : 0;
  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);

          /* We may have broken canonical form by moving a constant
             into RHS1 of a commutative op.  Fix such occurrences.  */
          if (swapped && is_gimple_assign (stmt))
            {
              enum tree_code code = gimple_assign_rhs_code (stmt);

              if ((code == PLUS_EXPR
                   || code == POINTER_PLUS_EXPR
                   || code == MULT_EXPR)
                  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
                swap_ssa_operands (stmt,
                                   gimple_assign_rhs1_ptr (stmt),
                                   gimple_assign_rhs2_ptr (stmt));
            }

          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  vect_destroy_datarefs (loop_vinfo, NULL);
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, j, instance)
    vect_free_slp_instance (instance);

  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
  LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();

  delete LOOP_VINFO_PEELING_HTAB (loop_vinfo);
  LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL;

  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}
/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
1085 static loop_vec_info
1086 vect_analyze_loop_1 (struct loop
*loop
)
1088 loop_vec_info loop_vinfo
;
1090 if (dump_enabled_p ())
1091 dump_printf_loc (MSG_NOTE
, vect_location
,
1092 "===== analyze_loop_nest_1 =====\n");
1094 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1096 loop_vinfo
= vect_analyze_loop_form (loop
);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1101 "bad inner-loop form.\n");
/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
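/* For illustration (a sketch, not used by the analysis): a loop such as

     i = 0;
     do
       {
         a[i] = b[i];
         i++;
       }
     while (i < n);

   satisfies these restrictions: it has a pre-header, a single exit, an empty
   latch, and the niter analysis can compute its iteration count; a loop with
   an additional early "break" has two exits and is rejected below.  */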
loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gcond *loop_cond;
  tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple nested loops.\n");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: Bad inner loop.\n");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: inner-loop count not"
                             " invariant.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: unsupported outerloop form.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Considering outer-loop vectorization.\n");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: too many incoming edges.\n");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop. i.e,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: latch block not empty.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "split exit edge.\n");
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: abnormal loop exit edge.\n");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
                                    &number_of_iterationsm1);
  if (!loop_cond)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: complicated exit condition.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations
      || chrec_contains_undetermined (number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations cannot be "
                         "computed.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (integer_zerop (number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations = 0.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Symbolic number of iterations is ");
          dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true, ok;
  HOST_WIDE_INT max_niter;
  HOST_WIDE_INT estimated_niter;
  int min_profitable_estimate;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_operations ===\n");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (slp)
    {
      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
         vectorization factor of the loop is the unrolling factor required by
         the SLP instances.  If that unrolling factor is 1, we say, that we
         perform pure SLP on loop - cross iteration parallelism is not
         exploited.  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
               gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              gcc_assert (stmt_info);
              if ((STMT_VINFO_RELEVANT_P (stmt_info)
                   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
                  && !PURE_SLP_STMT (stmt_info))
                /* STMT needs both SLP and loop-based vectorization.  */
                only_slp_in_loop = false;
            }
        }

      if (only_slp_in_loop)
        vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
      else
        vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Updating vectorization factor to %d\n",
                         vectorization_factor);
    }

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gphi *phi = si.phi ();
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
            {
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), cause this case
                 requires to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                     != vect_double_reduction_def)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Unsupported loop-closed phi in "
                                     "outer-loop.\n");
                  return false;
                }

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                {
                  tree phi_op;
                  gimple op_def_stmt;

                  if (gimple_phi_num_args (phi) != 1)
                    return false;

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)
                    return false;

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
                  if (gimple_nop_p (op_def_stmt)
                      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
                      || !vinfo_for_stmt (op_def_stmt))
                    return false;

                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)
                    return false;
                }

              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: value used after loop.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: scalar dependence cycle.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: relevant phi not "
                                   "supported: ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }
        }

      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          if (!gimple_clobber_p (stmt)
              && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "All the computation can be taken out of the loop.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: redundant loop. no profit to "
                         "vectorize.\n");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorization_factor = %d, niters = "
                     HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
                     LOOP_VINFO_INT_NITERS (loop_vinfo));

  if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
      || ((max_niter = max_stmt_executions_int (loop)) != -1
          && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: iteration count too small.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: iteration count smaller than "
                         "vectorization factor.\n");
      return false;
    }

  /* Analyze cost.  Decide if worth while to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
                                      &min_profitable_estimate);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vector version will never be "
                         "profitable.\n");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "not vectorized: iteration count smaller than user "
                         "specified loop bound parameter or minimum profitable "
                         "iterations (whichever is more conservative).\n");
      return false;
    }

  if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
      && ((unsigned HOST_WIDE_INT) estimated_niter
          <= MAX (th, (unsigned)min_profitable_estimate)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: estimated iteration count too "
                         "small.\n");
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "not vectorized: estimated iteration count smaller "
                         "than specified loop bound parameter or minimum "
                         "profitable iterations (whichever is more "
                         "conservative).\n");
      return false;
    }

  return true;
}
/* Function vect_analyze_loop_2.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */

static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
  bool ok, slp = false;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  int min_vf = 2;
  unsigned int th;
  unsigned int n_stmts = 0;

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.  Also adjust the minimal
     vectorization factor according to the loads and stores.

     FORNOW: Handle only simple, array references, which
     alignment can be forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf, &n_stmts);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data references.\n");
      return false;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo, NULL);

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data access.\n");
      return false;
    }

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected pattern.\n");
      return false;
    }

  /* Analyze data dependences between the data-refs in the loop
     and adjust the maximum vectorization factor according to
     the dependences.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
  if (!ok
      || max_vf < min_vf)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data dependence.\n");
      return false;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't determine vectorization factor.\n");
      return false;
    }
  if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data dependence.\n");
      return false;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data alignment.\n");
      return false;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "number of versioning for alias "
                         "run-time tests exceeds %d "
                         "(--param vect-max-version-for-alias-checks)\n",
                         PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data alignment.\n");
      return false;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      slp = vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }
  else
    return false;

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_loop_operations (loop_vinfo, slp);
  if (!ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad operation or unsupported loop bound.\n");
      return false;
    }

  /* Decide whether we need to create an epilogue loop to handle
     remaining scalar iterations.  */
  th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
        / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
       * LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
                   - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
          < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
        LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
           || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
               < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
               /* In case of versioning, check if the maximum number of
                  iterations is greater than th.  If they are identical,
                  the epilogue is unnecessary.  */
               && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
                    && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
                   || (unsigned HOST_WIDE_INT)max_stmt_executions_int
                        (LOOP_VINFO_LOOP (loop_vinfo)) > th)))
    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;

  /* If an epilogue loop is required make sure we can create one.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
      if (!vect_can_advance_ivs_p (loop_vinfo)
          || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
                                           single_exit (LOOP_VINFO_LOOP
                                                         (loop_vinfo))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: can't create required "
                             "epilog loop\n");
          return false;
        }
    }

  return true;
}
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */

loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  unsigned int vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "===== analyze_loop_nest =====\n");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop already vectorized.\n");
      return NULL;
    }

  while (1)
    {
      /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
      loop_vinfo = vect_analyze_loop_form (loop);
      if (!loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "bad loop form.\n");
          return NULL;
        }

      if (vect_analyze_loop_2 (loop_vinfo))
        {
          LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

          return loop_vinfo;
        }

      destroy_loop_vec_info (loop_vinfo, true);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis with "
                         "vector size %d\n", current_vector_size);
    }
}
/* Function reduction_code_for_scalar_code

   Input:
   CODE - tree_code of a reduction operations.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
      vector of partial results into a single scalar result, or ERROR_MARK
      if the operation is a supported reduction operation, but does not have
      such a tree-code.

   Return FALSE if CODE currently cannot be vectorized as reduction.  */
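/* Example (illustrative): for a sum reduction the scalar CODE is PLUS_EXPR and
   the returned REDUC_CODE is REDUC_PLUS_EXPR; reducing the partial-sum vector
   {1, 2, 3, 4} with it yields the single scalar 10.  For a code like
   MINUS_EXPR the function returns ERROR_MARK, meaning the epilogue has to
   combine the vector elements with individual extracts instead of a single
   reduction operation.  */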
static bool
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
{
  switch (code)
    {
      case MAX_EXPR:
        *reduc_code = REDUC_MAX_EXPR;
        return true;

      case MIN_EXPR:
        *reduc_code = REDUC_MIN_EXPR;
        return true;

      case PLUS_EXPR:
        *reduc_code = REDUC_PLUS_EXPR;
        return true;

      case MULT_EXPR:
      case MINUS_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        *reduc_code = ERROR_MARK;
        return true;

      default:
        return false;
    }
}
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
   STMT is printed with a message MSG.  */

static void
report_vect_op (int msg_type, gimple stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s", msg);
  dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
  dump_printf (msg_type, "\n");
}
/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */
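/* A C loop that gives rise to such a chain (illustrative sketch only):

     for (i = 0; i < N; i++)
       {
         s = s + a[i];      // a2 = operation (a1)
         s = s + b[i];      // a3 = operation (a2)
       }

   Both statements use the same reduction code (PLUS_EXPR), each result feeds
   only the next statement in the chain, and the last result feeds the
   loop-header phi, so the chain can be recorded in
   LOOP_VINFO_REDUCTION_CHAINS for loop-aware SLP.  */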
static bool
vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
{
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
  stmt_vec_info use_stmt_info, current_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          /* Check if we got back to the reduction phi.  */
          if (use_stmt == phi)
            {
              loop_use_stmt = use_stmt;
              found = true;
              break;
            }

          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
            {
              if (vinfo_for_stmt (use_stmt)
                  && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                {
                  loop_use_stmt = use_stmt;
                  nloop_uses++;
                }
            }
          else
            n_out_of_loop_uses++;

          /* There can be either a single use in the loop or two uses in
             phi nodes.  */
          if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
            return false;
        }

      if (found)
        break;

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
        return false;

      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
        return false;

      if (!is_gimple_assign (loop_use_stmt)
          || code != gimple_assign_rhs_code (loop_use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
        return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = vinfo_for_stmt (loop_use_stmt);
      if (current_stmt)
        {
          current_stmt_info = vinfo_for_stmt (current_stmt);
          GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
          GROUP_FIRST_ELEMENT (use_stmt_info)
            = GROUP_FIRST_ELEMENT (current_stmt_info);
        }
      else
        GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;

      lhs = gimple_assign_lhs (loop_use_stmt);
      current_stmt = loop_use_stmt;
      size++;
    }

  if (!found || loop_use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  while (next_stmt)
    {
      if (gimple_assign_rhs2 (next_stmt) == lhs)
        {
          tree op = gimple_assign_rhs1 (next_stmt);
          gimple def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
            {
              lhs = gimple_assign_lhs (next_stmt);
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              continue;
            }

          return false;
        }
      else
        {
          tree op = gimple_assign_rhs2 (next_stmt);
          gimple def_stmt = NULL;

          if (TREE_CODE (op) == SSA_NAME)
            def_stmt = SSA_NAME_DEF_STMT (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt
              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
              && (is_gimple_assign (def_stmt)
                  || is_gimple_call (def_stmt)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                       == vect_induction_def
                  || (gimple_code (def_stmt) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }

              swap_ssa_operands (next_stmt,
                                 gimple_assign_rhs1_ptr (next_stmt),
                                 gimple_assign_rhs2_ptr (next_stmt));
              update_stmt (next_stmt);

              if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
                LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
            }
          else
            return false;
        }

      lhs = gimple_assign_lhs (next_stmt);
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  /* Save the chain for further analysis in SLP detection.  */
  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
  GROUP_SIZE (vinfo_for_stmt (first)) = size;

  return true;
}
/* Function vect_is_simple_reduction_1

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   or

   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation (if CHECK_REDUCTION is true)
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles, if CHECK_REDUCTION is false.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

   a1 = phi < a0, a2 >
   inner loop (def of a3)
   a2 = phi < a3 >

   If MODIFY is true it tries also to rework the code in-place to enable
   detection of more reduction patterns.  For the time being we rewrite
   "res -= RHS" into "res += -RHS" when it seems worthwhile.  */
static gimple
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
			    bool check_reduction, bool *double_reduc,
			    bool modify)
{
2224 struct loop
*loop
= (gimple_bb (phi
))->loop_father
;
2225 struct loop
*vect_loop
= LOOP_VINFO_LOOP (loop_info
);
2226 edge latch_e
= loop_latch_edge (loop
);
2227 tree loop_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, latch_e
);
2228 gimple def_stmt
, def1
= NULL
, def2
= NULL
;
2229 enum tree_code orig_code
, code
;
2230 tree op1
, op2
, op3
= NULL_TREE
, op4
= NULL_TREE
;
2234 imm_use_iterator imm_iter
;
2235 use_operand_p use_p
;
2238 *double_reduc
= false;
2240 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2241 otherwise, we assume outer loop vectorization. */
2242 gcc_assert ((check_reduction
&& loop
== vect_loop
)
2243 || (!check_reduction
&& flow_loop_nested_p (vect_loop
, loop
)));
2245 name
= PHI_RESULT (phi
);
2246 /* ??? If there are no uses of the PHI result the inner loop reduction
2247 won't be detected as possibly double-reduction by vectorizable_reduction
2248 because that tries to walk the PHI arg from the preheader edge which
2249 can be constant. See PR60382. */
2250 if (has_zero_uses (name
))
2253 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, name
)
2255 gimple use_stmt
= USE_STMT (use_p
);
2256 if (is_gimple_debug (use_stmt
))
2259 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2261 if (dump_enabled_p ())
2262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2263 "intermediate value used outside loop.\n");
2268 if (vinfo_for_stmt (use_stmt
)
2269 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt
)))
2273 if (dump_enabled_p ())
2274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2275 "reduction used in loop.\n");
2280 if (TREE_CODE (loop_arg
) != SSA_NAME
)
2282 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2285 "reduction: not ssa_name: ");
2286 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, loop_arg
);
2287 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2292 def_stmt
= SSA_NAME_DEF_STMT (loop_arg
);
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2297 "reduction: no def_stmt.\n");
2301 if (!is_gimple_assign (def_stmt
) && gimple_code (def_stmt
) != GIMPLE_PHI
)
2303 if (dump_enabled_p ())
2305 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2306 dump_printf (MSG_NOTE
, "\n");
2311 if (is_gimple_assign (def_stmt
))
2313 name
= gimple_assign_lhs (def_stmt
);
2318 name
= PHI_RESULT (def_stmt
);
2323 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, name
)
2325 gimple use_stmt
= USE_STMT (use_p
);
2326 if (is_gimple_debug (use_stmt
))
2328 if (flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
))
2329 && vinfo_for_stmt (use_stmt
)
2330 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt
)))
2334 if (dump_enabled_p ())
2335 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2336 "reduction used in loop.\n");
2341 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2342 defined in the inner loop. */
2345 op1
= PHI_ARG_DEF (def_stmt
, 0);
2347 if (gimple_phi_num_args (def_stmt
) != 1
2348 || TREE_CODE (op1
) != SSA_NAME
)
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2352 "unsupported phi node definition.\n");
2357 def1
= SSA_NAME_DEF_STMT (op1
);
2358 if (gimple_bb (def1
)
2359 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
2361 && flow_bb_inside_loop_p (loop
->inner
, gimple_bb (def1
))
2362 && is_gimple_assign (def1
))
2364 if (dump_enabled_p ())
2365 report_vect_op (MSG_NOTE
, def_stmt
,
2366 "detected double reduction: ");
2368 *double_reduc
= true;
2375 code
= orig_code
= gimple_assign_rhs_code (def_stmt
);
2377 /* We can handle "res -= x[i]", which is non-associative by
2378 simply rewriting this into "res += -x[i]". Avoid changing
2379 gimple instruction for the first simple tests and only do this
2380 if we're allowed to change code at all. */
2381 if (code
== MINUS_EXPR
2383 && (op1
= gimple_assign_rhs1 (def_stmt
))
2384 && TREE_CODE (op1
) == SSA_NAME
2385 && SSA_NAME_DEF_STMT (op1
) == phi
)
2389 && (!commutative_tree_code (code
) || !associative_tree_code (code
)))
2391 if (dump_enabled_p ())
2392 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2393 "reduction: not commutative/associative: ");
2397 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
2399 if (code
!= COND_EXPR
)
2401 if (dump_enabled_p ())
2402 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2403 "reduction: not binary operation: ");
2408 op3
= gimple_assign_rhs1 (def_stmt
);
2409 if (COMPARISON_CLASS_P (op3
))
2411 op4
= TREE_OPERAND (op3
, 1);
2412 op3
= TREE_OPERAND (op3
, 0);
2415 op1
= gimple_assign_rhs2 (def_stmt
);
2416 op2
= gimple_assign_rhs3 (def_stmt
);
2418 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2420 if (dump_enabled_p ())
2421 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2422 "reduction: uses not ssa_names: ");
2429 op1
= gimple_assign_rhs1 (def_stmt
);
2430 op2
= gimple_assign_rhs2 (def_stmt
);
2432 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2434 if (dump_enabled_p ())
2435 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2436 "reduction: uses not ssa_names: ");
2442 type
= TREE_TYPE (gimple_assign_lhs (def_stmt
));
2443 if ((TREE_CODE (op1
) == SSA_NAME
2444 && !types_compatible_p (type
,TREE_TYPE (op1
)))
2445 || (TREE_CODE (op2
) == SSA_NAME
2446 && !types_compatible_p (type
, TREE_TYPE (op2
)))
2447 || (op3
&& TREE_CODE (op3
) == SSA_NAME
2448 && !types_compatible_p (type
, TREE_TYPE (op3
)))
2449 || (op4
&& TREE_CODE (op4
) == SSA_NAME
2450 && !types_compatible_p (type
, TREE_TYPE (op4
))))
2452 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_NOTE
, vect_location
,
2455 "reduction: multiple types: operation type: ");
2456 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, type
);
2457 dump_printf (MSG_NOTE
, ", operands types: ");
2458 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2460 dump_printf (MSG_NOTE
, ",");
2461 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2465 dump_printf (MSG_NOTE
, ",");
2466 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2472 dump_printf (MSG_NOTE
, ",");
2473 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2476 dump_printf (MSG_NOTE
, "\n");
2482 /* Check that it's ok to change the order of the computation.
2483 Generally, when vectorizing a reduction we change the order of the
2484 computation. This may change the behavior of the program in some
2485 cases, so we need to check that this is ok. One exception is when
2486 vectorizing an outer-loop: the inner-loop is executed sequentially,
2487 and therefore vectorizing reductions in the inner-loop during
2488 outer-loop vectorization is safe. */
2490 /* CHECKME: check for !flag_finite_math_only too? */
2491 if (SCALAR_FLOAT_TYPE_P (type
) && !flag_associative_math
2494 /* Changing the order of operations changes the semantics. */
2495 if (dump_enabled_p ())
2496 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2497 "reduction: unsafe fp math optimization: ");
2500 else if (INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_TRAPS (type
)
2503 /* Changing the order of operations changes the semantics. */
2504 if (dump_enabled_p ())
2505 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2506 "reduction: unsafe int math optimization: ");
2509 else if (SAT_FIXED_POINT_TYPE_P (type
) && check_reduction
)
2511 /* Changing the order of operations changes the semantics. */
2512 if (dump_enabled_p ())
2513 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2514 "reduction: unsafe fixed-point math optimization: ");
2518 /* If we detected "res -= x[i]" earlier, rewrite it into
2519 "res += -x[i]" now. If this turns out to be useless reassoc
2520 will clean it up again. */
2521 if (orig_code
== MINUS_EXPR
)
2523 tree rhs
= gimple_assign_rhs2 (def_stmt
);
2524 tree negrhs
= make_ssa_name (TREE_TYPE (rhs
));
2525 gimple negate_stmt
= gimple_build_assign (negrhs
, NEGATE_EXPR
, rhs
);
2526 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
2527 set_vinfo_for_stmt (negate_stmt
, new_stmt_vec_info (negate_stmt
,
2529 gsi_insert_before (&gsi
, negate_stmt
, GSI_NEW_STMT
);
2530 gimple_assign_set_rhs2 (def_stmt
, negrhs
);
2531 gimple_assign_set_rhs_code (def_stmt
, PLUS_EXPR
);
2532 update_stmt (def_stmt
);
2535 /* Reduction is safe. We're dealing with one of the following:
2536 1) integer arithmetic and no trapv
2537 2) floating point arithmetic, and special flags permit this optimization
2538 3) nested cycle (i.e., outer loop vectorization). */
2539 if (TREE_CODE (op1
) == SSA_NAME
)
2540 def1
= SSA_NAME_DEF_STMT (op1
);
2542 if (TREE_CODE (op2
) == SSA_NAME
)
2543 def2
= SSA_NAME_DEF_STMT (op2
);
2545 if (code
!= COND_EXPR
2546 && ((!def1
|| gimple_nop_p (def1
)) && (!def2
|| gimple_nop_p (def2
))))
2548 if (dump_enabled_p ())
2549 report_vect_op (MSG_NOTE
, def_stmt
, "reduction: no defs for operands: ");
2553 /* Check that one def is the reduction def, defined by PHI,
2554 the other def is either defined in the loop ("vect_internal_def"),
2555 or it's an induction (defined by a loop-header phi-node). */
2557 if (def2
&& def2
== phi
2558 && (code
== COND_EXPR
2559 || !def1
|| gimple_nop_p (def1
)
2560 || !flow_bb_inside_loop_p (loop
, gimple_bb (def1
))
2561 || (def1
&& flow_bb_inside_loop_p (loop
, gimple_bb (def1
))
2562 && (is_gimple_assign (def1
)
2563 || is_gimple_call (def1
)
2564 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2565 == vect_induction_def
2566 || (gimple_code (def1
) == GIMPLE_PHI
2567 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2568 == vect_internal_def
2569 && !is_loop_header_bb_p (gimple_bb (def1
)))))))
2571 if (dump_enabled_p ())
2572 report_vect_op (MSG_NOTE
, def_stmt
, "detected reduction: ");
2576 if (def1
&& def1
== phi
2577 && (code
== COND_EXPR
2578 || !def2
|| gimple_nop_p (def2
)
2579 || !flow_bb_inside_loop_p (loop
, gimple_bb (def2
))
2580 || (def2
&& flow_bb_inside_loop_p (loop
, gimple_bb (def2
))
2581 && (is_gimple_assign (def2
)
2582 || is_gimple_call (def2
)
2583 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2584 == vect_induction_def
2585 || (gimple_code (def2
) == GIMPLE_PHI
2586 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2587 == vect_internal_def
2588 && !is_loop_header_bb_p (gimple_bb (def2
)))))))
2590 if (check_reduction
)
2592 /* Swap operands (just for simplicity - so that the rest of the code
2593 can assume that the reduction variable is always the last (second)
2595 if (dump_enabled_p ())
2596 report_vect_op (MSG_NOTE
, def_stmt
,
2597 "detected reduction: need to swap operands: ");
2599 swap_ssa_operands (def_stmt
, gimple_assign_rhs1_ptr (def_stmt
),
2600 gimple_assign_rhs2_ptr (def_stmt
));
2602 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt
)))
2603 LOOP_VINFO_OPERANDS_SWAPPED (loop_info
) = true;
2607 if (dump_enabled_p ())
2608 report_vect_op (MSG_NOTE
, def_stmt
, "detected reduction: ");
2614 /* Try to find SLP reduction chain. */
2615 if (check_reduction
&& vect_is_slp_reduction (loop_info
, phi
, def_stmt
))
2617 if (dump_enabled_p ())
2618 report_vect_op (MSG_NOTE
, def_stmt
,
2619 "reduction: detected reduction chain: ");
2624 if (dump_enabled_p ())
2625 report_vect_op (MSG_MISSED_OPTIMIZATION
, def_stmt
,
2626 "reduction: unknown pattern: ");
/* Wrapper around vect_is_simple_reduction_1, that won't modify code
   in-place.  Arguments as there.  */

gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
			  bool check_reduction, bool *double_reduc)
{
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
				     double_reduc, false);
}

/* Wrapper around vect_is_simple_reduction_1, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */

gimple
vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
			     bool check_reduction, bool *double_reduc)
{
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
				     double_reduc, true);
}
2654 /* Calculate the cost of one scalar iteration of the loop. */
2656 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo
)
2658 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2659 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2660 int nbbs
= loop
->num_nodes
, factor
, scalar_single_iter_cost
= 0;
2661 int innerloop_iters
, i
, stmt_cost
;
2663 /* Count statements in scalar loop. Using this as scalar cost for a single
2666 TODO: Add outer loop support.
2668 TODO: Consider assigning different costs to different scalar
2672 innerloop_iters
= 1;
2674 innerloop_iters
= 50; /* FIXME */
2676 for (i
= 0; i
< nbbs
; i
++)
2678 gimple_stmt_iterator si
;
2679 basic_block bb
= bbs
[i
];
2681 if (bb
->loop_father
== loop
->inner
)
2682 factor
= innerloop_iters
;
2686 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2688 gimple stmt
= gsi_stmt (si
);
2689 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2691 if (!is_gimple_assign (stmt
) && !is_gimple_call (stmt
))
2694 /* Skip stmts that are not vectorized inside the loop. */
2696 && !STMT_VINFO_RELEVANT_P (stmt_info
)
2697 && (!STMT_VINFO_LIVE_P (stmt_info
)
2698 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
)))
2699 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
2702 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
2704 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
2705 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2707 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2710 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2712 scalar_single_iter_cost
+= stmt_cost
* factor
;
2715 return scalar_single_iter_cost
;
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */

int
vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
			     int *peel_iters_epilogue,
			     int scalar_single_iter_cost,
			     stmt_vector_for_cost *prologue_cost_vec,
			     stmt_vector_for_cost *epilogue_cost_vec)
{
  int retval = 0;
2727 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2729 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2731 *peel_iters_epilogue
= vf
/2;
2732 if (dump_enabled_p ())
2733 dump_printf_loc (MSG_NOTE
, vect_location
,
2734 "cost model: epilogue peel iters set to vf/2 "
2735 "because loop iterations are unknown .\n");
2737 /* If peeled iterations are known but number of scalar loop
2738 iterations are unknown, count a taken branch per peeled loop. */
2739 retval
= record_stmt_cost (prologue_cost_vec
, 2, cond_branch_taken
,
2740 NULL
, 0, vect_prologue
);
2744 int niters
= LOOP_VINFO_INT_NITERS (loop_vinfo
);
2745 peel_iters_prologue
= niters
< peel_iters_prologue
?
2746 niters
: peel_iters_prologue
;
2747 *peel_iters_epilogue
= (niters
- peel_iters_prologue
) % vf
;
2748 /* If we need to peel for gaps, but no peeling is required, we have to
2749 peel VF iterations. */
2750 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && !*peel_iters_epilogue
)
2751 *peel_iters_epilogue
= vf
;
2754 if (peel_iters_prologue
)
2755 retval
+= record_stmt_cost (prologue_cost_vec
,
2756 peel_iters_prologue
* scalar_single_iter_cost
,
2757 scalar_stmt
, NULL
, 0, vect_prologue
);
2758 if (*peel_iters_epilogue
)
2759 retval
+= record_stmt_cost (epilogue_cost_vec
,
2760 *peel_iters_epilogue
* scalar_single_iter_cost
,
2761 scalar_stmt
, NULL
, 0, vect_epilogue
);
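/* A worked instance of the bookkeeping above (numbers illustrative only):
   with a known trip count of 100, VF == 4 and 3 prologue iterations, the
   epilogue gets (100 - 3) % 4 == 1 iteration; the prologue then contributes
   3 * scalar_single_iter_cost and the epilogue 1 * scalar_single_iter_cost
   as scalar_stmt entries in the respective cost vectors.  */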
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.  */

static void
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
				    int *ret_min_profitable_niters,
				    int *ret_min_profitable_estimate)
{
2776 int min_profitable_iters
;
2777 int min_profitable_estimate
;
2778 int peel_iters_prologue
;
2779 int peel_iters_epilogue
;
2780 unsigned vec_inside_cost
= 0;
2781 int vec_outside_cost
= 0;
2782 unsigned vec_prologue_cost
= 0;
2783 unsigned vec_epilogue_cost
= 0;
2784 int scalar_single_iter_cost
= 0;
2785 int scalar_outside_cost
= 0;
2786 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2787 int npeel
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
2788 void *target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
2790 /* Cost model disabled. */
2791 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo
)))
2793 dump_printf_loc (MSG_NOTE
, vect_location
, "cost model disabled.\n");
2794 *ret_min_profitable_niters
= 0;
2795 *ret_min_profitable_estimate
= 0;
2799 /* Requires loop versioning tests to handle misalignment. */
2800 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
2802 /* FIXME: Make cost depend on complexity of individual check. */
2803 unsigned len
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).length ();
2804 (void) add_stmt_cost (target_cost_data
, len
, vector_stmt
, NULL
, 0,
2806 dump_printf (MSG_NOTE
,
2807 "cost model: Adding cost of checks for loop "
2808 "versioning to treat misalignment.\n");
2811 /* Requires loop versioning with alias checks. */
2812 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2814 /* FIXME: Make cost depend on complexity of individual check. */
2815 unsigned len
= LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
).length ();
2816 (void) add_stmt_cost (target_cost_data
, len
, vector_stmt
, NULL
, 0,
2818 dump_printf (MSG_NOTE
,
2819 "cost model: Adding cost of checks for loop "
2820 "versioning aliasing.\n");
2823 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2824 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2825 (void) add_stmt_cost (target_cost_data
, 1, cond_branch_taken
, NULL
, 0,
2828 /* Count statements in scalar loop. Using this as scalar cost for a single
2831 TODO: Add outer loop support.
2833 TODO: Consider assigning different costs to different scalar
2836 scalar_single_iter_cost
= vect_get_single_scalar_iteration_cost (loop_vinfo
);
2837 /* ??? Below we use this cost as number of stmts with scalar_stmt cost,
2838 thus divide by that. This introduces rounding errors, thus better
2839 introduce a new cost kind (raw_cost? scalar_iter_cost?). */
2840 int scalar_single_iter_stmts
2841 = scalar_single_iter_cost
/ vect_get_stmt_cost (scalar_stmt
);
2843 /* Add additional cost for the peeled instructions in prologue and epilogue
2846 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2847 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2849 TODO: Build an expression that represents peel_iters for prologue and
2850 epilogue to be used in a run-time test. */
2854 peel_iters_prologue
= vf
/2;
2855 dump_printf (MSG_NOTE
, "cost model: "
2856 "prologue peel iters set to vf/2.\n");
2858 /* If peeling for alignment is unknown, loop bound of main loop becomes
2860 peel_iters_epilogue
= vf
/2;
2861 dump_printf (MSG_NOTE
, "cost model: "
2862 "epilogue peel iters set to vf/2 because "
2863 "peeling for alignment is unknown.\n");
2865 /* If peeled iterations are unknown, count a taken branch and a not taken
2866 branch per peeled loop. Even if scalar loop iterations are known,
2867 vector iterations are not known since peeled prologue iterations are
2868 not known. Hence guards remain the same. */
2869 (void) add_stmt_cost (target_cost_data
, 2, cond_branch_taken
,
2870 NULL
, 0, vect_prologue
);
2871 (void) add_stmt_cost (target_cost_data
, 2, cond_branch_not_taken
,
2872 NULL
, 0, vect_prologue
);
2873 /* FORNOW: Don't attempt to pass individual scalar instructions to
2874 the model; just assume linear cost for scalar iterations. */
2875 (void) add_stmt_cost (target_cost_data
,
2876 peel_iters_prologue
* scalar_single_iter_stmts
,
2877 scalar_stmt
, NULL
, 0, vect_prologue
);
2878 (void) add_stmt_cost (target_cost_data
,
2879 peel_iters_epilogue
* scalar_single_iter_stmts
,
2880 scalar_stmt
, NULL
, 0, vect_epilogue
);
2884 stmt_vector_for_cost prologue_cost_vec
, epilogue_cost_vec
;
2885 stmt_info_for_cost
*si
;
2887 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
2889 prologue_cost_vec
.create (2);
2890 epilogue_cost_vec
.create (2);
2891 peel_iters_prologue
= npeel
;
2893 (void) vect_get_known_peeling_cost (loop_vinfo
, peel_iters_prologue
,
2894 &peel_iters_epilogue
,
2895 scalar_single_iter_stmts
,
2897 &epilogue_cost_vec
);
2899 FOR_EACH_VEC_ELT (prologue_cost_vec
, j
, si
)
2901 struct _stmt_vec_info
*stmt_info
2902 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
2903 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
2904 si
->misalign
, vect_prologue
);
2907 FOR_EACH_VEC_ELT (epilogue_cost_vec
, j
, si
)
2909 struct _stmt_vec_info
*stmt_info
2910 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
2911 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
2912 si
->misalign
, vect_epilogue
);
2915 prologue_cost_vec
.release ();
2916 epilogue_cost_vec
.release ();
  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDED with the versioning condition.  Hence scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
	 jmp to vector code

     Hence run-time scalar is incremented by not-taken branch cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       if (cost <= th)
	 prologue = scalar_iters
       if (prologue == 0)
	 jmp to vector code
       else
	 execute prologue
       if (prologue == num_iters)
	 go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during prologue check, it
     has to be done with the epilogue check.

       if (prologue == 0)
	 jmp to vector code
       else
	 execute prologue
       if (prologue == num_iters)
	 go to exit
       vector code:
	 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
	   jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBS's differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */
2969 /* If the number of iterations is known and we do not do versioning, we can
2970 decide whether to vectorize at compile time. Hence the scalar version
2971 do not carry cost model guard costs. */
2972 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2973 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2974 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2976 /* Cost model check occurs at versioning. */
2977 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2978 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2979 scalar_outside_cost
+= vect_get_stmt_cost (cond_branch_not_taken
);
2982 /* Cost model check occurs at prologue generation. */
2983 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
) < 0)
2984 scalar_outside_cost
+= 2 * vect_get_stmt_cost (cond_branch_taken
)
2985 + vect_get_stmt_cost (cond_branch_not_taken
);
2986 /* Cost model check occurs at epilogue generation. */
2988 scalar_outside_cost
+= 2 * vect_get_stmt_cost (cond_branch_taken
);
2992 /* Complete the target-specific cost calculations. */
2993 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
), &vec_prologue_cost
,
2994 &vec_inside_cost
, &vec_epilogue_cost
);
2996 vec_outside_cost
= (int)(vec_prologue_cost
+ vec_epilogue_cost
);
2998 if (dump_enabled_p ())
3000 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3001 dump_printf (MSG_NOTE
, " Vector inside of loop cost: %d\n",
3003 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n",
3005 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n",
3007 dump_printf (MSG_NOTE
, " Scalar iteration cost: %d\n",
3008 scalar_single_iter_cost
);
3009 dump_printf (MSG_NOTE
, " Scalar outside cost: %d\n",
3010 scalar_outside_cost
);
3011 dump_printf (MSG_NOTE
, " Vector outside cost: %d\n",
3013 dump_printf (MSG_NOTE
, " prologue iterations: %d\n",
3014 peel_iters_prologue
);
3015 dump_printf (MSG_NOTE
, " epilogue iterations: %d\n",
3016 peel_iters_epilogue
);
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:
     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
     where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost, VF = vectorization factor,
     PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations,
     SOC = scalar outside cost for run time cost model check.  */
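  /* A worked instance of this inequality (costs are illustrative only):
     with SIC = 4, VIC = 6, VOC = 20, SOC = 0, VF = 4 and no peeling, the
     division below gives

       min_profitable_iters = (20 - 0) * 4 / (4 * 4 - 6) = 80 / 10 = 8

     and the follow-up check bumps it to 9 because at exactly 8 iterations
     the scalar cost (4 * 4 * 8 = 128) only ties the vector cost
     (6 * 8 + 20 * 4 = 128) rather than exceeding it.  */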
3029 if ((scalar_single_iter_cost
* vf
) > (int) vec_inside_cost
)
3031 if (vec_outside_cost
<= 0)
3032 min_profitable_iters
= 1;
3035 min_profitable_iters
= ((vec_outside_cost
- scalar_outside_cost
) * vf
3036 - vec_inside_cost
* peel_iters_prologue
3037 - vec_inside_cost
* peel_iters_epilogue
)
3038 / ((scalar_single_iter_cost
* vf
)
3041 if ((scalar_single_iter_cost
* vf
* min_profitable_iters
)
3042 <= (((int) vec_inside_cost
* min_profitable_iters
)
3043 + (((int) vec_outside_cost
- scalar_outside_cost
) * vf
)))
3044 min_profitable_iters
++;
3047 /* vector version will never be profitable. */
3050 if (LOOP_VINFO_LOOP (loop_vinfo
)->force_vectorize
)
3051 warning_at (vect_location
, OPT_Wopenmp_simd
, "vectorization "
3052 "did not happen for a simd loop");
3054 if (dump_enabled_p ())
3055 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3056 "cost model: the vector iteration cost = %d "
3057 "divided by the scalar iteration cost = %d "
3058 "is greater or equal to the vectorization factor = %d"
3060 vec_inside_cost
, scalar_single_iter_cost
, vf
);
3061 *ret_min_profitable_niters
= -1;
3062 *ret_min_profitable_estimate
= -1;
3066 dump_printf (MSG_NOTE
,
3067 " Calculated minimum iters for profitability: %d\n",
3068 min_profitable_iters
);
3070 min_profitable_iters
=
3071 min_profitable_iters
< vf
? vf
: min_profitable_iters
;
  /* Because the condition we create is:
     if (niters <= min_profitable_iters)
       then skip the vectorized loop.  */
  min_profitable_iters--;
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_NOTE
, vect_location
,
3080 " Runtime profitability threshold = %d\n",
3081 min_profitable_iters
);
3083 *ret_min_profitable_niters
= min_profitable_iters
;
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.

     Non-vectorized variant is SIC * niters and it must win over vector
     variant on the expected loop trip count.  The following condition
     must hold true:
     SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3092 if (vec_outside_cost
<= 0)
3093 min_profitable_estimate
= 1;
3096 min_profitable_estimate
= ((vec_outside_cost
+ scalar_outside_cost
) * vf
3097 - vec_inside_cost
* peel_iters_prologue
3098 - vec_inside_cost
* peel_iters_epilogue
)
3099 / ((scalar_single_iter_cost
* vf
)
3102 min_profitable_estimate
--;
3103 min_profitable_estimate
= MAX (min_profitable_estimate
, min_profitable_iters
);
3104 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_NOTE
, vect_location
,
3106 " Static estimate profitability threshold = %d\n",
3107 min_profitable_iters
);
3109 *ret_min_profitable_estimate
= min_profitable_estimate
;
/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
   vector elements (not bits) for a vector of mode MODE.  */

static void
calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
			      unsigned char *sel)
{
  unsigned int i, nelt = GET_MODE_NUNITS (mode);

  for (i = 0; i < nelt; i++)
    sel[i] = (i + offset) & (2*nelt - 1);
}

/* Checks whether the target supports whole-vector shifts for vectors of mode
   MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
   it supports vec_perm_const with masks for all necessary shift amounts.  */

static bool
have_whole_vector_shift (enum machine_mode mode)
{
  if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
    return true;

  if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
    return false;

  unsigned int i, nelt = GET_MODE_NUNITS (mode);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  for (i = nelt/2; i >= 1; i/=2)
    {
      calc_vec_perm_mask_for_shift (mode, i, sel);
      if (!can_vec_perm_p (mode, false, sel))
	return false;
    }
  return true;
}
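/* For example, for an 8-element vector (nelt == 8) and OFFSET == 4,
   calc_vec_perm_mask_for_shift fills SEL with { 4, 5, 6, 7, 8, 9, 10, 11 }:
   indices nelt..2*nelt-1 select elements from the second input of the
   two-operand permutation, which in the reduction epilogue is typically a
   zero vector, so the permute behaves as a whole-vector shift right by
   OFFSET elements.  */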
3148 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3149 functions. Design better to avoid maintenance issues. */
3151 /* Function vect_model_reduction_cost.
3153 Models cost for a reduction operation, including the vector ops
3154 generated within the strip-mine loop, the initial definition before
3155 the loop, and the epilogue code that must be generated. */
3158 vect_model_reduction_cost (stmt_vec_info stmt_info
, enum tree_code reduc_code
,
3161 int prologue_cost
= 0, epilogue_cost
= 0;
3162 enum tree_code code
;
3165 gimple stmt
, orig_stmt
;
3168 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3169 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3170 void *target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
3172 /* Cost of reduction op inside loop. */
3173 unsigned inside_cost
= add_stmt_cost (target_cost_data
, ncopies
, vector_stmt
,
3174 stmt_info
, 0, vect_body
);
3175 stmt
= STMT_VINFO_STMT (stmt_info
);
3177 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
3179 case GIMPLE_SINGLE_RHS
:
3180 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
)) == ternary_op
);
3181 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 2);
3183 case GIMPLE_UNARY_RHS
:
3184 reduction_op
= gimple_assign_rhs1 (stmt
);
3186 case GIMPLE_BINARY_RHS
:
3187 reduction_op
= gimple_assign_rhs2 (stmt
);
3189 case GIMPLE_TERNARY_RHS
:
3190 reduction_op
= gimple_assign_rhs3 (stmt
);
3196 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
3199 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3202 "unsupported data-type ");
3203 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3204 TREE_TYPE (reduction_op
));
3205 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3210 mode
= TYPE_MODE (vectype
);
3211 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3214 orig_stmt
= STMT_VINFO_STMT (stmt_info
);
3216 code
= gimple_assign_rhs_code (orig_stmt
);
3218 /* Add in cost for initial definition. */
3219 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, scalar_to_vec
,
3220 stmt_info
, 0, vect_prologue
);
3222 /* Determine cost of epilogue code.
3224 We have a reduction operator that will reduce the vector in one statement.
3225 Also requires scalar extract. */
3227 if (!nested_in_vect_loop_p (loop
, orig_stmt
))
3229 if (reduc_code
!= ERROR_MARK
)
3231 epilogue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
3232 stmt_info
, 0, vect_epilogue
);
3233 epilogue_cost
+= add_stmt_cost (target_cost_data
, 1, vec_to_scalar
,
3234 stmt_info
, 0, vect_epilogue
);
3238 int vec_size_in_bits
= tree_to_uhwi (TYPE_SIZE (vectype
));
3240 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt
)));
3241 int element_bitsize
= tree_to_uhwi (bitsize
);
3242 int nelements
= vec_size_in_bits
/ element_bitsize
;
3244 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3246 /* We have a whole vector shift available. */
3247 if (VECTOR_MODE_P (mode
)
3248 && optab_handler (optab
, mode
) != CODE_FOR_nothing
3249 && have_whole_vector_shift (mode
))
3251 /* Final reduction via vector shifts and the reduction operator.
3252 Also requires scalar extract. */
3253 epilogue_cost
+= add_stmt_cost (target_cost_data
,
3254 exact_log2 (nelements
) * 2,
3255 vector_stmt
, stmt_info
, 0,
3257 epilogue_cost
+= add_stmt_cost (target_cost_data
, 1,
3258 vec_to_scalar
, stmt_info
, 0,
3262 /* Use extracts and reduction op for final reduction. For N
3263 elements, we have N extracts and N-1 reduction ops. */
3264 epilogue_cost
+= add_stmt_cost (target_cost_data
,
3265 nelements
+ nelements
- 1,
3266 vector_stmt
, stmt_info
, 0,
3271 if (dump_enabled_p ())
3272 dump_printf (MSG_NOTE
,
3273 "vect_model_reduction_cost: inside_cost = %d, "
3274 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost
,
3275 prologue_cost
, epilogue_cost
);
/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  unsigned inside_cost, prologue_cost;

  /* loop cost for vec_loop.  */
  inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
			       stmt_info, 0, vect_body);

  /* prologue cost for vec_init and vec_step.  */
  prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
				 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_induction_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function get_initial_def_for_induction

   Input:
   STMT - a stmt that performs an induction operation in the loop.
   IV_PHI - the initial value of the induction variable

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
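/* Concretely (values purely illustrative): for X == 0, S == 3 and a vector
   of 4 units this routine builds

     vec_init = { 0, 3, 6, 9 }
     vec_step = { 12, 12, 12, 12 }    // VF * S replicated

   and the update inside the loop body is simply vec_iv + vec_step.  */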
3320 get_initial_def_for_induction (gimple iv_phi
)
3322 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (iv_phi
);
3323 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3324 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3327 edge pe
= loop_preheader_edge (loop
);
3328 struct loop
*iv_loop
;
3330 tree new_vec
, vec_init
, vec_step
, t
;
3333 gimple init_stmt
, new_stmt
;
3334 gphi
*induction_phi
;
3335 tree induc_def
, vec_def
, vec_dest
;
3336 tree init_expr
, step_expr
;
3337 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3341 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
3342 bool nested_in_vect_loop
= false;
3343 gimple_seq stmts
= NULL
;
3344 imm_use_iterator imm_iter
;
3345 use_operand_p use_p
;
3349 gimple_stmt_iterator si
;
3350 basic_block bb
= gimple_bb (iv_phi
);
3354 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3355 if (nested_in_vect_loop_p (loop
, iv_phi
))
3357 nested_in_vect_loop
= true;
3358 iv_loop
= loop
->inner
;
3362 gcc_assert (iv_loop
== (gimple_bb (iv_phi
))->loop_father
);
3364 latch_e
= loop_latch_edge (iv_loop
);
3365 loop_arg
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, latch_e
);
3367 step_expr
= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info
);
3368 gcc_assert (step_expr
!= NULL_TREE
);
3370 pe
= loop_preheader_edge (iv_loop
);
3371 init_expr
= PHI_ARG_DEF_FROM_EDGE (iv_phi
,
3372 loop_preheader_edge (iv_loop
));
3374 vectype
= get_vectype_for_scalar_type (TREE_TYPE (init_expr
));
3375 resvectype
= get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi
)));
3376 gcc_assert (vectype
);
3377 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3378 ncopies
= vf
/ nunits
;
3380 gcc_assert (phi_info
);
3381 gcc_assert (ncopies
>= 1);
3383 /* Convert the step to the desired type. */
3384 step_expr
= force_gimple_operand (fold_convert (TREE_TYPE (vectype
),
3386 &stmts
, true, NULL_TREE
);
3389 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3390 gcc_assert (!new_bb
);
3393 /* Find the first insertion point in the BB. */
3394 si
= gsi_after_labels (bb
);
3396 /* Create the vector that holds the initial_value of the induction. */
3397 if (nested_in_vect_loop
)
3399 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3400 been created during vectorization of previous stmts. We obtain it
3401 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3402 vec_init
= vect_get_vec_def_for_operand (init_expr
, iv_phi
, NULL
);
3403 /* If the initial value is not of proper type, convert it. */
3404 if (!useless_type_conversion_p (vectype
, TREE_TYPE (vec_init
)))
3407 = gimple_build_assign (vect_get_new_vect_var (vectype
,
3411 build1 (VIEW_CONVERT_EXPR
, vectype
,
3413 vec_init
= make_ssa_name (gimple_assign_lhs (new_stmt
), new_stmt
);
3414 gimple_assign_set_lhs (new_stmt
, vec_init
);
3415 new_bb
= gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop
),
3417 gcc_assert (!new_bb
);
3418 set_vinfo_for_stmt (new_stmt
,
3419 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3424 vec
<constructor_elt
, va_gc
> *v
;
3426 /* iv_loop is the loop to be vectorized. Create:
3427 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3428 new_var
= vect_get_new_vect_var (TREE_TYPE (vectype
),
3429 vect_scalar_var
, "var_");
3430 new_name
= force_gimple_operand (fold_convert (TREE_TYPE (vectype
),
3432 &stmts
, false, new_var
);
3435 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3436 gcc_assert (!new_bb
);
3439 vec_alloc (v
, nunits
);
3440 bool constant_p
= is_gimple_min_invariant (new_name
);
3441 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3442 for (i
= 1; i
< nunits
; i
++)
3444 /* Create: new_name_i = new_name + step_expr */
3445 new_name
= fold_build2 (PLUS_EXPR
, TREE_TYPE (new_name
),
3446 new_name
, step_expr
);
3447 if (!is_gimple_min_invariant (new_name
))
3449 init_stmt
= gimple_build_assign (new_var
, new_name
);
3450 new_name
= make_ssa_name (new_var
, init_stmt
);
3451 gimple_assign_set_lhs (init_stmt
, new_name
);
3452 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
3453 gcc_assert (!new_bb
);
3454 if (dump_enabled_p ())
3456 dump_printf_loc (MSG_NOTE
, vect_location
,
3457 "created new init_stmt: ");
3458 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, init_stmt
, 0);
3459 dump_printf (MSG_NOTE
, "\n");
3463 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3465 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3467 new_vec
= build_vector_from_ctor (vectype
, v
);
3469 new_vec
= build_constructor (vectype
, v
);
3470 vec_init
= vect_init_vector (iv_phi
, new_vec
, vectype
, NULL
);
3474 /* Create the vector that holds the step of the induction. */
3475 if (nested_in_vect_loop
)
3476 /* iv_loop is nested in the loop to be vectorized. Generate:
3477 vec_step = [S, S, S, S] */
3478 new_name
= step_expr
;
3481 /* iv_loop is the loop to be vectorized. Generate:
3482 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3483 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr
)))
3485 expr
= build_int_cst (integer_type_node
, vf
);
3486 expr
= fold_convert (TREE_TYPE (step_expr
), expr
);
3489 expr
= build_int_cst (TREE_TYPE (step_expr
), vf
);
3490 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3492 if (TREE_CODE (step_expr
) == SSA_NAME
)
3493 new_name
= vect_init_vector (iv_phi
, new_name
,
3494 TREE_TYPE (step_expr
), NULL
);
3497 t
= unshare_expr (new_name
);
3498 gcc_assert (CONSTANT_CLASS_P (new_name
)
3499 || TREE_CODE (new_name
) == SSA_NAME
);
3500 stepvectype
= get_vectype_for_scalar_type (TREE_TYPE (new_name
));
3501 gcc_assert (stepvectype
);
3502 new_vec
= build_vector_from_val (stepvectype
, t
);
3503 vec_step
= vect_init_vector (iv_phi
, new_vec
, stepvectype
, NULL
);
3506 /* Create the following def-use cycle:
3511 vec_iv = PHI <vec_init, vec_loop>
3515 vec_loop = vec_iv + vec_step; */
3517 /* Create the induction-phi that defines the induction-operand. */
3518 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
3519 induction_phi
= create_phi_node (vec_dest
, iv_loop
->header
);
3520 set_vinfo_for_stmt (induction_phi
,
3521 new_stmt_vec_info (induction_phi
, loop_vinfo
, NULL
));
3522 induc_def
= PHI_RESULT (induction_phi
);
3524 /* Create the iv update inside the loop */
3525 new_stmt
= gimple_build_assign (vec_dest
, PLUS_EXPR
, induc_def
, vec_step
);
3526 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3527 gimple_assign_set_lhs (new_stmt
, vec_def
);
3528 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3529 set_vinfo_for_stmt (new_stmt
, new_stmt_vec_info (new_stmt
, loop_vinfo
,
3532 /* Set the arguments of the phi node: */
3533 add_phi_arg (induction_phi
, vec_init
, pe
, UNKNOWN_LOCATION
);
3534 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (iv_loop
),
3538 /* In case that vectorization factor (VF) is bigger than the number
3539 of elements that we can fit in a vectype (nunits), we have to generate
3540 more than one vector stmt - i.e - we need to "unroll" the
3541 vector stmt by a factor VF/nunits. For more details see documentation
3542 in vectorizable_operation. */
3546 stmt_vec_info prev_stmt_vinfo
;
3547 /* FORNOW. This restriction should be relaxed. */
3548 gcc_assert (!nested_in_vect_loop
);
3550 /* Create the vector that holds the step of the induction. */
3551 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr
)))
3553 expr
= build_int_cst (integer_type_node
, nunits
);
3554 expr
= fold_convert (TREE_TYPE (step_expr
), expr
);
3557 expr
= build_int_cst (TREE_TYPE (step_expr
), nunits
);
3558 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3560 if (TREE_CODE (step_expr
) == SSA_NAME
)
3561 new_name
= vect_init_vector (iv_phi
, new_name
,
3562 TREE_TYPE (step_expr
), NULL
);
3563 t
= unshare_expr (new_name
);
3564 gcc_assert (CONSTANT_CLASS_P (new_name
)
3565 || TREE_CODE (new_name
) == SSA_NAME
);
3566 new_vec
= build_vector_from_val (stepvectype
, t
);
3567 vec_step
= vect_init_vector (iv_phi
, new_vec
, stepvectype
, NULL
);
3569 vec_def
= induc_def
;
3570 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
3571 for (i
= 1; i
< ncopies
; i
++)
3573 /* vec_i = vec_prev + vec_step */
3574 new_stmt
= gimple_build_assign (vec_dest
, PLUS_EXPR
,
3576 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3577 gimple_assign_set_lhs (new_stmt
, vec_def
);
3579 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3580 if (!useless_type_conversion_p (resvectype
, vectype
))
3583 = gimple_build_assign
3584 (vect_get_new_vect_var (resvectype
, vect_simple_var
,
3587 build1 (VIEW_CONVERT_EXPR
, resvectype
,
3588 gimple_assign_lhs (new_stmt
)));
3589 gimple_assign_set_lhs (new_stmt
,
3591 (gimple_assign_lhs (new_stmt
), new_stmt
));
3592 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3594 set_vinfo_for_stmt (new_stmt
,
3595 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3596 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
3597 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
3601 if (nested_in_vect_loop
)
3603 /* Find the loop-closed exit-phi of the induction, and record
3604 the final vector of induction results: */
3606 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
3608 gimple use_stmt
= USE_STMT (use_p
);
3609 if (is_gimple_debug (use_stmt
))
3612 if (!flow_bb_inside_loop_p (iv_loop
, gimple_bb (use_stmt
)))
3614 exit_phi
= use_stmt
;
3620 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
3621 /* FORNOW. Currently not supporting the case that an inner-loop induction
3622 is not used in the outer-loop (i.e. only outside the outer-loop). */
3623 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
3624 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
3626 STMT_VINFO_VEC_STMT (stmt_vinfo
) = new_stmt
;
3627 if (dump_enabled_p ())
3629 dump_printf_loc (MSG_NOTE
, vect_location
,
3630 "vector of inductions after inner-loop:");
3631 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
3632 dump_printf (MSG_NOTE
, "\n");
3638 if (dump_enabled_p ())
3640 dump_printf_loc (MSG_NOTE
, vect_location
,
3641 "transform induction: created def-use cycle: ");
3642 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, induction_phi
, 0);
3643 dump_printf (MSG_NOTE
, "\n");
3644 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
3645 SSA_NAME_DEF_STMT (vec_def
), 0);
3646 dump_printf (MSG_NOTE
, "\n");
3649 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
3650 if (!useless_type_conversion_p (resvectype
, vectype
))
3652 new_stmt
= gimple_build_assign (vect_get_new_vect_var (resvectype
,
3656 build1 (VIEW_CONVERT_EXPR
, resvectype
,
3658 induc_def
= make_ssa_name (gimple_assign_lhs (new_stmt
), new_stmt
);
3659 gimple_assign_set_lhs (new_stmt
, induc_def
);
3660 si
= gsi_after_labels (bb
);
3661 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3662 set_vinfo_for_stmt (new_stmt
,
3663 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3664 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt
))
3665 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi
));
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
	of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
	performs.  This vector will be used as the initial value of the
	vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries), if
   ADJUSTMENT_DEF is not NULL, and Option2 otherwise.

   A cost model should help decide between these two schemes.  */
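/* Concretely, for 's = s + a[i]' with init_val == 5 and a vector of 4
   units (illustrative values): Option1 returns [0,0,0,0] and reports an
   ADJUSTMENT_DEF of 5 to be added back in the epilogue, while Option2
   would return [5,0,0,0] and need no adjustment.  */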
3716 get_initial_def_for_reduction (gimple stmt
, tree init_val
,
3717 tree
*adjustment_def
)
3719 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3720 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3721 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3722 tree scalar_type
= TREE_TYPE (init_val
);
3723 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
3725 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3730 bool nested_in_vect_loop
= false;
3732 REAL_VALUE_TYPE real_init_val
= dconst0
;
3733 int int_init_val
= 0;
3734 gimple def_stmt
= NULL
;
3736 gcc_assert (vectype
);
3737 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3739 gcc_assert (POINTER_TYPE_P (scalar_type
) || INTEGRAL_TYPE_P (scalar_type
)
3740 || SCALAR_FLOAT_TYPE_P (scalar_type
));
3742 if (nested_in_vect_loop_p (loop
, stmt
))
3743 nested_in_vect_loop
= true;
3745 gcc_assert (loop
== (gimple_bb (stmt
))->loop_father
);
3747 /* In case of double reduction we only create a vector variable to be put
3748 in the reduction phi node. The actual statement creation is done in
3749 vect_create_epilog_for_reduction. */
3750 if (adjustment_def
&& nested_in_vect_loop
3751 && TREE_CODE (init_val
) == SSA_NAME
3752 && (def_stmt
= SSA_NAME_DEF_STMT (init_val
))
3753 && gimple_code (def_stmt
) == GIMPLE_PHI
3754 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
3755 && vinfo_for_stmt (def_stmt
)
3756 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
3757 == vect_double_reduction_def
)
3759 *adjustment_def
= NULL
;
3760 return vect_create_destination_var (init_val
, vectype
);
3763 if (TREE_CONSTANT (init_val
))
3765 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3766 init_value
= build_real (scalar_type
, TREE_REAL_CST (init_val
));
3768 init_value
= build_int_cst (scalar_type
, TREE_INT_CST_LOW (init_val
));
3771 init_value
= init_val
;
3775 case WIDEN_SUM_EXPR
:
3784 /* ADJUSMENT_DEF is NULL when called from
3785 vect_create_epilog_for_reduction to vectorize double reduction. */
3788 if (nested_in_vect_loop
)
3789 *adjustment_def
= vect_get_vec_def_for_operand (init_val
, stmt
,
3792 *adjustment_def
= init_val
;
3795 if (code
== MULT_EXPR
)
3797 real_init_val
= dconst1
;
3801 if (code
== BIT_AND_EXPR
)
3804 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3805 def_for_init
= build_real (scalar_type
, real_init_val
);
3807 def_for_init
= build_int_cst (scalar_type
, int_init_val
);
3809 /* Create a vector of '0' or '1' except the first element. */
3810 elts
= XALLOCAVEC (tree
, nunits
);
3811 for (i
= nunits
- 2; i
>= 0; --i
)
3812 elts
[i
+ 1] = def_for_init
;
3814 /* Option1: the first element is '0' or '1' as well. */
3817 elts
[0] = def_for_init
;
3818 init_def
= build_vector (vectype
, elts
);
3822 /* Option2: the first element is INIT_VAL. */
3824 if (TREE_CONSTANT (init_val
))
3825 init_def
= build_vector (vectype
, elts
);
3828 vec
<constructor_elt
, va_gc
> *v
;
3829 vec_alloc (v
, nunits
);
3830 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, init_val
);
3831 for (i
= 1; i
< nunits
; ++i
)
3832 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[i
]);
3833 init_def
= build_constructor (vectype
, v
);
3843 *adjustment_def
= NULL_TREE
;
3844 init_def
= vect_get_vec_def_for_operand (init_val
, stmt
, NULL
);
3848 init_def
= build_vector_from_val (vectype
, init_value
);
3858 /* Function vect_create_epilog_for_reduction
3860 Create code at the loop-epilog to finalize the result of a reduction
3863 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3864 reduction statements.
3865 STMT is the scalar reduction stmt that is being vectorized.
3866 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3867 number of elements that we can fit in a vectype (nunits). In this case
3868 we have to generate more than one vector stmt - i.e - we need to "unroll"
3869 the vector stmt by a factor VF/nunits. For more details see documentation
3870 in vectorizable_operation.
3871 REDUC_CODE is the tree-code for the epilog reduction.
3872 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3874 REDUC_INDEX is the index of the operand in the right hand side of the
3875 statement that is defined by REDUCTION_PHI.
3876 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3877 SLP_NODE is an SLP node containing a group of reduction statements. The
3878 first one in this group is STMT.
3881 1. Creates the reduction def-use cycles: sets the arguments for
3883 The loop-entry argument is the vectorized initial-value of the reduction.
3884 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3886 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3887 by applying the operation specified by REDUC_CODE if available, or by
3888 other means (whole-vector shifts or a scalar loop).
3889 The function also creates a new phi node at the loop exit to preserve
3890 loop-closed form, as illustrated below.
3892 The flow at the entry to this function:
3895 vec_def = phi <null, null> # REDUCTION_PHI
3896 VECT_DEF = vector_stmt # vectorized form of STMT
3897 s_loop = scalar_stmt # (scalar) STMT
3899 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3903 The above is transformed by this function into:
3906 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3907 VECT_DEF = vector_stmt # vectorized form of STMT
3908 s_loop = scalar_stmt # (scalar) STMT
3910 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3911 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3912 v_out2 = reduce <v_out1>
3913 s_out3 = extract_field <v_out2, 0>
3914 s_out4 = adjust_result <s_out3>
static void
vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
                                  int ncopies, enum tree_code reduc_code,
                                  vec<gimple> reduction_phis,
                                  int reduc_index, bool double_reduc,
                                  slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple new_phi = NULL, phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
  gimple epilog_stmt = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple exit_phi;
  tree bitsize;
  tree adjustment_def = NULL;
  tree vec_initial_def = NULL;
  tree reduction_op, expr, def;
  tree orig_name, scalar_result;
  imm_use_iterator imm_iter, phi_imm_iter;
  use_operand_p use_p, phi_use_p;
  gimple use_stmt, orig_stmt, reduction_phi = NULL;
  bool nested_in_vect_loop = false;
  auto_vec<gimple> new_phis;
  auto_vec<gimple> inner_phis;
  enum vect_def_type dt = vect_unknown_def_type;
  int j, i;
  auto_vec<tree> scalar_results;
  unsigned int group_size = 1, k, ratio;
  auto_vec<tree> vec_initial_defs;
  auto_vec<gimple> phis;
  bool slp_reduc = false;
  tree new_phi_result;
  gimple inner_phi = NULL;

  if (slp_node)
    group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_in_vect_loop = true;
      gcc_assert (!slp_node);
    }

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = reduc_index ?
                     gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_TERNARY_RHS:
      reduction_op = gimple_op (stmt, reduc_index + 1);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);
  /* 1. Create the reduction def-use cycle:
     Set the arguments of REDUCTION_PHIS, i.e., transform

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT

     into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT

     (in case of SLP, do it for all the phis).  */

  /* Get the loop-entry arguments.  */
  if (slp_node)
    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
                       NULL, slp_node, reduc_index);
  else
    {
      vec_initial_defs.create (1);
      /* For the case of reduction, vect_get_vec_def_for_operand returns
         the scalar def before the loop, that defines the initial value
         of the reduction variable.  */
      vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                      &adjustment_def);
      vec_initial_defs.quick_push (vec_initial_def);
    }

  /* Set phi nodes arguments.  */
  FOR_EACH_VEC_ELT (reduction_phis, i, phi)
    {
      tree vec_init_def, def;
      gimple_seq stmts;
      vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
                                           true, NULL_TREE);
      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      def = vect_defs[i];
      for (j = 0; j < ncopies; j++)
        {
          /* Set the loop-entry arg of the reduction-phi.  */
          add_phi_arg (as_a <gphi *> (phi), vec_init_def,
                       loop_preheader_edge (loop), UNKNOWN_LOCATION);

          /* Set the loop-latch arg for the reduction-phi.  */
          if (j > 0)
            def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);

          add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
                       UNKNOWN_LOCATION);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "transform reduction: created def-use cycle: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
              dump_printf (MSG_NOTE, "\n");
            }

          phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
        }
    }
  /* 2. Create epilog code.
        The reduction epilog code operates across the elements of the vector
        of partial results computed by the vectorized loop.
        The reduction epilog code consists of:

        step 1: compute the scalar result in a vector (v_out2)
        step 2: extract the scalar result (s_out3) from the vector (v_out2)
        step 3: adjust the scalar result (s_out3) if needed.

        Step 1 can be accomplished using one the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

        The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  */
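  /* For instance, with scheme 2 on a 4-element vector v = {a0, a1, a2, a3},
     step 1 takes log2(4) = 2 rounds of shift-and-op:
         v' = vec_shift <v, 2>       # {a2, a3, 0, 0}
         v  = vop <v, v'>            # {a0+a2, a1+a3, ...}
         v' = vec_shift <v, 1>       # {a1+a3, ...}
         v  = vop <v, v'>            # {a0+a1+a2+a3, ...}
     after which step 2 extracts lane 0 (shown here assuming a sum
     reduction; any supported vop works the same way).  */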
  /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
         v_out1 = phi <VECT_DEF>
         Store them in NEW_PHIS.  */

  exit_bb = single_exit (loop)->dest;
  prev_phi_info = NULL;
  new_phis.create (vect_defs.length ());
  FOR_EACH_VEC_ELT (vect_defs, i, def)
    {
      for (j = 0; j < ncopies; j++)
        {
          tree new_def = copy_ssa_name (def);
          phi = create_phi_node (new_def, exit_bb);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
          if (j == 0)
            new_phis.quick_push (phi);
          else
            {
              def = vect_get_vec_def_for_stmt_copy (dt, def);
              STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
            }

          SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
          prev_phi_info = vinfo_for_stmt (phi);
        }
    }

  /* The epilogue is created for the outer-loop, i.e., for the loop being
     vectorized.  Create exit phis for the outer loop.  */
  if (double_reduc)
    {
      loop = outer_loop;
      exit_bb = single_exit (loop)->dest;
      inner_phis.create (vect_defs.length ());
      FOR_EACH_VEC_ELT (new_phis, i, phi)
        {
          tree new_result = copy_ssa_name (PHI_RESULT (phi));
          gphi *outer_phi = create_phi_node (new_result, exit_bb);
          SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                           PHI_RESULT (phi));
          set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                            loop_vinfo, NULL));
          inner_phis.quick_push (phi);
          new_phis[i] = outer_phi;
          prev_phi_info = vinfo_for_stmt (outer_phi);
          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
            {
              phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
              new_result = copy_ssa_name (PHI_RESULT (phi));
              outer_phi = create_phi_node (new_result, exit_bb);
              SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                               PHI_RESULT (phi));
              set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                                loop_vinfo,
                                                                NULL));
              STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
              prev_phi_info = vinfo_for_stmt (outer_phi);
            }
        }
    }

  exit_gsi = gsi_after_labels (exit_bb);
  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }

  code = gimple_assign_rhs_code (orig_stmt);
  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
     partial results are added and not subtracted.  */
  if (code == MINUS_EXPR)
    code = PLUS_EXPR;

  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  scalar_results.create (group_size);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is double reduction, i.e., the use of reduction is
     outside the outer-loop).  The final vector of partial results will be used
     in the vectorized outer-loop, or reduced to a scalar result at the end of
     the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;
  /* SLP reduction without reduction chain, e.g.,
     for (i = 0; i < n; i++)
       {
         a2 = operation (a1)
         b2 = operation (b1)  */
  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));

  /* In case of reduction chain, e.g.,
     for (i = 0; i < n; i++)
       {
         a2 = operation (a1)
         a3 = operation (a2),

     we may end up with more than one vector result.  Here we reduce them to
     one vector.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      tree first_vect = PHI_RESULT (new_phis[0]);
      tree tmp;
      gassign *new_vec_stmt = NULL;

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      for (k = 1; k < new_phis.length (); k++)
        {
          gimple next_phi = new_phis[k];
          tree second_vect = PHI_RESULT (next_phi);

          tmp = build2 (code, vectype, first_vect, second_vect);
          new_vec_stmt = gimple_build_assign (vec_dest, tmp);
          first_vect = make_ssa_name (vec_dest, new_vec_stmt);
          gimple_assign_set_lhs (new_vec_stmt, first_vect);
          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
        }

      new_phi_result = first_vect;
      if (new_vec_stmt)
        {
          new_phis.truncate (0);
          new_phis.safe_push (new_vec_stmt);
        }
    }
  else
    new_phi_result = PHI_RESULT (new_phis[0]);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  In SLP we simply need to extract all the elements from the
         vector (without reducing them), so we use scalar shifts.  */
  if (reduc_code != ERROR_MARK && !slp_reduc)
    {
      tree tmp;
      tree vec_elem_type;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Reduce using direct vector reduction.\n");

      vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
      if (!useless_type_conversion_p (scalar_type, vec_elem_type))
        {
          tree tmp_dest =
              vect_create_destination_var (scalar_dest, vec_elem_type);
          tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
          epilog_stmt = gimple_build_assign (tmp_dest, tmp);
          new_temp = make_ssa_name (tmp_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

          tmp = build1 (NOP_EXPR, scalar_type, new_temp);
        }
      else
        tmp = build1 (reduc_code, scalar_type, new_phi_result);
      epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      scalar_results.safe_push (new_temp);
    }
  else
    {
      bool reduce_with_shift = have_whole_vector_shift (mode);
      int element_bitsize = tree_to_uhwi (bitsize);
      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
      tree vec_temp;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        reduce_with_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode) == CODE_FOR_nothing)
            reduce_with_shift = false;
        }

      if (reduce_with_shift && !slp_reduc)
        {
          int nelements = vec_size_in_bits / element_bitsize;
          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
          int elt_offset;
          tree rhs;

          tree zero_vec = build_zero_cst (vectype);
          /*** Case 2: Create:
             for (offset = nelements/2; offset >= 1; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using vector shifts\n");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = new_phi_result;
          for (elt_offset = nelements / 2;
               elt_offset >= 1;
               elt_offset /= 2)
            {
              calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
              tree mask = vect_gen_perm_mask_any (vectype, sel);
              epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
                                                 new_temp, zero_vec, mask);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
                                                 new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          /* 2.4  Extract the final scalar result.  Create:
             s_out3 = extract_field <v_out2, bitpos>  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "extract scalar result\n");

          rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
                        bitsize, bitsize_zero_node);
          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
          scalar_results.safe_push (new_temp);
        }
      else
        {
          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>  // For non SLP cases
               }  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using scalar code.\n");

          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          FOR_EACH_VEC_ELT (new_phis, i, new_phi)
            {
              int bit_offset;
              if (gimple_code (new_phi) == GIMPLE_PHI)
                vec_temp = PHI_RESULT (new_phi);
              else
                vec_temp = gimple_assign_lhs (new_phi);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitsize_zero_node);
              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              /* In SLP we don't need to apply reduction operation, so we just
                 collect s' values in SCALAR_RESULTS.  */
              if (slp_reduc)
                scalar_results.safe_push (new_temp);

              for (bit_offset = element_bitsize;
                   bit_offset < vec_size_in_bits;
                   bit_offset += element_bitsize)
                {
                  tree bitpos = bitsize_int (bit_offset);
                  tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
                                     bitsize, bitpos);

                  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
                  new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
                  gimple_assign_set_lhs (epilog_stmt, new_name);
                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

                  if (slp_reduc)
                    {
                      /* In SLP we don't need to apply reduction operation, so
                         we just collect s' values in SCALAR_RESULTS.  */
                      new_temp = new_name;
                      scalar_results.safe_push (new_name);
                    }
                  else
                    {
                      epilog_stmt = gimple_build_assign (new_scalar_dest, code,
                                                         new_name, new_temp);
                      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
                      gimple_assign_set_lhs (epilog_stmt, new_temp);
                      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
                    }
                }
            }

          /* The only case where we need to reduce scalar results in SLP, is
             unrolling.  If the size of SCALAR_RESULTS is greater than
             GROUP_SIZE, we reduce them combining elements modulo
             GROUP_SIZE.  */
          if (slp_reduc)
            {
              tree res, first_res, new_res;
              gimple new_stmt;

              /* Reduce multiple scalar results in case of SLP unrolling.  */
              for (j = group_size; scalar_results.iterate (j, &res);
                   j++)
                {
                  first_res = scalar_results[j % group_size];
                  new_stmt = gimple_build_assign (new_scalar_dest, code,
                                                  first_res, res);
                  new_res = make_ssa_name (new_scalar_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_res);
                  gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
                  scalar_results[j % group_size] = new_res;
                }
            }
          else
            /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
            scalar_results.safe_push (new_temp);
        }
    }
vect_finalize_reduction:

  if (double_reduc)
    loop = loop->inner;

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */

  if (adjustment_def)
    {
      gcc_assert (!slp_reduc);
      if (nested_in_vect_loop)
        {
          new_phi = new_phis[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          new_temp = scalar_results[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      if (nested_in_vect_loop)
        {
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo,
                                                 NULL));
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc)
            scalar_results.quick_push (new_temp);
          else
            scalar_results[0] = new_temp;
        }
      else
        scalar_results[0] = new_temp;

      new_phis[0] = epilog_stmt;
    }
  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
          phis with new adjusted scalar results, i.e., replace use <s_out0>
          with use <s_out4>.

     Transform:
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out0>
          use <s_out0>

     into:

        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */

  /* In SLP reduction chain we reduce vector results into one vector if
     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
     the last stmt in the reduction chain, since we are looking for the loop
     exit phi node.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      scalar_dest = gimple_assign_lhs (
                        SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
      group_size = 1;
    }

  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
     case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
     need to match SCALAR_RESULTS with corresponding statements.  The first
     (GROUP_SIZE / number of new vector stmts) scalar results correspond to
     the first vector stmt, etc.
     (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
  if (group_size > new_phis.length ())
    {
      ratio = group_size / new_phis.length ();
      gcc_assert (!(group_size % new_phis.length ()));
    }
  else
    ratio = 1;

  for (k = 0; k < group_size; k++)
    {
      if (k % ratio == 0)
        {
          epilog_stmt = new_phis[k / ratio];
          reduction_phi = reduction_phis[k / ratio];
          if (double_reduc)
            inner_phi = inner_phis[k / ratio];
        }

      if (slp_reduc)
        {
          gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];

          orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
          /* SLP statements can't participate in patterns.  */
          gcc_assert (!orig_stmt);
          scalar_dest = gimple_assign_lhs (current_stmt);
        }
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses -
         one at the latch block, and one at the loop exit).  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
            && !is_gimple_debug (USE_STMT (use_p)))
          phis.safe_push (USE_STMT (use_p));

      /* While we expect to have found an exit_phi because of loop-closed-ssa
         form we can end up without one if the scalar cycle is dead.  */

      FOR_EACH_VEC_ELT (phis, i, exit_phi)
        {
          if (outer_loop)
            {
              stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
              gphi *vect_phi;

              /* FORNOW. Currently not supporting the case that an inner-loop
                 reduction is not used in the outer-loop (but only outside the
                 outer-loop), unless it is double reduction.  */
              gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                           && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
                          || double_reduc);

              if (double_reduc)
                STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
              else
                STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
              if (!double_reduc
                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
                       != vect_double_reduction_def)
                continue;

              /* Handle double reduction:

                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
                 stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
                 stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)

                 At that point the regular reduction (stmt2 and stmt3) is
                 already vectorized, as well as the exit phi node, stmt4.
                 Here we vectorize the phi node of double reduction, stmt1, and
                 update all relevant statements.  */

              /* Go through all the uses of s2 to find double reduction phi
                 node, i.e., stmt1 above.  */
              orig_name = PHI_RESULT (exit_phi);
              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
                {
                  stmt_vec_info use_stmt_vinfo;
                  stmt_vec_info new_phi_vinfo;
                  tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
                  basic_block bb = gimple_bb (use_stmt);
                  gimple use;

                  /* Check that USE_STMT is really double reduction phi
                     node.  */
                  if (gimple_code (use_stmt) != GIMPLE_PHI
                      || gimple_phi_num_args (use_stmt) != 2
                      || bb->loop_father != outer_loop)
                    continue;
                  use_stmt_vinfo = vinfo_for_stmt (use_stmt);
                  if (!use_stmt_vinfo
                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
                           != vect_double_reduction_def)
                    continue;

                  /* Create vector phi node for double reduction:
                     vs1 = phi <vs0, vs2>
                     vs1 was created previously in this function by a call to
                       vect_get_vec_def_for_operand and is stored in
                       vec_initial_def;
                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                     vs0 is created here.  */

                  /* Create vector phi node.  */
                  vect_phi = create_phi_node (vec_initial_def, bb);
                  new_phi_vinfo = new_stmt_vec_info (vect_phi,
                                    loop_vec_info_for_loop (outer_loop), NULL);
                  set_vinfo_for_stmt (vect_phi, new_phi_vinfo);

                  /* Create vs0 - initial def of the double reduction phi.  */
                  preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
                                             loop_preheader_edge (outer_loop));
                  init_def = get_initial_def_for_reduction (stmt,
                                                          preheader_arg, NULL);
                  vect_phi_init = vect_init_vector (use_stmt, init_def,
                                                    vectype, NULL);

                  /* Update phi node arguments with vs0 and vs2.  */
                  add_phi_arg (vect_phi, vect_phi_init,
                               loop_preheader_edge (outer_loop),
                               UNKNOWN_LOCATION);
                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                               loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "created double reduction phi node: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }

                  vect_phi_res = PHI_RESULT (vect_phi);

                  /* Replace the use, i.e., set the correct vs1 in the regular
                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                     loop is redundant.  */
                  use = reduction_phi;
                  for (j = 0; j < ncopies; j++)
                    {
                      edge pr_edge = loop_preheader_edge (loop);
                      SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
                      use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
                    }
                }
            }
        }

      phis.truncate (0);
      if (nested_in_vect_loop)
        {
          if (double_reduc)
            loop = outer_loop;
          else
            continue;
        }

      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses,
         one at the latch block, and one at the loop exit).  For double
         reductions we are looking for exit phis of the outer loop.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
            {
              if (!is_gimple_debug (USE_STMT (use_p)))
                phis.safe_push (USE_STMT (use_p));
            }
          else
            {
              if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
                {
                  tree phi_res = PHI_RESULT (USE_STMT (use_p));

                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
                    {
                      if (!flow_bb_inside_loop_p (loop,
                                             gimple_bb (USE_STMT (phi_use_p)))
                          && !is_gimple_debug (USE_STMT (phi_use_p)))
                        phis.safe_push (USE_STMT (phi_use_p));
                    }
                }
            }
        }

      FOR_EACH_VEC_ELT (phis, i, exit_phi)
        {
          /* Replace the uses:  */
          orig_name = PHI_RESULT (exit_phi);
          scalar_result = scalar_results[k];
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, scalar_result);
        }

      phis.truncate (0);
    }
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
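/* For example (continuing the accumulation of shorts into an int above, on
   a 128-bit vector target): the vectorization factor is derived from V8HI,
   i.e. VF = 8, while the vectorized statement itself produces V4SI values,
   so it has to be generated VF/4 = 2 times (the NCOPIES computed below).  */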
static bool
vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype_in = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code;
  machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  gphi *new_phi = NULL;
  tree scalar_type;
  bool is_simple_use;
  gimple orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int ncopies;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  bool single_defuse_cycle = false;
  tree reduc_def = NULL_TREE;
  gimple new_stmt = NULL;
  int j;
  tree ops[3];
  bool nested_cycle = false, found_nested_cycle_def = false;
  gimple reduc_def_stmt = NULL;
  /* The default is that the reduction variable is the last in statement.  */
  int reduc_index = 2;
  bool double_reduc = false, dummy;
  basic_block def_bb;
  struct loop * def_stmt_loop, *outer_loop = NULL;
  tree def_arg;
  gimple def_arg_stmt;
  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vect_defs;
  auto_vec<gimple> phis;
  int vec_num;
  tree def0, def1, tem, op0, op1 = NULL_TREE;

  /* In case of reduction chain we switch to the first stmt in the chain, but
     we don't update STMT_INFO, since only the last stmt is marked as reduction
     and has reduction properties.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_cycle = true;
    }

  /* 1. Is vectorizable reduction?  */
  /* Not supportable if the reduction variable is used in the loop, unless
     it's a reduction chain.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
      && !GROUP_FIRST_ELEMENT (stmt_info))
    return false;

  /* Reductions that are not used even in an enclosing outer-loop,
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */
  gcc_assert (is_gimple_assign (stmt));

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_TERNARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == ternary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      ops[2] = gimple_assign_rhs3 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == COND_EXPR && slp_node)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* Do not try to vectorize bit-precision reductions.  */
  if ((TYPE_PRECISION (scalar_type)
       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  In case of nested cycle this
     assumption is not true: we use reduc_index to record the index of the
     reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      /* The condition of COND_EXPR is checked in vectorizable_condition().  */
      if (i == 0 && code == COND_EXPR)
        continue;

      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
      if (!vectype_in)
        vectype_in = tem;
      gcc_assert (is_simple_use);

      if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
          && dt != vect_induction_def
          && !(dt == vect_nested_cycle && nested_cycle))
        return false;

      if (dt == vect_nested_cycle)
        {
          found_nested_cycle_def = true;
          reduc_def_stmt = def_stmt;
          reduc_index = i;
        }
    }

  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                        &def_stmt, &def, &dt, &tem);
  if (!vectype_in)
    vectype_in = tem;
  gcc_assert (is_simple_use);
  if (!found_nested_cycle_def)
    reduc_def_stmt = def_stmt;

  if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
    return false;

  if (!(dt == vect_reduction_def
        || dt == vect_nested_cycle
        || ((dt == vect_internal_def || dt == vect_external_def
             || dt == vect_constant_def || dt == vect_induction_def)
            && nested_cycle && found_nested_cycle_def)))
    {
      /* For pattern recognized stmts, orig_stmt might be a reduction,
         but some helper statements for the pattern might not, or
         might be COND_EXPRs with reduction uses in the condition.  */
      gcc_assert (orig_stmt);
      return false;
    }

  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
                                                       reduc_def_stmt,
                                                       !nested_cycle,
                                                       &dummy));
  else
    {
      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
                                             !nested_cycle, &dummy);
      /* We changed STMT to be the first stmt in reduction chain, hence we
         check that in this case the first element in the chain is STMT.  */
      gcc_assert (stmt == tmp
                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
    }

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
    return false;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
               / TYPE_VECTOR_SUBPARTS (vectype_in));

  gcc_assert (ncopies >= 1);

  vec_mode = TYPE_MODE (vectype_in);

  if (code == COND_EXPR)
    {
      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported condition in reduction\n");
          return false;
        }
    }
  else
    {
      /* 4. Supportable by target?  */

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
          || code == LROTATE_EXPR || code == RROTATE_EXPR)
        {
          /* Shifts and rotates are only supported by vectorizable_shifts,
             not vectorizable_reduction.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported shift or rotation.\n");
          return false;
        }

      /* 4.1. check support for the operation in the loop  */
      optab = optab_for_tree_code (code, vectype_in, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }

      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "op not supported by target.\n");

          if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
              || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                 < vect_min_worthwhile_factor (code))
            return false;

          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "proceeding using word mode.\n");
        }

      /* Worthwhile without SIMD support?  */
      if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not worthwhile without SIMD support.\n");
          return false;
        }
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: optab_handler (plus_optab, vect_int_mode])).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: optab_handler (widen_sum_optab,
             vect_short_mode)).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = gimple_assign_rhs_code (orig_stmt);
      gcc_assert (vectype_out);
      vec_mode = TYPE_MODE (vectype_out);
    }
  else
    {
      /* Regular reduction: use the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (nested_cycle)
    {
      def_bb = gimple_bb (reduc_def_stmt);
      def_stmt_loop = def_bb->loop_father;
      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                       loop_preheader_edge (def_stmt_loop));
      if (TREE_CODE (def_arg) == SSA_NAME
          && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
          && gimple_code (def_arg_stmt) == GIMPLE_PHI
          && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
          && vinfo_for_stmt (def_arg_stmt)
          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
               == vect_double_reduction_def)
        double_reduc = true;
    }

  epilog_reduc_code = ERROR_MARK;
  if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    {
      reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
                                         optab_default);
      if (!reduc_optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab for reduction.\n");

          epilog_reduc_code = ERROR_MARK;
        }
      else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
        {
          optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
          if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "reduc op not supported by target.\n");

              epilog_reduc_code = ERROR_MARK;
            }
        }
    }
  else
    {
      if (!nested_cycle || double_reduc)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no reduc code for scalar code.\n");

          return false;
        }
    }

  if (double_reduc && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in double reduction\n");

      return false;
    }

  /* In case of widening multiplication by a constant, we update the type
     of the constant to be the type of the other operand.  We check that the
     constant fits the type in the pattern recognition pass.  */
  if (code == DOT_PROD_EXPR
      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
    {
      if (TREE_CODE (ops[0]) == INTEGER_CST)
        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
      else if (TREE_CODE (ops[1]) == INTEGER_CST)
        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "invalid types in dot-prod\n");

          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");

  /* FORNOW: Multiple types are not supported for condition.  */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  /* If the reduction is used in an outer loop we need to generate
     VF intermediate results, like so (e.g. for ncopies=2):
        r0 = phi (init, r0)
        r1 = phi (init, r1)
        r0 = x0 + r0;
        r1 = x1 + r1;
    (i.e. we generate VF results in 2 registers).
    In this case we have a separate def-use cycle for each copy, and therefore
    for each copy we get the vector def for the reduction variable from the
    respective phi node created for this copy.

    Otherwise (the reduction is unused in the loop nest), we can combine
    together intermediate results, like so (e.g. for ncopies=2):
        r = phi (init, r)
        r = x0 + r;
        r = x1 + r;
   (i.e. we generate VF/2 results in a single register).
   In this case for each copy we get the vector def for the reduction variable
   from the vectorized reduction operation generated in the previous iteration.
   */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
    {
      single_defuse_cycle = true;
      epilog_copies = 1;
    }
  else
    epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  if (slp_node)
    {
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
                  == TYPE_VECTOR_SUBPARTS (vectype_in));
    }
  else
    {
      vec_num = 1;
      vec_oprnds0.create (1);
      if (op_type == ternary_op)
        vec_oprnds1.create (1);
    }

  phis.create (vec_num);
  vect_defs.create (vec_num);
  if (!slp_node)
    vect_defs.quick_push (NULL_TREE);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
        {
          for (i = 0; i < vec_num; i++)
            {
              /* Create the reduction-phi that defines the reduction
                 operand.  */
              new_phi = create_phi_node (vec_dest, loop->header);
              set_vinfo_for_stmt (new_phi,
                                  new_stmt_vec_info (new_phi, loop_vinfo,
                                                     NULL));
              if (j == 0 || slp_node)
                phis.quick_push (new_phi);
            }
        }

      if (code == COND_EXPR)
        {
          gcc_assert (!slp_node);
          vectorizable_condition (stmt, gsi, vec_stmt,
                                  PHI_RESULT (phis[0]),
                                  reduc_index, NULL);
          /* Multiple types are not supported for condition.  */
          break;
        }

      /* Handle uses.  */
      if (j == 0)
        {
          op0 = ops[!reduc_index];
          if (op_type == ternary_op)
            {
              if (reduc_index == 0)
                op1 = ops[2];
              else
                op1 = ops[1];
            }

          if (slp_node)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            {
              loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
                                                            stmt, NULL);
              vec_oprnds0.quick_push (loop_vec_def0);
              if (op_type == ternary_op)
                {
                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                NULL);
                  vec_oprnds1.quick_push (loop_vec_def1);
                }
            }
        }
      else
        {
          if (!slp_node)
            {
              enum vect_def_type dt;
              gimple dummy_stmt;
              tree dummy;

              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
                                  &dummy_stmt, &dummy, &dt);
              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
                                                              loop_vec_def0);
              vec_oprnds0[0] = loop_vec_def0;
              if (op_type == ternary_op)
                {
                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
                                      &dummy, &dt);
                  loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                  loop_vec_def1);
                  vec_oprnds1[0] = loop_vec_def1;
                }
            }

          if (single_defuse_cycle)
            reduc_def = gimple_assign_lhs (new_stmt);

          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }

      FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
        {
          if (slp_node)
            reduc_def = PHI_RESULT (phis[i]);
          else
            {
              if (!single_defuse_cycle || j == 0)
                reduc_def = PHI_RESULT (new_phi);
            }

          def1 = ((op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL);
          if (op_type == binary_op)
            {
              if (reduc_index == 0)
                expr = build2 (code, vectype_out, reduc_def, def0);
              else
                expr = build2 (code, vectype_out, def0, reduc_def);
            }
          else
            {
              if (reduc_index == 0)
                expr = build3 (code, vectype_out, reduc_def, def0, def1);
              else
                {
                  if (reduc_index == 1)
                    expr = build3 (code, vectype_out, def0, reduc_def, def1);
                  else
                    expr = build3 (code, vectype_out, def0, def1, reduc_def);
                }
            }

          new_stmt = gimple_build_assign (vec_dest, expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (slp_node)
            {
              SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              vect_defs.quick_push (new_temp);
            }
          else
            vect_defs[0] = new_temp;
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
    {
      new_temp = gimple_assign_lhs (*vec_stmt);
      vect_defs[0] = new_temp;
    }

  vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
                                    epilog_reduc_code, phis, reduc_index,
                                    double_reduc, slp_node);

  return true;
}
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */
int
vect_min_worthwhile_factor (enum tree_code code)
/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
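/* Illustrative example: for an induction i = 0, 1, 2, ... and VF = 4, the
   vectorized induction starts at {0, 1, 2, 3} and is advanced by the step
   vector {4, 4, 4, 4} (VF * step) in each vectorized iteration.  */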
bool
vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                        gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vec_def;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW. These restrictions should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi))
    {
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple exit_phi;
      edge latch_e;
      tree loop_arg;

      if (ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types in nested loop.\n");
          return false;
        }

      exit_phi = NULL;
      latch_e = loop_latch_edge (loop->inner);
      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          gimple use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
            {
              exit_phi = use_stmt;
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
          if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "inner-loop induction only used outside "
                                 "of the outer vectorized loop.\n");
              return false;
            }
        }
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_induction ===\n");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
  return true;
}
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */
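/* For instance, in "for (i = 0; i < n; i++) last = a[i];  use (last);"
   the stmt defining 'last' is "live": only the value it computes in the
   final iteration is needed after the loop.  */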
bool
vectorizable_live_operation (gimple stmt,
                             gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                             gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int op_type;
  tree op;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  enum tree_code code;
  enum gimple_rhs_class rhs_class;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  if (!is_gimple_assign (stmt))
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && gimple_call_lhs (stmt)
          && loop->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && loop->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          edge e = single_exit (loop);
          basic_block merge_bb = e->dest;
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          tree lhs = gimple_call_lhs (stmt);

          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              gimple use_stmt = USE_STMT (use_p);
              if (gimple_code (use_stmt) == GIMPLE_PHI
                  && gimple_bb (use_stmt) == merge_bb)
                {
                  if (vec_stmt)
                    {
                      tree vfm1
                        = build_int_cst (unsigned_type_node,
                                         loop_vinfo->vectorization_factor - 1);
                      SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
                    }
                  return true;
                }
            }
        }

      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* FORNOW. CHECKME. */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  op_type = TREE_CODE_LENGTH (code);
  rhs_class = get_gimple_rhs_class (code);
  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */

  for (i = 0; i < op_type; i++)
    {
      if (rhs_class == GIMPLE_SINGLE_RHS)
        op = TREE_OPERAND (gimple_op (stmt, 1), i);
      else
        op = gimple_op (stmt, i + 1);
      if (op
          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
                                  &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}
/* Kill any debug uses outside LOOP of SSA names defined in STMT.  */

static void
vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
{
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  def_operand_p def_p;
  gimple ustmt;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb;

          if (!is_gimple_debug (ustmt))
            continue;

          bb = gimple_bb (ustmt);

          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (gimple_debug_bind_p (ustmt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "killing debug use\n");

                  gimple_debug_bind_reset_value (ustmt);
                  update_stmt (ustmt);
                }
            }
        }
    }
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

static tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* This function generates the following statements:

   ni_name = number of iterations loop executes
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf

   and places them on the loop preheader edge.  */
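/* For example, with vf = 8 and ni_name = 103, this generates ratio = 12
   and ratio_mult_vf_name = 96; the remaining 103 - 96 = 7 iterations are
   left for the scalar epilogue loop.  */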
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree ni_name,
                                 tree *ratio_mult_vf_name_ptr,
                                 tree *ratio_name_ptr)
{
  tree ni_minus_gap_name;
  tree var;
  tree ratio_name;
  tree ratio_mult_vf_name;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf;

  log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
                                       ni_name,
                                       build_one_cst (TREE_TYPE (ni_name)));
      if (!is_gimple_val (ni_minus_gap_name))
        {
          var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
          gimple stmts = NULL;
          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
                                                    true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap_name = ni_name;

  /* Create: ratio = ni >> log2(vf) */
  /* ??? As we have ni == number of latch executions + 1, ni could
     have overflown to zero.  So avoid computing ratio based on ni
     but compute it using the fact that we know ratio will be at least
     one, thus via (ni - vf) >> log2(vf) + 1.  */
  ratio_name
    = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
                   fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
                                fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
                                             ni_minus_gap_name,
                                             build_int_cst
                                               (TREE_TYPE (ni_name), vf)),
                                log_vf),
                   build_int_cst (TREE_TYPE (ni_name), 1));
  if (!is_gimple_val (ratio_name))
    {
      var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
      gimple stmts = NULL;
      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *ratio_name_ptr = ratio_name;

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  if (ratio_mult_vf_name_ptr)
    {
      ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
                                        ratio_name, log_vf);
      if (!is_gimple_val (ratio_mult_vf_name))
        {
          var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
          gimple stmts = NULL;
          ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name,
                                                     &stmts, true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
      *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
    }
}
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool grouped_store;
  bool slp_scheduled = false;
  gimple stmt, pattern_stmt;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool transform_pattern_stmt = false;
  bool check_profitability = false;
  int th;
  /* Record number of iterations before we started tampering with the
     profile.  */
  gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");

  /* If the profile is imprecise, we have a chance to fix it up.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);

  /* Use the more conservative vectorization threshold.  If the number
     of iterations is constant assume the cost check has been performed
     by our caller.  If the threshold makes all loops profitable that
     run at least the vectorization factor number of times checking
     is pointless, too.  */
  th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Profitability threshold is %d loop iterations.\n",
                         th);
      check_profitability = true;
    }

  /* Version the loop first, if required, so the profitability check
     comes first.  */

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      vect_loop_versioning (loop_vinfo, th, check_profitability);
      check_profitability = false;
    }

  tree ni_name = vect_build_loop_niters (loop_vinfo);
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      vect_do_peeling_for_alignment (loop_vinfo, ni_name,
                                     th, check_profitability);
      check_profitability = false;
      /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
         be re-computed.  */
      ni_name = NULL_TREE;
    }

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */
5928 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo
)
5929 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
5933 ni_name
= vect_build_loop_niters (loop_vinfo
);
5934 vect_generate_tmps_on_preheader (loop_vinfo
, ni_name
, &ratio_mult_vf
,
5936 vect_do_peeling_for_loop_bound (loop_vinfo
, ni_name
, ratio_mult_vf
,
5937 th
, check_profitability
);
5939 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5940 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
5941 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
5945 ni_name
= vect_build_loop_niters (loop_vinfo
);
5946 vect_generate_tmps_on_preheader (loop_vinfo
, ni_name
, NULL
, &ratio
);
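
/* Illustrative note, not part of the original GCC sources: for a loop with
   n = 100 iterations and VF = 8, the vectorized loop executes
   ratio = 100 / 8 = 12 vector iterations covering ratio_mult_vf = 96 scalar
   iterations, and the scalar epilogue produced by
   vect_do_peeling_for_loop_bound runs the remaining 100 % 8 = 4 iterations.
   When the iteration count is a compile-time constant that VF divides
   exactly, only the ratio constant is built and no epilogue is needed.  */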

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists of one
     basic block (header + empty latch).  When the vectorizer supports more
     involved loop forms, the order in which the BBs are traversed will need
     to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          gphi *phi = si.phi ();
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "------>vectorizing phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }
          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, phi);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if (STMT_VINFO_VECTYPE (stmt_info)
              && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                  != (unsigned HOST_WIDE_INT) vectorization_factor)
              && dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
              vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
            }
        }
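
      /* Illustrative note, not part of the original GCC sources: for an
         induction variable such as "i" in "for (i = 0; i < n; i++)" with
         VF = 4, vectorizing its phi yields roughly

             vect_i    = { 0, 1, 2, 3 };
             vect_step = { 4, 4, 4, 4 };

         with "vect_i += vect_step" emitted in the loop body; this is the
         kind of code vect_transform_stmt generates for vect_induction_def
         phis.  */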

      pattern_stmt = NULL;
      for (gimple_stmt_iterator si = gsi_start_bb (bb);
           !gsi_end_p (si) || transform_pattern_stmt;)
        {
          bool is_store;

          if (transform_pattern_stmt)
            stmt = pattern_stmt;
          else
            {
              stmt = gsi_stmt (si);
              /* During vectorization remove existing clobber stmts.  */
              if (gimple_clobber_p (stmt))
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&si, true);
                  release_defs (stmt);
                  continue;
                }
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "------>vectorizing statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              gsi_next (&si);
              continue;
            }

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (stmt);
                }
              else
                {
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            transform_pattern_stmt = true;

          /* If pattern statement has def stmts, vectorize them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> vectorizing pattern def "
                                           "stmt: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      transform_pattern_stmt = false;
                    }
                }
              else
                transform_pattern_stmt = false;
            }
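
          /* Illustrative note, not part of the original GCC sources: a
             "pattern" is a canonical stmt the pattern recognizer substituted
             for one or more scalar stmts.  For instance, the pair

                 t = (int) a;        a, b of type short
                 s = t * (int) b;

             may be represented by a single WIDEN_MULT_EXPR pattern stmt,
             possibly with auxiliary def stmts collected in
             STMT_VINFO_PATTERN_DEF_SEQ; the code above ensures those def
             stmts are vectorized before the main pattern stmt itself.  */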

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              unsigned int nunits
                = (unsigned int)
                  TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              if (!STMT_SLP_TYPE (stmt_info)
                  && nunits != (unsigned int) vectorization_factor
                  && dump_enabled_p ())
                /* For SLP VF is set according to unrolling factor, and not
                   to vector size, hence for SLP this print is not valid.  */
                dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
            }

          /* SLP.  Schedule all the SLP instances when the first SLP stmt is
             reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "=== scheduling SLP instances ===\n");

                  vect_schedule_slp (loop_vinfo, NULL);
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
                  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
            }
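
          /* Illustrative note, not part of the original GCC sources: SLP
             (superword level parallelism) packs a group of isomorphic scalar
             stmts, e.g. explicit stores to a[4*i], a[4*i+1], a[4*i+2] and
             a[4*i+3] in the loop body, into single vector stmts.  All SLP
             instances are scheduled at once above; pure SLP stmts are then
             skipped, while hybrid SLP stmts (also used by non-SLP,
             loop-vectorized stmts) fall through and are additionally handled
             by the regular transform below.  */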

          /* -------- vectorize statement ------------ */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");

          grouped_store = false;
          is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
          if (is_store)
            {
              if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization of
                     the interleaving chain was completed - free all the
                     stores in the chain.  */
                  gsi_next (&si);
                  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  gimple store = gsi_stmt (si);
                  free_stmt_vec_info (store);
                  unlink_stmt_vdef (store);
                  gsi_remove (&si, true);
                  release_defs (store);
                }

              /* Stores can only appear at the end of pattern statements.  */
              gcc_assert (!transform_pattern_stmt);
              pattern_def_seq = NULL;
            }
          else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* Reduce loop iterations by the vectorization factor.  */
  scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
                      expected_iterations / vectorization_factor);
  loop->nb_iterations_upper_bound
    = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      && loop->nb_iterations_upper_bound != 0)
    loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
  if (loop->any_estimate)
    {
      loop->nb_iterations_estimate
        = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
          && loop->nb_iterations_estimate != 0)
        loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
    }
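
  /* Illustrative note, not part of the original GCC sources: if the scalar
     loop had nb_iterations_upper_bound = 1003 and VF = 4, the vector loop's
     upper bound becomes floor (1003 / 4) = 250; when an epilogue is forced
     by PEELING_FOR_GAPS the last vector iteration is instead done by the
     epilogue, so the bound drops further to 249.  The recorded iteration
     estimate is scaled the same way, and scale_loop_profile divides the
     profile counts by VF accordingly.  */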

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "LOOP VECTORIZED\n");
      if (loop->inner)
        dump_printf_loc (MSG_NOTE, vect_location,
                         "OUTER LOOP VECTORIZED\n");
      dump_printf (MSG_NOTE, "\n");