Fix missing ChangeLog entry for Graphite head files fix.
[official-gcc.git] / gcc / tree-vect-loop.c
blobcc272b23e8b1cb5864e6ca6b3a25d88d5ebf8595
1 /* Loop Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "optabs-tree.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "cfganal.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "cfgloop.h"
45 #include "params.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-vectorizer.h"
48 #include "gimple-fold.h"
49 #include "cgraph.h"
51 /* Loop Vectorization Pass.
53 This pass tries to vectorize loops.
55 For example, the vectorizer transforms the following simple loop:
57 short a[N]; short b[N]; short c[N]; int i;
59 for (i=0; i<N; i++){
60 a[i] = b[i] + c[i];
63 as if it was manually vectorized by rewriting the source code into:
65 typedef int __attribute__((mode(V8HI))) v8hi;
66 short a[N]; short b[N]; short c[N]; int i;
67 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
68 v8hi va, vb, vc;
70 for (i=0; i<N/8; i++){
71 vb = pb[i];
72 vc = pc[i];
73 va = vb + vc;
74 pa[i] = va;
77 The main entry to this pass is vectorize_loops(), in which
78 the vectorizer applies a set of analyses on a given set of loops,
79 followed by the actual vectorization transformation for the loops that
80 had successfully passed the analysis phase.
81 Throughout this pass we make a distinction between two types of
82 data: scalars (which are represented by SSA_NAMES), and memory references
83 ("data-refs"). These two types of data require different handling both
84 during analysis and transformation. The types of data-refs that the
85 vectorizer currently supports are ARRAY_REFS which base is an array DECL
86 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
87 accesses are required to have a simple (consecutive) access pattern.
89 Analysis phase:
90 ===============
91 The driver for the analysis phase is vect_analyze_loop().
92 It applies a set of analyses, some of which rely on the scalar evolution
93 analyzer (scev) developed by Sebastian Pop.
95 During the analysis phase the vectorizer records some information
96 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
97 loop, as well as general information about the loop as a whole, which is
98 recorded in a "loop_vec_info" struct attached to each loop.
100 Transformation phase:
101 =====================
102 The loop transformation phase scans all the stmts in the loop, and
103 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
104 the loop that needs to be vectorized. It inserts the vector code sequence
105 just before the scalar stmt S, and records a pointer to the vector code
106 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
107 attached to S). This pointer will be used for the vectorization of following
108 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
109 otherwise, we rely on dead code elimination for removing it.
111 For example, say stmt S1 was vectorized into stmt VS1:
113 VS1: vb = px[i];
114 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
115 S2: a = b;
117 To vectorize stmt S2, the vectorizer first finds the stmt that defines
118 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
119 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
120 resulting sequence would be:
122 VS1: vb = px[i];
123 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
124 VS2: va = vb;
125 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
127 Operands that are not SSA_NAMEs, are data-refs that appear in
128 load/store operations (like 'x[i]' in S1), and are handled differently.
130 Target modeling:
131 =================
132 Currently the only target specific information that is used is the
133 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
134 Targets that can support different sizes of vectors, for now will need
135 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
136 flexibility will be added in the future.
138 Since we only vectorize operations which vector form can be
139 expressed using existing tree codes, to verify that an operation is
140 supported, the vectorizer checks the relevant optab at the relevant
141 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
142 the value found is CODE_FOR_nothing, then there's no target support, and
143 we can't vectorize the stmt.
145 For additional information on this project see:
146 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
149 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
151 /* Function vect_determine_vectorization_factor
153 Determine the vectorization factor (VF). VF is the number of data elements
154 that are operated upon in parallel in a single iteration of the vectorized
155 loop. For example, when vectorizing a loop that operates on 4byte elements,
156 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
157 elements can fit in a single vector register.
159 We currently support vectorization of loops in which all types operated upon
160 are of the same size. Therefore this function currently sets VF according to
161 the size of the types operated upon, and fails if there are multiple sizes
162 in the loop.
164 VF is also the factor by which the loop iterations are strip-mined, e.g.:
165 original loop:
166 for (i=0; i<N; i++){
167 a[i] = b[i] + c[i];
170 vectorized loop:
171 for (i=0; i<N; i+=VF){
172 a[i:VF] = b[i:VF] + c[i:VF];
176 static bool
177 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
180 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
181 unsigned nbbs = loop->num_nodes;
182 unsigned int vectorization_factor = 0;
183 tree scalar_type;
184 gphi *phi;
185 tree vectype;
186 unsigned int nunits;
187 stmt_vec_info stmt_info;
188 unsigned i;
189 HOST_WIDE_INT dummy;
190 gimple *stmt, *pattern_stmt = NULL;
191 gimple_seq pattern_def_seq = NULL;
192 gimple_stmt_iterator pattern_def_si = gsi_none ();
193 bool analyze_pattern_stmt = false;
194 bool bool_result;
195 auto_vec<stmt_vec_info> mask_producers;
197 if (dump_enabled_p ())
198 dump_printf_loc (MSG_NOTE, vect_location,
199 "=== vect_determine_vectorization_factor ===\n");
201 for (i = 0; i < nbbs; i++)
203 basic_block bb = bbs[i];
205 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
206 gsi_next (&si))
208 phi = si.phi ();
209 stmt_info = vinfo_for_stmt (phi);
210 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
213 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
214 dump_printf (MSG_NOTE, "\n");
217 gcc_assert (stmt_info);
219 if (STMT_VINFO_RELEVANT_P (stmt_info))
221 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
222 scalar_type = TREE_TYPE (PHI_RESULT (phi));
224 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "get vectype for scalar type: ");
228 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
229 dump_printf (MSG_NOTE, "\n");
232 vectype = get_vectype_for_scalar_type (scalar_type);
233 if (!vectype)
235 if (dump_enabled_p ())
237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
238 "not vectorized: unsupported "
239 "data-type ");
240 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
241 scalar_type);
242 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
244 return false;
246 STMT_VINFO_VECTYPE (stmt_info) = vectype;
248 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
251 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
252 dump_printf (MSG_NOTE, "\n");
255 nunits = TYPE_VECTOR_SUBPARTS (vectype);
256 if (dump_enabled_p ())
257 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
258 nunits);
260 if (!vectorization_factor
261 || (nunits > vectorization_factor))
262 vectorization_factor = nunits;
266 for (gimple_stmt_iterator si = gsi_start_bb (bb);
267 !gsi_end_p (si) || analyze_pattern_stmt;)
269 tree vf_vectype;
271 if (analyze_pattern_stmt)
272 stmt = pattern_stmt;
273 else
274 stmt = gsi_stmt (si);
276 stmt_info = vinfo_for_stmt (stmt);
278 if (dump_enabled_p ())
280 dump_printf_loc (MSG_NOTE, vect_location,
281 "==> examining statement: ");
282 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
283 dump_printf (MSG_NOTE, "\n");
286 gcc_assert (stmt_info);
288 /* Skip stmts which do not need to be vectorized. */
289 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
290 && !STMT_VINFO_LIVE_P (stmt_info))
291 || gimple_clobber_p (stmt))
293 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
294 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
295 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
296 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
298 stmt = pattern_stmt;
299 stmt_info = vinfo_for_stmt (pattern_stmt);
300 if (dump_enabled_p ())
302 dump_printf_loc (MSG_NOTE, vect_location,
303 "==> examining pattern statement: ");
304 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
305 dump_printf (MSG_NOTE, "\n");
308 else
310 if (dump_enabled_p ())
311 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
312 gsi_next (&si);
313 continue;
316 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
317 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
320 analyze_pattern_stmt = true;
322 /* If a pattern statement has def stmts, analyze them too. */
323 if (is_pattern_stmt_p (stmt_info))
325 if (pattern_def_seq == NULL)
327 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
328 pattern_def_si = gsi_start (pattern_def_seq);
330 else if (!gsi_end_p (pattern_def_si))
331 gsi_next (&pattern_def_si);
332 if (pattern_def_seq != NULL)
334 gimple *pattern_def_stmt = NULL;
335 stmt_vec_info pattern_def_stmt_info = NULL;
337 while (!gsi_end_p (pattern_def_si))
339 pattern_def_stmt = gsi_stmt (pattern_def_si);
340 pattern_def_stmt_info
341 = vinfo_for_stmt (pattern_def_stmt);
342 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
343 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
344 break;
345 gsi_next (&pattern_def_si);
348 if (!gsi_end_p (pattern_def_si))
350 if (dump_enabled_p ())
352 dump_printf_loc (MSG_NOTE, vect_location,
353 "==> examining pattern def stmt: ");
354 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
355 pattern_def_stmt, 0);
356 dump_printf (MSG_NOTE, "\n");
359 stmt = pattern_def_stmt;
360 stmt_info = pattern_def_stmt_info;
362 else
364 pattern_def_si = gsi_none ();
365 analyze_pattern_stmt = false;
368 else
369 analyze_pattern_stmt = false;
372 if (gimple_get_lhs (stmt) == NULL_TREE
373 /* MASK_STORE has no lhs, but is ok. */
374 && (!is_gimple_call (stmt)
375 || !gimple_call_internal_p (stmt)
376 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
378 if (is_gimple_call (stmt))
380 /* Ignore calls with no lhs. These must be calls to
381 #pragma omp simd functions, and what vectorization factor
382 it really needs can't be determined until
383 vectorizable_simd_clone_call. */
384 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
386 pattern_def_seq = NULL;
387 gsi_next (&si);
389 continue;
391 if (dump_enabled_p ())
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "not vectorized: irregular stmt.");
395 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
397 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
399 return false;
402 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
404 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
407 "not vectorized: vector stmt in loop:");
408 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
409 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
411 return false;
414 bool_result = false;
416 if (STMT_VINFO_VECTYPE (stmt_info))
418 /* The only case when a vectype had been already set is for stmts
419 that contain a dataref, or for "pattern-stmts" (stmts
420 generated by the vectorizer to represent/replace a certain
421 idiom). */
422 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
423 || is_pattern_stmt_p (stmt_info)
424 || !gsi_end_p (pattern_def_si));
425 vectype = STMT_VINFO_VECTYPE (stmt_info);
427 else
429 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
430 if (is_gimple_call (stmt)
431 && gimple_call_internal_p (stmt)
432 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
433 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
434 else
435 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
437 /* Bool ops don't participate in vectorization factor
438 computation. For comparison use compared types to
439 compute a factor. */
440 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
442 if (STMT_VINFO_RELEVANT_P (stmt_info))
443 mask_producers.safe_push (stmt_info);
444 bool_result = true;
446 if (gimple_code (stmt) == GIMPLE_ASSIGN
447 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
448 == tcc_comparison
449 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
450 != BOOLEAN_TYPE)
451 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
452 else
454 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
456 pattern_def_seq = NULL;
457 gsi_next (&si);
459 continue;
463 if (dump_enabled_p ())
465 dump_printf_loc (MSG_NOTE, vect_location,
466 "get vectype for scalar type: ");
467 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
468 dump_printf (MSG_NOTE, "\n");
470 vectype = get_vectype_for_scalar_type (scalar_type);
471 if (!vectype)
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
476 "not vectorized: unsupported "
477 "data-type ");
478 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
479 scalar_type);
480 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
482 return false;
485 if (!bool_result)
486 STMT_VINFO_VECTYPE (stmt_info) = vectype;
488 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
491 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
492 dump_printf (MSG_NOTE, "\n");
496 /* Don't try to compute VF out scalar types if we stmt
497 produces boolean vector. Use result vectype instead. */
498 if (VECTOR_BOOLEAN_TYPE_P (vectype))
499 vf_vectype = vectype;
500 else
502 /* The vectorization factor is according to the smallest
503 scalar type (or the largest vector size, but we only
504 support one vector size per loop). */
505 if (!bool_result)
506 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
507 &dummy);
508 if (dump_enabled_p ())
510 dump_printf_loc (MSG_NOTE, vect_location,
511 "get vectype for scalar type: ");
512 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
513 dump_printf (MSG_NOTE, "\n");
515 vf_vectype = get_vectype_for_scalar_type (scalar_type);
517 if (!vf_vectype)
519 if (dump_enabled_p ())
521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
522 "not vectorized: unsupported data-type ");
523 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
524 scalar_type);
525 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
527 return false;
530 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
531 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
533 if (dump_enabled_p ())
535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
536 "not vectorized: different sized vector "
537 "types in statement, ");
538 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
539 vectype);
540 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
541 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
542 vf_vectype);
543 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
545 return false;
548 if (dump_enabled_p ())
550 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
551 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
552 dump_printf (MSG_NOTE, "\n");
555 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
556 if (dump_enabled_p ())
557 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
558 if (!vectorization_factor
559 || (nunits > vectorization_factor))
560 vectorization_factor = nunits;
562 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
564 pattern_def_seq = NULL;
565 gsi_next (&si);
570 /* TODO: Analyze cost. Decide if worth while to vectorize. */
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
573 vectorization_factor);
574 if (vectorization_factor <= 1)
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
578 "not vectorized: unsupported data-type\n");
579 return false;
581 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
583 for (i = 0; i < mask_producers.length (); i++)
585 tree mask_type = NULL;
587 stmt = STMT_VINFO_STMT (mask_producers[i]);
589 if (gimple_code (stmt) == GIMPLE_ASSIGN
590 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
591 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
593 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
594 mask_type = get_mask_type_for_scalar_type (scalar_type);
596 if (!mask_type)
598 if (dump_enabled_p ())
599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
600 "not vectorized: unsupported mask\n");
601 return false;
604 else
606 tree rhs;
607 ssa_op_iter iter;
608 gimple *def_stmt;
609 enum vect_def_type dt;
611 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
613 if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
614 &def_stmt, &dt, &vectype))
616 if (dump_enabled_p ())
618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
619 "not vectorized: can't compute mask type "
620 "for statement, ");
621 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
623 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
625 return false;
628 /* No vectype probably means external definition.
629 Allow it in case there is another operand which
630 allows to determine mask type. */
631 if (!vectype)
632 continue;
634 if (!mask_type)
635 mask_type = vectype;
636 else if (TYPE_VECTOR_SUBPARTS (mask_type)
637 != TYPE_VECTOR_SUBPARTS (vectype))
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "not vectorized: different sized masks "
643 "types in statement, ");
644 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
645 mask_type);
646 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
647 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
648 vectype);
649 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
651 return false;
653 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
654 != VECTOR_BOOLEAN_TYPE_P (vectype))
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
659 "not vectorized: mixed mask and "
660 "nonmask vector types in statement, ");
661 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
662 mask_type);
663 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
664 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
665 vectype);
666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
668 return false;
672 /* We may compare boolean value loaded as vector of integers.
673 Fix mask_type in such case. */
674 if (mask_type
675 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
676 && gimple_code (stmt) == GIMPLE_ASSIGN
677 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
678 mask_type = build_same_sized_truth_vector_type (mask_type);
681 /* No mask_type should mean loop invariant predicate.
682 This is probably a subject for optimization in
683 if-conversion. */
684 if (!mask_type)
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "not vectorized: can't compute mask type "
690 "for statement, ");
691 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
693 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
695 return false;
698 STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
701 return true;
705 /* Function vect_is_simple_iv_evolution.
707 FORNOW: A simple evolution of an induction variables in the loop is
708 considered a polynomial evolution. */
710 static bool
711 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
712 tree * step)
714 tree init_expr;
715 tree step_expr;
716 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
717 basic_block bb;
719 /* When there is no evolution in this loop, the evolution function
720 is not "simple". */
721 if (evolution_part == NULL_TREE)
722 return false;
724 /* When the evolution is a polynomial of degree >= 2
725 the evolution function is not "simple". */
726 if (tree_is_chrec (evolution_part))
727 return false;
729 step_expr = evolution_part;
730 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
732 if (dump_enabled_p ())
734 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
735 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
736 dump_printf (MSG_NOTE, ", init: ");
737 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
738 dump_printf (MSG_NOTE, "\n");
741 *init = init_expr;
742 *step = step_expr;
744 if (TREE_CODE (step_expr) != INTEGER_CST
745 && (TREE_CODE (step_expr) != SSA_NAME
746 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
747 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
748 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
749 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
750 || !flag_associative_math)))
751 && (TREE_CODE (step_expr) != REAL_CST
752 || !flag_associative_math))
754 if (dump_enabled_p ())
755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
756 "step unknown.\n");
757 return false;
760 return true;
763 /* Function vect_analyze_scalar_cycles_1.
765 Examine the cross iteration def-use cycles of scalar variables
766 in LOOP. LOOP_VINFO represents the loop that is now being
767 considered for vectorization (can be LOOP, or an outer-loop
768 enclosing LOOP). */
770 static void
771 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
773 basic_block bb = loop->header;
774 tree init, step;
775 auto_vec<gimple *, 64> worklist;
776 gphi_iterator gsi;
777 bool double_reduc;
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "=== vect_analyze_scalar_cycles ===\n");
783 /* First - identify all inductions. Reduction detection assumes that all the
784 inductions have been identified, therefore, this order must not be
785 changed. */
786 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
788 gphi *phi = gsi.phi ();
789 tree access_fn = NULL;
790 tree def = PHI_RESULT (phi);
791 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
793 if (dump_enabled_p ())
795 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
796 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
797 dump_printf (MSG_NOTE, "\n");
800 /* Skip virtual phi's. The data dependences that are associated with
801 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
802 if (virtual_operand_p (def))
803 continue;
805 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
807 /* Analyze the evolution function. */
808 access_fn = analyze_scalar_evolution (loop, def);
809 if (access_fn)
811 STRIP_NOPS (access_fn);
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_NOTE, vect_location,
815 "Access function of PHI: ");
816 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
817 dump_printf (MSG_NOTE, "\n");
819 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
820 = initial_condition_in_loop_num (access_fn, loop->num);
821 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
822 = evolution_part_in_loop_num (access_fn, loop->num);
825 if (!access_fn
826 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
827 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
828 && TREE_CODE (step) != INTEGER_CST))
830 worklist.safe_push (phi);
831 continue;
834 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
835 != NULL_TREE);
836 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
838 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
840 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
844 /* Second - identify all reductions and nested cycles. */
845 while (worklist.length () > 0)
847 gimple *phi = worklist.pop ();
848 tree def = PHI_RESULT (phi);
849 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
850 gimple *reduc_stmt;
851 bool nested_cycle;
853 if (dump_enabled_p ())
855 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
856 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
857 dump_printf (MSG_NOTE, "\n");
860 gcc_assert (!virtual_operand_p (def)
861 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
863 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
864 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
865 &double_reduc, false);
866 if (reduc_stmt)
868 if (double_reduc)
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "Detected double reduction.\n");
874 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
875 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
876 vect_double_reduction_def;
878 else
880 if (nested_cycle)
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_NOTE, vect_location,
884 "Detected vectorizable nested cycle.\n");
886 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
887 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
888 vect_nested_cycle;
890 else
892 if (dump_enabled_p ())
893 dump_printf_loc (MSG_NOTE, vect_location,
894 "Detected reduction.\n");
896 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
897 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
898 vect_reduction_def;
899 /* Store the reduction cycles for possible vectorization in
900 loop-aware SLP. */
901 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
905 else
906 if (dump_enabled_p ())
907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
908 "Unknown def-use cycle pattern.\n");
913 /* Function vect_analyze_scalar_cycles.
915 Examine the cross iteration def-use cycles of scalar variables, by
916 analyzing the loop-header PHIs of scalar variables. Classify each
917 cycle as one of the following: invariant, induction, reduction, unknown.
918 We do that for the loop represented by LOOP_VINFO, and also to its
919 inner-loop, if exists.
920 Examples for scalar cycles:
922 Example1: reduction:
924 loop1:
925 for (i=0; i<N; i++)
926 sum += a[i];
928 Example2: induction:
930 loop2:
931 for (i=0; i<N; i++)
932 a[i] = i; */
934 static void
935 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
937 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
939 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
941 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
942 Reductions in such inner-loop therefore have different properties than
943 the reductions in the nest that gets vectorized:
944 1. When vectorized, they are executed in the same order as in the original
945 scalar loop, so we can't change the order of computation when
946 vectorizing them.
947 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
948 current checks are too strict. */
950 if (loop->inner)
951 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
954 /* Transfer group and reduction information from STMT to its pattern stmt. */
956 static void
957 vect_fixup_reduc_chain (gimple *stmt)
959 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
960 gimple *stmtp;
961 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
962 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
963 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
966 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
967 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
968 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
969 if (stmt)
970 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
971 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
973 while (stmt);
974 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
977 /* Fixup scalar cycles that now have their stmts detected as patterns. */
979 static void
980 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
982 gimple *first;
983 unsigned i;
985 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
986 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
988 vect_fixup_reduc_chain (first);
989 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
990 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
994 /* Function vect_get_loop_niters.
996 Determine how many iterations the loop is executed and place it
997 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
998 in NUMBER_OF_ITERATIONSM1.
1000 Return the loop exit condition. */
1003 static gcond *
1004 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
1005 tree *number_of_iterationsm1)
1007 tree niters;
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "=== get_loop_niters ===\n");
1013 niters = number_of_latch_executions (loop);
1014 *number_of_iterationsm1 = niters;
1016 /* We want the number of loop header executions which is the number
1017 of latch executions plus one.
1018 ??? For UINT_MAX latch executions this number overflows to zero
1019 for loops like do { n++; } while (n != 0); */
1020 if (niters && !chrec_contains_undetermined (niters))
1021 niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
1022 build_int_cst (TREE_TYPE (niters), 1));
1023 *number_of_iterations = niters;
1025 return get_loop_exit_condition (loop);
1029 /* Function bb_in_loop_p
1031 Used as predicate for dfs order traversal of the loop bbs. */
1033 static bool
1034 bb_in_loop_p (const_basic_block bb, const void *data)
1036 const struct loop *const loop = (const struct loop *)data;
1037 if (flow_bb_inside_loop_p (loop, bb))
1038 return true;
1039 return false;
1043 /* Function new_loop_vec_info.
1045 Create and initialize a new loop_vec_info struct for LOOP, as well as
1046 stmt_vec_info structs for all the stmts in LOOP. */
1048 static loop_vec_info
1049 new_loop_vec_info (struct loop *loop)
1051 loop_vec_info res;
1052 basic_block *bbs;
1053 gimple_stmt_iterator si;
1054 unsigned int i, nbbs;
1056 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1057 res->kind = vec_info::loop;
1058 LOOP_VINFO_LOOP (res) = loop;
1060 bbs = get_loop_body (loop);
1062 /* Create/Update stmt_info for all stmts in the loop. */
1063 for (i = 0; i < loop->num_nodes; i++)
1065 basic_block bb = bbs[i];
1067 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1069 gimple *phi = gsi_stmt (si);
1070 gimple_set_uid (phi, 0);
1071 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1074 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1076 gimple *stmt = gsi_stmt (si);
1077 gimple_set_uid (stmt, 0);
1078 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1082 /* CHECKME: We want to visit all BBs before their successors (except for
1083 latch blocks, for which this assertion wouldn't hold). In the simple
1084 case of the loop forms we allow, a dfs order of the BBs would the same
1085 as reversed postorder traversal, so we are safe. */
1087 free (bbs);
1088 bbs = XCNEWVEC (basic_block, loop->num_nodes);
1089 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1090 bbs, loop->num_nodes, loop);
1091 gcc_assert (nbbs == loop->num_nodes);
1093 LOOP_VINFO_BBS (res) = bbs;
1094 LOOP_VINFO_NITERSM1 (res) = NULL;
1095 LOOP_VINFO_NITERS (res) = NULL;
1096 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1097 LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1098 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1099 LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1100 LOOP_VINFO_VECT_FACTOR (res) = 0;
1101 LOOP_VINFO_LOOP_NEST (res) = vNULL;
1102 LOOP_VINFO_DATAREFS (res) = vNULL;
1103 LOOP_VINFO_DDRS (res) = vNULL;
1104 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1105 LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1106 LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1107 LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1108 LOOP_VINFO_REDUCTIONS (res) = vNULL;
1109 LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1110 LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1111 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1112 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1113 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1114 LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1115 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1117 return res;
1121 /* Function destroy_loop_vec_info.
1123 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1124 stmts in the loop. */
1126 void
1127 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1129 struct loop *loop;
1130 basic_block *bbs;
1131 int nbbs;
1132 gimple_stmt_iterator si;
1133 int j;
1134 vec<slp_instance> slp_instances;
1135 slp_instance instance;
1136 bool swapped;
1138 if (!loop_vinfo)
1139 return;
1141 loop = LOOP_VINFO_LOOP (loop_vinfo);
1143 bbs = LOOP_VINFO_BBS (loop_vinfo);
1144 nbbs = clean_stmts ? loop->num_nodes : 0;
1145 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1147 for (j = 0; j < nbbs; j++)
1149 basic_block bb = bbs[j];
1150 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1151 free_stmt_vec_info (gsi_stmt (si));
1153 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1155 gimple *stmt = gsi_stmt (si);
1157 /* We may have broken canonical form by moving a constant
1158 into RHS1 of a commutative op. Fix such occurrences. */
1159 if (swapped && is_gimple_assign (stmt))
1161 enum tree_code code = gimple_assign_rhs_code (stmt);
1163 if ((code == PLUS_EXPR
1164 || code == POINTER_PLUS_EXPR
1165 || code == MULT_EXPR)
1166 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1167 swap_ssa_operands (stmt,
1168 gimple_assign_rhs1_ptr (stmt),
1169 gimple_assign_rhs2_ptr (stmt));
1172 /* Free stmt_vec_info. */
1173 free_stmt_vec_info (stmt);
1174 gsi_next (&si);
1178 free (LOOP_VINFO_BBS (loop_vinfo));
1179 vect_destroy_datarefs (loop_vinfo);
1180 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1181 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1182 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1183 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1184 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1185 FOR_EACH_VEC_ELT (slp_instances, j, instance)
1186 vect_free_slp_instance (instance);
1188 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1189 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1190 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1191 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1193 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1194 loop_vinfo->scalar_cost_vec.release ();
1196 free (loop_vinfo);
1197 loop->aux = NULL;
1201 /* Calculate the cost of one scalar iteration of the loop. */
1202 static void
1203 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1205 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1206 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1207 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1208 int innerloop_iters, i;
1210 /* Count statements in scalar loop. Using this as scalar cost for a single
1211 iteration for now.
1213 TODO: Add outer loop support.
1215 TODO: Consider assigning different costs to different scalar
1216 statements. */
1218 /* FORNOW. */
1219 innerloop_iters = 1;
1220 if (loop->inner)
1221 innerloop_iters = 50; /* FIXME */
1223 for (i = 0; i < nbbs; i++)
1225 gimple_stmt_iterator si;
1226 basic_block bb = bbs[i];
1228 if (bb->loop_father == loop->inner)
1229 factor = innerloop_iters;
1230 else
1231 factor = 1;
1233 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1235 gimple *stmt = gsi_stmt (si);
1236 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1238 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1239 continue;
1241 /* Skip stmts that are not vectorized inside the loop. */
1242 if (stmt_info
1243 && !STMT_VINFO_RELEVANT_P (stmt_info)
1244 && (!STMT_VINFO_LIVE_P (stmt_info)
1245 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1246 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1247 continue;
1249 vect_cost_for_stmt kind;
1250 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1252 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1253 kind = scalar_load;
1254 else
1255 kind = scalar_store;
1257 else
1258 kind = scalar_stmt;
1260 scalar_single_iter_cost
1261 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1262 factor, kind, NULL, 0, vect_prologue);
1265 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1266 = scalar_single_iter_cost;
1270 /* Function vect_analyze_loop_form_1.
1272 Verify that certain CFG restrictions hold, including:
1273 - the loop has a pre-header
1274 - the loop has a single entry and exit
1275 - the loop exit condition is simple enough, and the number of iterations
1276 can be analyzed (a countable loop). */
1278 bool
1279 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1280 tree *number_of_iterationsm1,
1281 tree *number_of_iterations, gcond **inner_loop_cond)
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "=== vect_analyze_loop_form ===\n");
1287 /* Different restrictions apply when we are considering an inner-most loop,
1288 vs. an outer (nested) loop.
1289 (FORNOW. May want to relax some of these restrictions in the future). */
1291 if (!loop->inner)
1293 /* Inner-most loop. We currently require that the number of BBs is
1294 exactly 2 (the header and latch). Vectorizable inner-most loops
1295 look like this:
1297 (pre-header)
1299 header <--------+
1300 | | |
1301 | +--> latch --+
1303 (exit-bb) */
1305 if (loop->num_nodes != 2)
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1309 "not vectorized: control flow in loop.\n");
1310 return false;
1313 if (empty_block_p (loop->header))
1315 if (dump_enabled_p ())
1316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1317 "not vectorized: empty loop.\n");
1318 return false;
1321 else
1323 struct loop *innerloop = loop->inner;
1324 edge entryedge;
1326 /* Nested loop. We currently require that the loop is doubly-nested,
1327 contains a single inner loop, and the number of BBs is exactly 5.
1328 Vectorizable outer-loops look like this:
1330 (pre-header)
1332 header <---+
1334 inner-loop |
1336 tail ------+
1338 (exit-bb)
1340 The inner-loop has the properties expected of inner-most loops
1341 as described above. */
1343 if ((loop->inner)->inner || (loop->inner)->next)
1345 if (dump_enabled_p ())
1346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1347 "not vectorized: multiple nested loops.\n");
1348 return false;
1351 if (loop->num_nodes != 5)
1353 if (dump_enabled_p ())
1354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1355 "not vectorized: control flow in loop.\n");
1356 return false;
1359 entryedge = loop_preheader_edge (innerloop);
1360 if (entryedge->src != loop->header
1361 || !single_exit (innerloop)
1362 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1364 if (dump_enabled_p ())
1365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1366 "not vectorized: unsupported outerloop form.\n");
1367 return false;
1370 /* Analyze the inner-loop. */
1371 tree inner_niterm1, inner_niter;
1372 if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1373 &inner_niterm1, &inner_niter, NULL))
1375 if (dump_enabled_p ())
1376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1377 "not vectorized: Bad inner loop.\n");
1378 return false;
1381 if (!expr_invariant_in_loop_p (loop, inner_niter))
1383 if (dump_enabled_p ())
1384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1385 "not vectorized: inner-loop count not"
1386 " invariant.\n");
1387 return false;
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_NOTE, vect_location,
1392 "Considering outer-loop vectorization.\n");
1395 if (!single_exit (loop)
1396 || EDGE_COUNT (loop->header->preds) != 2)
1398 if (dump_enabled_p ())
1400 if (!single_exit (loop))
1401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1402 "not vectorized: multiple exits.\n");
1403 else if (EDGE_COUNT (loop->header->preds) != 2)
1404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1405 "not vectorized: too many incoming edges.\n");
1407 return false;
1410 /* We assume that the loop exit condition is at the end of the loop. i.e,
1411 that the loop is represented as a do-while (with a proper if-guard
1412 before the loop if needed), where the loop header contains all the
1413 executable statements, and the latch is empty. */
1414 if (!empty_block_p (loop->latch)
1415 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1419 "not vectorized: latch block not empty.\n");
1420 return false;
1423 /* Make sure there exists a single-predecessor exit bb: */
1424 if (!single_pred_p (single_exit (loop)->dest))
1426 edge e = single_exit (loop);
1427 if (!(e->flags & EDGE_ABNORMAL))
1429 split_loop_exit_edge (e);
1430 if (dump_enabled_p ())
1431 dump_printf (MSG_NOTE, "split exit edge.\n");
1433 else
1435 if (dump_enabled_p ())
1436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1437 "not vectorized: abnormal loop exit edge.\n");
1438 return false;
1442 *loop_cond = vect_get_loop_niters (loop, number_of_iterations,
1443 number_of_iterationsm1);
1444 if (!*loop_cond)
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1448 "not vectorized: complicated exit condition.\n");
1449 return false;
1452 if (!*number_of_iterations
1453 || chrec_contains_undetermined (*number_of_iterations))
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1457 "not vectorized: number of iterations cannot be "
1458 "computed.\n");
1459 return false;
1462 if (integer_zerop (*number_of_iterations))
1464 if (dump_enabled_p ())
1465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1466 "not vectorized: number of iterations = 0.\n");
1467 return false;
1470 return true;
1473 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
1475 loop_vec_info
1476 vect_analyze_loop_form (struct loop *loop)
1478 tree number_of_iterations, number_of_iterationsm1;
1479 gcond *loop_cond, *inner_loop_cond = NULL;
1481 if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
1482 &number_of_iterations, &inner_loop_cond))
1483 return NULL;
1485 loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1486 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1487 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1488 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1490 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1492 if (dump_enabled_p ())
1494 dump_printf_loc (MSG_NOTE, vect_location,
1495 "Symbolic number of iterations is ");
1496 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1497 dump_printf (MSG_NOTE, "\n");
1501 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1502 if (inner_loop_cond)
1503 STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1504 = loop_exit_ctrl_vec_info_type;
1506 gcc_assert (!loop->aux);
1507 loop->aux = loop_vinfo;
1508 return loop_vinfo;
1513 /* Scan the loop stmts and dependent on whether there are any (non-)SLP
1514 statements update the vectorization factor. */
1516 static void
1517 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1519 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1520 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1521 int nbbs = loop->num_nodes;
1522 unsigned int vectorization_factor;
1523 int i;
1525 if (dump_enabled_p ())
1526 dump_printf_loc (MSG_NOTE, vect_location,
1527 "=== vect_update_vf_for_slp ===\n");
1529 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1530 gcc_assert (vectorization_factor != 0);
1532 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1533 vectorization factor of the loop is the unrolling factor required by
1534 the SLP instances. If that unrolling factor is 1, we say, that we
1535 perform pure SLP on loop - cross iteration parallelism is not
1536 exploited. */
1537 bool only_slp_in_loop = true;
1538 for (i = 0; i < nbbs; i++)
1540 basic_block bb = bbs[i];
1541 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1542 gsi_next (&si))
1544 gimple *stmt = gsi_stmt (si);
1545 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1546 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1547 && STMT_VINFO_RELATED_STMT (stmt_info))
1549 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1550 stmt_info = vinfo_for_stmt (stmt);
1552 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1553 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1554 && !PURE_SLP_STMT (stmt_info))
1555 /* STMT needs both SLP and loop-based vectorization. */
1556 only_slp_in_loop = false;
1560 if (only_slp_in_loop)
1561 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1562 else
1563 vectorization_factor
1564 = least_common_multiple (vectorization_factor,
1565 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1567 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1568 if (dump_enabled_p ())
1569 dump_printf_loc (MSG_NOTE, vect_location,
1570 "Updating vectorization factor to %d\n",
1571 vectorization_factor);
1574 /* Function vect_analyze_loop_operations.
1576 Scan the loop stmts and make sure they are all vectorizable. */
1578 static bool
1579 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1581 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1582 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1583 int nbbs = loop->num_nodes;
1584 int i;
1585 stmt_vec_info stmt_info;
1586 bool need_to_vectorize = false;
1587 bool ok;
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "=== vect_analyze_loop_operations ===\n");
1593 for (i = 0; i < nbbs; i++)
1595 basic_block bb = bbs[i];
1597 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1598 gsi_next (&si))
1600 gphi *phi = si.phi ();
1601 ok = true;
1603 stmt_info = vinfo_for_stmt (phi);
1604 if (dump_enabled_p ())
1606 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1607 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1608 dump_printf (MSG_NOTE, "\n");
1610 if (virtual_operand_p (gimple_phi_result (phi)))
1611 continue;
1613 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1614 (i.e., a phi in the tail of the outer-loop). */
1615 if (! is_loop_header_bb_p (bb))
1617 /* FORNOW: we currently don't support the case that these phis
1618 are not used in the outerloop (unless it is double reduction,
1619 i.e., this phi is vect_reduction_def), cause this case
1620 requires to actually do something here. */
1621 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1622 || STMT_VINFO_LIVE_P (stmt_info))
1623 && STMT_VINFO_DEF_TYPE (stmt_info)
1624 != vect_double_reduction_def)
1626 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1628 "Unsupported loop-closed phi in "
1629 "outer-loop.\n");
1630 return false;
1633 /* If PHI is used in the outer loop, we check that its operand
1634 is defined in the inner loop. */
1635 if (STMT_VINFO_RELEVANT_P (stmt_info))
1637 tree phi_op;
1638 gimple *op_def_stmt;
1640 if (gimple_phi_num_args (phi) != 1)
1641 return false;
1643 phi_op = PHI_ARG_DEF (phi, 0);
1644 if (TREE_CODE (phi_op) != SSA_NAME)
1645 return false;
1647 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1648 if (gimple_nop_p (op_def_stmt)
1649 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1650 || !vinfo_for_stmt (op_def_stmt))
1651 return false;
1653 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1654 != vect_used_in_outer
1655 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1656 != vect_used_in_outer_by_reduction)
1657 return false;
1660 continue;
1663 gcc_assert (stmt_info);
1665 if (STMT_VINFO_LIVE_P (stmt_info))
1667 /* FORNOW: not yet supported. */
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1670 "not vectorized: value used after loop.\n");
1671 return false;
1674 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1675 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1677 /* A scalar-dependence cycle that we don't support. */
1678 if (dump_enabled_p ())
1679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1680 "not vectorized: scalar dependence cycle.\n");
1681 return false;
1684 if (STMT_VINFO_RELEVANT_P (stmt_info))
1686 need_to_vectorize = true;
1687 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1688 ok = vectorizable_induction (phi, NULL, NULL);
1691 if (!ok)
1693 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "not vectorized: relevant phi not "
1697 "supported: ");
1698 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1699 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1701 return false;
1705 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1706 gsi_next (&si))
1708 gimple *stmt = gsi_stmt (si);
1709 if (!gimple_clobber_p (stmt)
1710 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1711 return false;
1713 } /* bbs */
1715 /* All operations in the loop are either irrelevant (deal with loop
1716 control, or dead), or only used outside the loop and can be moved
1717 out of the loop (e.g. invariants, inductions). The loop can be
1718 optimized away by scalar optimizations. We're better off not
1719 touching this loop. */
1720 if (!need_to_vectorize)
1722 if (dump_enabled_p ())
1723 dump_printf_loc (MSG_NOTE, vect_location,
1724 "All the computation can be taken out of the loop.\n");
1725 if (dump_enabled_p ())
1726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1727 "not vectorized: redundant loop. no profit to "
1728 "vectorize.\n");
1729 return false;
1732 return true;
1736 /* Function vect_analyze_loop_2.
1738 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1739 for it. The different analyses will record information in the
1740 loop_vec_info struct. */
1741 static bool
1742 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1744 bool ok;
1745 int max_vf = MAX_VECTORIZATION_FACTOR;
1746 int min_vf = 2;
1747 unsigned int n_stmts = 0;
1749 /* The first group of checks is independent of the vector size. */
1750 fatal = true;
1752 /* Find all data references in the loop (which correspond to vdefs/vuses)
1753 and analyze their evolution in the loop. */
1755 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1757 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1758 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1760 if (dump_enabled_p ())
1761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1762 "not vectorized: loop contains function calls"
1763 " or data references that cannot be analyzed\n");
1764 return false;
1767 for (unsigned i = 0; i < loop->num_nodes; i++)
1768 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1769 !gsi_end_p (gsi); gsi_next (&gsi))
1771 gimple *stmt = gsi_stmt (gsi);
1772 if (is_gimple_debug (stmt))
1773 continue;
1774 ++n_stmts;
1775 if (!find_data_references_in_stmt (loop, stmt,
1776 &LOOP_VINFO_DATAREFS (loop_vinfo)))
1778 if (is_gimple_call (stmt) && loop->safelen)
1780 tree fndecl = gimple_call_fndecl (stmt), op;
1781 if (fndecl != NULL_TREE)
1783 cgraph_node *node = cgraph_node::get (fndecl);
1784 if (node != NULL && node->simd_clones != NULL)
1786 unsigned int j, n = gimple_call_num_args (stmt);
1787 for (j = 0; j < n; j++)
1789 op = gimple_call_arg (stmt, j);
1790 if (DECL_P (op)
1791 || (REFERENCE_CLASS_P (op)
1792 && get_base_address (op)))
1793 break;
1795 op = gimple_call_lhs (stmt);
1796 /* Ignore #pragma omp declare simd functions
1797 if they don't have data references in the
1798 call stmt itself. */
1799 if (j == n
1800 && !(op
1801 && (DECL_P (op)
1802 || (REFERENCE_CLASS_P (op)
1803 && get_base_address (op)))))
1804 continue;
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "not vectorized: loop contains function "
1811 "calls or data references that cannot "
1812 "be analyzed\n");
1813 return false;
1817 /* Analyze the data references and also adjust the minimal
1818 vectorization factor according to the loads and stores. */
1820 ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1821 if (!ok)
1823 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1825 "bad data references.\n");
1826 return false;
1829 /* Classify all cross-iteration scalar data-flow cycles.
1830 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1831 vect_analyze_scalar_cycles (loop_vinfo);
1833 vect_pattern_recog (loop_vinfo);
1835 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1837 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1838 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1840 ok = vect_analyze_data_ref_accesses (loop_vinfo);
1841 if (!ok)
1843 if (dump_enabled_p ())
1844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1845 "bad data access.\n");
1846 return false;
1849 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1851 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1852 if (!ok)
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1856 "unexpected pattern.\n");
1857 return false;
1860 /* While the rest of the analysis below depends on it in some way. */
1861 fatal = false;
1863 /* Analyze data dependences between the data-refs in the loop
1864 and adjust the maximum vectorization factor according to
1865 the dependences.
1866 FORNOW: fail at the first data dependence that we encounter. */
1868 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1869 if (!ok
1870 || max_vf < min_vf)
1872 if (dump_enabled_p ())
1873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1874 "bad data dependence.\n");
1875 return false;
1878 ok = vect_determine_vectorization_factor (loop_vinfo);
1879 if (!ok)
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "can't determine vectorization factor.\n");
1884 return false;
1886 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1888 if (dump_enabled_p ())
1889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1890 "bad data dependence.\n");
1891 return false;
1894 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1895 ok = vect_analyze_slp (loop_vinfo, n_stmts);
1896 if (!ok)
1897 return false;
1899 /* If there are any SLP instances mark them as pure_slp. */
1900 bool slp = vect_make_slp_decision (loop_vinfo);
1901 if (slp)
1903 /* Find stmts that need to be both vectorized and SLPed. */
1904 vect_detect_hybrid_slp (loop_vinfo);
1906 /* Update the vectorization factor based on the SLP decision. */
1907 vect_update_vf_for_slp (loop_vinfo);
1910 /* Now the vectorization factor is final. */
1911 unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1912 gcc_assert (vectorization_factor != 0);
1914 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1915 dump_printf_loc (MSG_NOTE, vect_location,
1916 "vectorization_factor = %d, niters = "
1917 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1918 LOOP_VINFO_INT_NITERS (loop_vinfo));
1920 HOST_WIDE_INT max_niter
1921 = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1922 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1923 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1924 || (max_niter != -1
1925 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1927 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1929 "not vectorized: iteration count too small.\n");
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "not vectorized: iteration count smaller than "
1933 "vectorization factor.\n");
1934 return false;
1937 /* Analyze the alignment of the data-refs in the loop.
1938 Fail if a data reference is found that cannot be vectorized. */
1940 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1941 if (!ok)
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1945 "bad data alignment.\n");
1946 return false;
1949 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1950 It is important to call pruning after vect_analyze_data_ref_accesses,
1951 since we use grouping information gathered by interleaving analysis. */
1952 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1953 if (!ok)
1955 if (dump_enabled_p ())
1956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1957 "number of versioning for alias "
1958 "run-time tests exceeds %d "
1959 "(--param vect-max-version-for-alias-checks)\n",
1960 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1961 return false;
1964 /* Compute the scalar iteration cost. */
1965 vect_compute_single_scalar_iteration_cost (loop_vinfo);
1967 /* This pass will decide on using loop versioning and/or loop peeling in
1968 order to enhance the alignment of data references in the loop. */
1970 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1971 if (!ok)
1973 if (dump_enabled_p ())
1974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1975 "bad data alignment.\n");
1976 return false;
1979 if (slp)
1981 /* Analyze operations in the SLP instances. Note this may
1982 remove unsupported SLP instances which makes the above
1983 SLP kind detection invalid. */
1984 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1985 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1986 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1987 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1988 return false;
1991 /* Scan all the remaining operations in the loop that are not subject
1992 to SLP and make sure they are vectorizable. */
1993 ok = vect_analyze_loop_operations (loop_vinfo);
1994 if (!ok)
1996 if (dump_enabled_p ())
1997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1998 "bad operation or unsupported loop bound.\n");
1999 return false;
2002 /* Analyze cost. Decide if worth while to vectorize. */
2003 int min_profitable_estimate, min_profitable_iters;
2004 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2005 &min_profitable_estimate);
2007 if (min_profitable_iters < 0)
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "not vectorized: vectorization not profitable.\n");
2012 if (dump_enabled_p ())
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2014 "not vectorized: vector version will never be "
2015 "profitable.\n");
2016 return false;
2019 int min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2020 * vectorization_factor) - 1);
2022 /* Use the cost model only if it is more conservative than user specified
2023 threshold. */
2024 unsigned th = (unsigned) min_scalar_loop_bound;
2025 if (min_profitable_iters
2026 && (!min_scalar_loop_bound
2027 || min_profitable_iters > min_scalar_loop_bound))
2028 th = (unsigned) min_profitable_iters;
2030 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2032 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2033 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2037 "not vectorized: vectorization not profitable.\n");
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_NOTE, vect_location,
2040 "not vectorized: iteration count smaller than user "
2041 "specified loop bound parameter or minimum profitable "
2042 "iterations (whichever is more conservative).\n");
2043 return false;
2046 HOST_WIDE_INT estimated_niter
2047 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2048 if (estimated_niter != -1
2049 && ((unsigned HOST_WIDE_INT) estimated_niter
2050 <= MAX (th, (unsigned)min_profitable_estimate)))
2052 if (dump_enabled_p ())
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054 "not vectorized: estimated iteration count too "
2055 "small.\n");
2056 if (dump_enabled_p ())
2057 dump_printf_loc (MSG_NOTE, vect_location,
2058 "not vectorized: estimated iteration count smaller "
2059 "than specified loop bound parameter or minimum "
2060 "profitable iterations (whichever is more "
2061 "conservative).\n");
2062 return false;
2065 /* Decide whether we need to create an epilogue loop to handle
2066 remaining scalar iterations. */
2067 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2068 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2069 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2071 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2072 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2074 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2075 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2076 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2077 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2079 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2080 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2081 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2082 /* In case of versioning, check if the maximum number of
2083 iterations is greater than th. If they are identical,
2084 the epilogue is unnecessary. */
2085 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
2086 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2087 || (unsigned HOST_WIDE_INT) max_niter > th)))
2088 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2090 /* If an epilogue loop is required make sure we can create one. */
2091 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2092 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2096 if (!vect_can_advance_ivs_p (loop_vinfo)
2097 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2098 single_exit (LOOP_VINFO_LOOP
2099 (loop_vinfo))))
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2103 "not vectorized: can't create required "
2104 "epilog loop\n");
2105 return false;
2109 gcc_assert (vectorization_factor
2110 == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2112 return true;
2115 /* Function vect_analyze_loop.
2117 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2118 for it. The different analyses will record information in the
2119 loop_vec_info struct. */
2120 loop_vec_info
2121 vect_analyze_loop (struct loop *loop)
2123 loop_vec_info loop_vinfo;
2124 unsigned int vector_sizes;
2126 /* Autodetect first vector size we try. */
2127 current_vector_size = 0;
2128 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_NOTE, vect_location,
2132 "===== analyze_loop_nest =====\n");
2134 if (loop_outer (loop)
2135 && loop_vec_info_for_loop (loop_outer (loop))
2136 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2138 if (dump_enabled_p ())
2139 dump_printf_loc (MSG_NOTE, vect_location,
2140 "outer-loop already vectorized.\n");
2141 return NULL;
2144 while (1)
2146 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2147 loop_vinfo = vect_analyze_loop_form (loop);
2148 if (!loop_vinfo)
2150 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2152 "bad loop form.\n");
2153 return NULL;
2156 bool fatal = false;
2157 if (vect_analyze_loop_2 (loop_vinfo, fatal))
2159 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2161 return loop_vinfo;
2164 destroy_loop_vec_info (loop_vinfo, true);
2166 vector_sizes &= ~current_vector_size;
2167 if (fatal
2168 || vector_sizes == 0
2169 || current_vector_size == 0)
2170 return NULL;
2172 /* Try the next biggest vector size. */
2173 current_vector_size = 1 << floor_log2 (vector_sizes);
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_NOTE, vect_location,
2176 "***** Re-trying analysis with "
2177 "vector size %d\n", current_vector_size);
2182 /* Function reduction_code_for_scalar_code
2184 Input:
2185 CODE - tree_code of a reduction operations.
2187 Output:
2188 REDUC_CODE - the corresponding tree-code to be used to reduce the
2189 vector of partial results into a single scalar result, or ERROR_MARK
2190 if the operation is a supported reduction operation, but does not have
2191 such a tree-code.
2193 Return FALSE if CODE currently cannot be vectorized as reduction. */
2195 static bool
2196 reduction_code_for_scalar_code (enum tree_code code,
2197 enum tree_code *reduc_code)
2199 switch (code)
2201 case MAX_EXPR:
2202 *reduc_code = REDUC_MAX_EXPR;
2203 return true;
2205 case MIN_EXPR:
2206 *reduc_code = REDUC_MIN_EXPR;
2207 return true;
2209 case PLUS_EXPR:
2210 *reduc_code = REDUC_PLUS_EXPR;
2211 return true;
2213 case MULT_EXPR:
2214 case MINUS_EXPR:
2215 case BIT_IOR_EXPR:
2216 case BIT_XOR_EXPR:
2217 case BIT_AND_EXPR:
2218 *reduc_code = ERROR_MARK;
2219 return true;
2221 default:
2222 return false;
2227 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2228 STMT is printed with a message MSG. */
2230 static void
2231 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2233 dump_printf_loc (msg_type, vect_location, "%s", msg);
2234 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2235 dump_printf (msg_type, "\n");
2239 /* Detect SLP reduction of the form:
2241 #a1 = phi <a5, a0>
2242 a2 = operation (a1)
2243 a3 = operation (a2)
2244 a4 = operation (a3)
2245 a5 = operation (a4)
2247 #a = phi <a5>
2249 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2250 FIRST_STMT is the first reduction stmt in the chain
2251 (a2 = operation (a1)).
2253 Return TRUE if a reduction chain was detected. */
2255 static bool
2256 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2257 gimple *first_stmt)
2259 struct loop *loop = (gimple_bb (phi))->loop_father;
2260 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2261 enum tree_code code;
2262 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2263 stmt_vec_info use_stmt_info, current_stmt_info;
2264 tree lhs;
2265 imm_use_iterator imm_iter;
2266 use_operand_p use_p;
2267 int nloop_uses, size = 0, n_out_of_loop_uses;
2268 bool found = false;
2270 if (loop != vect_loop)
2271 return false;
2273 lhs = PHI_RESULT (phi);
2274 code = gimple_assign_rhs_code (first_stmt);
2275 while (1)
2277 nloop_uses = 0;
2278 n_out_of_loop_uses = 0;
2279 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2281 gimple *use_stmt = USE_STMT (use_p);
2282 if (is_gimple_debug (use_stmt))
2283 continue;
2285 /* Check if we got back to the reduction phi. */
2286 if (use_stmt == phi)
2288 loop_use_stmt = use_stmt;
2289 found = true;
2290 break;
2293 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2295 loop_use_stmt = use_stmt;
2296 nloop_uses++;
2298 else
2299 n_out_of_loop_uses++;
2301 /* There are can be either a single use in the loop or two uses in
2302 phi nodes. */
2303 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2304 return false;
2307 if (found)
2308 break;
2310 /* We reached a statement with no loop uses. */
2311 if (nloop_uses == 0)
2312 return false;
2314 /* This is a loop exit phi, and we haven't reached the reduction phi. */
2315 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2316 return false;
2318 if (!is_gimple_assign (loop_use_stmt)
2319 || code != gimple_assign_rhs_code (loop_use_stmt)
2320 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2321 return false;
2323 /* Insert USE_STMT into reduction chain. */
2324 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2325 if (current_stmt)
2327 current_stmt_info = vinfo_for_stmt (current_stmt);
2328 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2329 GROUP_FIRST_ELEMENT (use_stmt_info)
2330 = GROUP_FIRST_ELEMENT (current_stmt_info);
2332 else
2333 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2335 lhs = gimple_assign_lhs (loop_use_stmt);
2336 current_stmt = loop_use_stmt;
2337 size++;
2340 if (!found || loop_use_stmt != phi || size < 2)
2341 return false;
2343 /* Swap the operands, if needed, to make the reduction operand be the second
2344 operand. */
2345 lhs = PHI_RESULT (phi);
2346 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2347 while (next_stmt)
2349 if (gimple_assign_rhs2 (next_stmt) == lhs)
2351 tree op = gimple_assign_rhs1 (next_stmt);
2352 gimple *def_stmt = NULL;
2354 if (TREE_CODE (op) == SSA_NAME)
2355 def_stmt = SSA_NAME_DEF_STMT (op);
2357 /* Check that the other def is either defined in the loop
2358 ("vect_internal_def"), or it's an induction (defined by a
2359 loop-header phi-node). */
2360 if (def_stmt
2361 && gimple_bb (def_stmt)
2362 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2363 && (is_gimple_assign (def_stmt)
2364 || is_gimple_call (def_stmt)
2365 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2366 == vect_induction_def
2367 || (gimple_code (def_stmt) == GIMPLE_PHI
2368 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2369 == vect_internal_def
2370 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2372 lhs = gimple_assign_lhs (next_stmt);
2373 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2374 continue;
2377 return false;
2379 else
2381 tree op = gimple_assign_rhs2 (next_stmt);
2382 gimple *def_stmt = NULL;
2384 if (TREE_CODE (op) == SSA_NAME)
2385 def_stmt = SSA_NAME_DEF_STMT (op);
2387 /* Check that the other def is either defined in the loop
2388 ("vect_internal_def"), or it's an induction (defined by a
2389 loop-header phi-node). */
2390 if (def_stmt
2391 && gimple_bb (def_stmt)
2392 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2393 && (is_gimple_assign (def_stmt)
2394 || is_gimple_call (def_stmt)
2395 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2396 == vect_induction_def
2397 || (gimple_code (def_stmt) == GIMPLE_PHI
2398 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2399 == vect_internal_def
2400 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2402 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2405 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2406 dump_printf (MSG_NOTE, "\n");
2409 swap_ssa_operands (next_stmt,
2410 gimple_assign_rhs1_ptr (next_stmt),
2411 gimple_assign_rhs2_ptr (next_stmt));
2412 update_stmt (next_stmt);
2414 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2415 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2417 else
2418 return false;
2421 lhs = gimple_assign_lhs (next_stmt);
2422 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2425 /* Save the chain for further analysis in SLP detection. */
2426 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2427 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2428 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2430 return true;
2434 /* Function vect_is_simple_reduction_1
2436 (1) Detect a cross-iteration def-use cycle that represents a simple
2437 reduction computation. We look for the following pattern:
2439 loop_header:
2440 a1 = phi < a0, a2 >
2441 a3 = ...
2442 a2 = operation (a3, a1)
2446 a3 = ...
2447 loop_header:
2448 a1 = phi < a0, a2 >
2449 a2 = operation (a3, a1)
2451 such that:
2452 1. operation is commutative and associative and it is safe to
2453 change the order of the computation (if CHECK_REDUCTION is true)
2454 2. no uses for a2 in the loop (a2 is used out of the loop)
2455 3. no uses of a1 in the loop besides the reduction operation
2456 4. no uses of a1 outside the loop.
2458 Conditions 1,4 are tested here.
2459 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2461 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2462 nested cycles, if CHECK_REDUCTION is false.
2464 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2465 reductions:
2467 a1 = phi < a0, a2 >
2468 inner loop (def of a3)
2469 a2 = phi < a3 >
2471 (4) Detect condition expressions, ie:
2472 for (int i = 0; i < N; i++)
2473 if (a[i] < val)
2474 ret_val = a[i];
2478 static gimple *
2479 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2480 bool check_reduction, bool *double_reduc,
2481 bool need_wrapping_integral_overflow,
2482 enum vect_reduction_type *v_reduc_type)
2484 struct loop *loop = (gimple_bb (phi))->loop_father;
2485 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2486 edge latch_e = loop_latch_edge (loop);
2487 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2488 gimple *def_stmt, *def1 = NULL, *def2 = NULL;
2489 enum tree_code orig_code, code;
2490 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2491 tree type;
2492 int nloop_uses;
2493 tree name;
2494 imm_use_iterator imm_iter;
2495 use_operand_p use_p;
2496 bool phi_def;
2498 *double_reduc = false;
2499 *v_reduc_type = TREE_CODE_REDUCTION;
2501 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2502 otherwise, we assume outer loop vectorization. */
2503 gcc_assert ((check_reduction && loop == vect_loop)
2504 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2506 name = PHI_RESULT (phi);
2507 /* ??? If there are no uses of the PHI result the inner loop reduction
2508 won't be detected as possibly double-reduction by vectorizable_reduction
2509 because that tries to walk the PHI arg from the preheader edge which
2510 can be constant. See PR60382. */
2511 if (has_zero_uses (name))
2512 return NULL;
2513 nloop_uses = 0;
2514 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2516 gimple *use_stmt = USE_STMT (use_p);
2517 if (is_gimple_debug (use_stmt))
2518 continue;
2520 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2522 if (dump_enabled_p ())
2523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2524 "intermediate value used outside loop.\n");
2526 return NULL;
2529 nloop_uses++;
2530 if (nloop_uses > 1)
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2534 "reduction used in loop.\n");
2535 return NULL;
2539 if (TREE_CODE (loop_arg) != SSA_NAME)
2541 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2544 "reduction: not ssa_name: ");
2545 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2546 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2548 return NULL;
2551 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2552 if (!def_stmt)
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "reduction: no def_stmt.\n");
2557 return NULL;
2560 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2562 if (dump_enabled_p ())
2564 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2565 dump_printf (MSG_NOTE, "\n");
2567 return NULL;
2570 if (is_gimple_assign (def_stmt))
2572 name = gimple_assign_lhs (def_stmt);
2573 phi_def = false;
2575 else
2577 name = PHI_RESULT (def_stmt);
2578 phi_def = true;
2581 nloop_uses = 0;
2582 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2584 gimple *use_stmt = USE_STMT (use_p);
2585 if (is_gimple_debug (use_stmt))
2586 continue;
2587 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2588 nloop_uses++;
2589 if (nloop_uses > 1)
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "reduction used in loop.\n");
2594 return NULL;
2598 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2599 defined in the inner loop. */
2600 if (phi_def)
2602 op1 = PHI_ARG_DEF (def_stmt, 0);
2604 if (gimple_phi_num_args (def_stmt) != 1
2605 || TREE_CODE (op1) != SSA_NAME)
2607 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2609 "unsupported phi node definition.\n");
2611 return NULL;
2614 def1 = SSA_NAME_DEF_STMT (op1);
2615 if (gimple_bb (def1)
2616 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2617 && loop->inner
2618 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2619 && is_gimple_assign (def1))
2621 if (dump_enabled_p ())
2622 report_vect_op (MSG_NOTE, def_stmt,
2623 "detected double reduction: ");
2625 *double_reduc = true;
2626 return def_stmt;
2629 return NULL;
2632 code = orig_code = gimple_assign_rhs_code (def_stmt);
2634 /* We can handle "res -= x[i]", which is non-associative by
2635 simply rewriting this into "res += -x[i]". Avoid changing
2636 gimple instruction for the first simple tests and only do this
2637 if we're allowed to change code at all. */
2638 if (code == MINUS_EXPR
2639 && (op1 = gimple_assign_rhs1 (def_stmt))
2640 && TREE_CODE (op1) == SSA_NAME
2641 && SSA_NAME_DEF_STMT (op1) == phi)
2642 code = PLUS_EXPR;
2644 if (check_reduction)
2646 if (code == COND_EXPR)
2647 *v_reduc_type = COND_REDUCTION;
2648 else if (!commutative_tree_code (code) || !associative_tree_code (code))
2650 if (dump_enabled_p ())
2651 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2652 "reduction: not commutative/associative: ");
2653 return NULL;
2657 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2659 if (code != COND_EXPR)
2661 if (dump_enabled_p ())
2662 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2663 "reduction: not binary operation: ");
2665 return NULL;
2668 op3 = gimple_assign_rhs1 (def_stmt);
2669 if (COMPARISON_CLASS_P (op3))
2671 op4 = TREE_OPERAND (op3, 1);
2672 op3 = TREE_OPERAND (op3, 0);
2675 op1 = gimple_assign_rhs2 (def_stmt);
2676 op2 = gimple_assign_rhs3 (def_stmt);
2678 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2680 if (dump_enabled_p ())
2681 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2682 "reduction: uses not ssa_names: ");
2684 return NULL;
2687 else
2689 op1 = gimple_assign_rhs1 (def_stmt);
2690 op2 = gimple_assign_rhs2 (def_stmt);
2692 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2694 if (dump_enabled_p ())
2695 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2696 "reduction: uses not ssa_names: ");
2698 return NULL;
2702 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2703 if ((TREE_CODE (op1) == SSA_NAME
2704 && !types_compatible_p (type,TREE_TYPE (op1)))
2705 || (TREE_CODE (op2) == SSA_NAME
2706 && !types_compatible_p (type, TREE_TYPE (op2)))
2707 || (op3 && TREE_CODE (op3) == SSA_NAME
2708 && !types_compatible_p (type, TREE_TYPE (op3)))
2709 || (op4 && TREE_CODE (op4) == SSA_NAME
2710 && !types_compatible_p (type, TREE_TYPE (op4))))
2712 if (dump_enabled_p ())
2714 dump_printf_loc (MSG_NOTE, vect_location,
2715 "reduction: multiple types: operation type: ");
2716 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2717 dump_printf (MSG_NOTE, ", operands types: ");
2718 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2719 TREE_TYPE (op1));
2720 dump_printf (MSG_NOTE, ",");
2721 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2722 TREE_TYPE (op2));
2723 if (op3)
2725 dump_printf (MSG_NOTE, ",");
2726 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2727 TREE_TYPE (op3));
2730 if (op4)
2732 dump_printf (MSG_NOTE, ",");
2733 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2734 TREE_TYPE (op4));
2736 dump_printf (MSG_NOTE, "\n");
2739 return NULL;
2742 /* Check that it's ok to change the order of the computation.
2743 Generally, when vectorizing a reduction we change the order of the
2744 computation. This may change the behavior of the program in some
2745 cases, so we need to check that this is ok. One exception is when
2746 vectorizing an outer-loop: the inner-loop is executed sequentially,
2747 and therefore vectorizing reductions in the inner-loop during
2748 outer-loop vectorization is safe. */
2750 if (*v_reduc_type != COND_REDUCTION)
2752 /* CHECKME: check for !flag_finite_math_only too? */
2753 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2754 && check_reduction)
2756 /* Changing the order of operations changes the semantics. */
2757 if (dump_enabled_p ())
2758 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2759 "reduction: unsafe fp math optimization: ");
2760 return NULL;
2762 else if (INTEGRAL_TYPE_P (type) && check_reduction)
2764 if (!operation_no_trapping_overflow (type, code))
2766 /* Changing the order of operations changes the semantics. */
2767 if (dump_enabled_p ())
2768 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2769 "reduction: unsafe int math optimization"
2770 " (overflow traps): ");
2771 return NULL;
2773 if (need_wrapping_integral_overflow
2774 && !TYPE_OVERFLOW_WRAPS (type)
2775 && operation_can_overflow (code))
2777 /* Changing the order of operations changes the semantics. */
2778 if (dump_enabled_p ())
2779 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2780 "reduction: unsafe int math optimization"
2781 " (overflow doesn't wrap): ");
2782 return NULL;
2785 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2787 /* Changing the order of operations changes the semantics. */
2788 if (dump_enabled_p ())
2789 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2790 "reduction: unsafe fixed-point math optimization: ");
2791 return NULL;
2795 /* Reduction is safe. We're dealing with one of the following:
2796 1) integer arithmetic and no trapv
2797 2) floating point arithmetic, and special flags permit this optimization
2798 3) nested cycle (i.e., outer loop vectorization). */
2799 if (TREE_CODE (op1) == SSA_NAME)
2800 def1 = SSA_NAME_DEF_STMT (op1);
2802 if (TREE_CODE (op2) == SSA_NAME)
2803 def2 = SSA_NAME_DEF_STMT (op2);
2805 if (code != COND_EXPR
2806 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2808 if (dump_enabled_p ())
2809 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2810 return NULL;
2813 /* Check that one def is the reduction def, defined by PHI,
2814 the other def is either defined in the loop ("vect_internal_def"),
2815 or it's an induction (defined by a loop-header phi-node). */
2817 if (def2 && def2 == phi
2818 && (code == COND_EXPR
2819 || !def1 || gimple_nop_p (def1)
2820 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2821 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2822 && (is_gimple_assign (def1)
2823 || is_gimple_call (def1)
2824 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2825 == vect_induction_def
2826 || (gimple_code (def1) == GIMPLE_PHI
2827 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2828 == vect_internal_def
2829 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2831 if (dump_enabled_p ())
2832 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2833 return def_stmt;
2836 if (def1 && def1 == phi
2837 && (code == COND_EXPR
2838 || !def2 || gimple_nop_p (def2)
2839 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2840 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2841 && (is_gimple_assign (def2)
2842 || is_gimple_call (def2)
2843 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2844 == vect_induction_def
2845 || (gimple_code (def2) == GIMPLE_PHI
2846 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2847 == vect_internal_def
2848 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2850 if (check_reduction
2851 && orig_code != MINUS_EXPR)
2853 if (code == COND_EXPR)
2855 /* No current known use where this case would be useful. */
2856 if (dump_enabled_p ())
2857 report_vect_op (MSG_NOTE, def_stmt,
2858 "detected reduction: cannot currently swap "
2859 "operands for cond_expr");
2860 return NULL;
2863 /* Swap operands (just for simplicity - so that the rest of the code
2864 can assume that the reduction variable is always the last (second)
2865 argument). */
2866 if (dump_enabled_p ())
2867 report_vect_op (MSG_NOTE, def_stmt,
2868 "detected reduction: need to swap operands: ");
2870 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2871 gimple_assign_rhs2_ptr (def_stmt));
2873 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2874 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2876 else
2878 if (dump_enabled_p ())
2879 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2882 return def_stmt;
2885 /* Try to find SLP reduction chain. */
2886 if (check_reduction && code != COND_EXPR
2887 && vect_is_slp_reduction (loop_info, phi, def_stmt))
2889 if (dump_enabled_p ())
2890 report_vect_op (MSG_NOTE, def_stmt,
2891 "reduction: detected reduction chain: ");
2893 return def_stmt;
2896 if (dump_enabled_p ())
2897 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2898 "reduction: unknown pattern: ");
2900 return NULL;
2903 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2904 in-place if it enables detection of more reductions. Arguments
2905 as there. */
2907 gimple *
2908 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
2909 bool check_reduction, bool *double_reduc,
2910 bool need_wrapping_integral_overflow)
2912 enum vect_reduction_type v_reduc_type;
2913 return vect_is_simple_reduction (loop_info, phi, check_reduction,
2914 double_reduc,
2915 need_wrapping_integral_overflow,
2916 &v_reduc_type);
2919 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2921 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2922 int *peel_iters_epilogue,
2923 stmt_vector_for_cost *scalar_cost_vec,
2924 stmt_vector_for_cost *prologue_cost_vec,
2925 stmt_vector_for_cost *epilogue_cost_vec)
2927 int retval = 0;
2928 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2930 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2932 *peel_iters_epilogue = vf/2;
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_NOTE, vect_location,
2935 "cost model: epilogue peel iters set to vf/2 "
2936 "because loop iterations are unknown .\n");
2938 /* If peeled iterations are known but number of scalar loop
2939 iterations are unknown, count a taken branch per peeled loop. */
2940 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2941 NULL, 0, vect_prologue);
2942 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2943 NULL, 0, vect_epilogue);
2945 else
2947 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2948 peel_iters_prologue = niters < peel_iters_prologue ?
2949 niters : peel_iters_prologue;
2950 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2951 /* If we need to peel for gaps, but no peeling is required, we have to
2952 peel VF iterations. */
2953 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2954 *peel_iters_epilogue = vf;
2957 stmt_info_for_cost *si;
2958 int j;
2959 if (peel_iters_prologue)
2960 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2961 retval += record_stmt_cost (prologue_cost_vec,
2962 si->count * peel_iters_prologue,
2963 si->kind, NULL, si->misalign,
2964 vect_prologue);
2965 if (*peel_iters_epilogue)
2966 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2967 retval += record_stmt_cost (epilogue_cost_vec,
2968 si->count * *peel_iters_epilogue,
2969 si->kind, NULL, si->misalign,
2970 vect_epilogue);
2972 return retval;
2975 /* Function vect_estimate_min_profitable_iters
2977 Return the number of iterations required for the vector version of the
2978 loop to be profitable relative to the cost of the scalar version of the
2979 loop. */
2981 static void
2982 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2983 int *ret_min_profitable_niters,
2984 int *ret_min_profitable_estimate)
2986 int min_profitable_iters;
2987 int min_profitable_estimate;
2988 int peel_iters_prologue;
2989 int peel_iters_epilogue;
2990 unsigned vec_inside_cost = 0;
2991 int vec_outside_cost = 0;
2992 unsigned vec_prologue_cost = 0;
2993 unsigned vec_epilogue_cost = 0;
2994 int scalar_single_iter_cost = 0;
2995 int scalar_outside_cost = 0;
2996 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2997 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2998 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3000 /* Cost model disabled. */
3001 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3003 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3004 *ret_min_profitable_niters = 0;
3005 *ret_min_profitable_estimate = 0;
3006 return;
3009 /* Requires loop versioning tests to handle misalignment. */
3010 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3012 /* FIXME: Make cost depend on complexity of individual check. */
3013 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3014 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3015 vect_prologue);
3016 dump_printf (MSG_NOTE,
3017 "cost model: Adding cost of checks for loop "
3018 "versioning to treat misalignment.\n");
3021 /* Requires loop versioning with alias checks. */
3022 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3024 /* FIXME: Make cost depend on complexity of individual check. */
3025 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3026 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3027 vect_prologue);
3028 dump_printf (MSG_NOTE,
3029 "cost model: Adding cost of checks for loop "
3030 "versioning aliasing.\n");
3033 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3034 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3035 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3036 vect_prologue);
3038 /* Count statements in scalar loop. Using this as scalar cost for a single
3039 iteration for now.
3041 TODO: Add outer loop support.
3043 TODO: Consider assigning different costs to different scalar
3044 statements. */
3046 scalar_single_iter_cost
3047 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3049 /* Add additional cost for the peeled instructions in prologue and epilogue
3050 loop.
3052 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3053 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3055 TODO: Build an expression that represents peel_iters for prologue and
3056 epilogue to be used in a run-time test. */
3058 if (npeel < 0)
3060 peel_iters_prologue = vf/2;
3061 dump_printf (MSG_NOTE, "cost model: "
3062 "prologue peel iters set to vf/2.\n");
3064 /* If peeling for alignment is unknown, loop bound of main loop becomes
3065 unknown. */
3066 peel_iters_epilogue = vf/2;
3067 dump_printf (MSG_NOTE, "cost model: "
3068 "epilogue peel iters set to vf/2 because "
3069 "peeling for alignment is unknown.\n");
3071 /* If peeled iterations are unknown, count a taken branch and a not taken
3072 branch per peeled loop. Even if scalar loop iterations are known,
3073 vector iterations are not known since peeled prologue iterations are
3074 not known. Hence guards remain the same. */
3075 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3076 NULL, 0, vect_prologue);
3077 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3078 NULL, 0, vect_prologue);
3079 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3080 NULL, 0, vect_epilogue);
3081 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3082 NULL, 0, vect_epilogue);
3083 stmt_info_for_cost *si;
3084 int j;
3085 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3087 struct _stmt_vec_info *stmt_info
3088 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3089 (void) add_stmt_cost (target_cost_data,
3090 si->count * peel_iters_prologue,
3091 si->kind, stmt_info, si->misalign,
3092 vect_prologue);
3093 (void) add_stmt_cost (target_cost_data,
3094 si->count * peel_iters_epilogue,
3095 si->kind, stmt_info, si->misalign,
3096 vect_epilogue);
3099 else
3101 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3102 stmt_info_for_cost *si;
3103 int j;
3104 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3106 prologue_cost_vec.create (2);
3107 epilogue_cost_vec.create (2);
3108 peel_iters_prologue = npeel;
3110 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3111 &peel_iters_epilogue,
3112 &LOOP_VINFO_SCALAR_ITERATION_COST
3113 (loop_vinfo),
3114 &prologue_cost_vec,
3115 &epilogue_cost_vec);
3117 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3119 struct _stmt_vec_info *stmt_info
3120 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3121 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3122 si->misalign, vect_prologue);
3125 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3127 struct _stmt_vec_info *stmt_info
3128 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3129 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3130 si->misalign, vect_epilogue);
3133 prologue_cost_vec.release ();
3134 epilogue_cost_vec.release ();
3137 /* FORNOW: The scalar outside cost is incremented in one of the
3138 following ways:
3140 1. The vectorizer checks for alignment and aliasing and generates
3141 a condition that allows dynamic vectorization. A cost model
3142 check is ANDED with the versioning condition. Hence scalar code
3143 path now has the added cost of the versioning check.
3145 if (cost > th & versioning_check)
3146 jmp to vector code
3148 Hence run-time scalar is incremented by not-taken branch cost.
3150 2. The vectorizer then checks if a prologue is required. If the
3151 cost model check was not done before during versioning, it has to
3152 be done before the prologue check.
3154 if (cost <= th)
3155 prologue = scalar_iters
3156 if (prologue == 0)
3157 jmp to vector code
3158 else
3159 execute prologue
3160 if (prologue == num_iters)
3161 go to exit
3163 Hence the run-time scalar cost is incremented by a taken branch,
3164 plus a not-taken branch, plus a taken branch cost.
3166 3. The vectorizer then checks if an epilogue is required. If the
3167 cost model check was not done before during prologue check, it
3168 has to be done with the epilogue check.
3170 if (prologue == 0)
3171 jmp to vector code
3172 else
3173 execute prologue
3174 if (prologue == num_iters)
3175 go to exit
3176 vector code:
3177 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3178 jmp to epilogue
3180 Hence the run-time scalar cost should be incremented by 2 taken
3181 branches.
3183 TODO: The back end may reorder the BBS's differently and reverse
3184 conditions/branch directions. Change the estimates below to
3185 something more reasonable. */
3187 /* If the number of iterations is known and we do not do versioning, we can
3188 decide whether to vectorize at compile time. Hence the scalar version
3189 do not carry cost model guard costs. */
3190 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3191 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3192 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3194 /* Cost model check occurs at versioning. */
3195 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3196 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3197 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3198 else
3200 /* Cost model check occurs at prologue generation. */
3201 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3202 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3203 + vect_get_stmt_cost (cond_branch_not_taken);
3204 /* Cost model check occurs at epilogue generation. */
3205 else
3206 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3210 /* Complete the target-specific cost calculations. */
3211 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3212 &vec_inside_cost, &vec_epilogue_cost);
3214 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3216 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3219 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3220 vec_inside_cost);
3221 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3222 vec_prologue_cost);
3223 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3224 vec_epilogue_cost);
3225 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3226 scalar_single_iter_cost);
3227 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3228 scalar_outside_cost);
3229 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3230 vec_outside_cost);
3231 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3232 peel_iters_prologue);
3233 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3234 peel_iters_epilogue);
3237 /* Calculate number of iterations required to make the vector version
3238 profitable, relative to the loop bodies only. The following condition
3239 must hold true:
3240 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3241 where
3242 SIC = scalar iteration cost, VIC = vector iteration cost,
3243 VOC = vector outside cost, VF = vectorization factor,
3244 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3245 SOC = scalar outside cost for run time cost model check. */
3247 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3249 if (vec_outside_cost <= 0)
3250 min_profitable_iters = 1;
3251 else
3253 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3254 - vec_inside_cost * peel_iters_prologue
3255 - vec_inside_cost * peel_iters_epilogue)
3256 / ((scalar_single_iter_cost * vf)
3257 - vec_inside_cost);
3259 if ((scalar_single_iter_cost * vf * min_profitable_iters)
3260 <= (((int) vec_inside_cost * min_profitable_iters)
3261 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3262 min_profitable_iters++;
3265 /* vector version will never be profitable. */
3266 else
3268 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3269 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3270 "did not happen for a simd loop");
3272 if (dump_enabled_p ())
3273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3274 "cost model: the vector iteration cost = %d "
3275 "divided by the scalar iteration cost = %d "
3276 "is greater or equal to the vectorization factor = %d"
3277 ".\n",
3278 vec_inside_cost, scalar_single_iter_cost, vf);
3279 *ret_min_profitable_niters = -1;
3280 *ret_min_profitable_estimate = -1;
3281 return;
3284 dump_printf (MSG_NOTE,
3285 " Calculated minimum iters for profitability: %d\n",
3286 min_profitable_iters);
3288 min_profitable_iters =
3289 min_profitable_iters < vf ? vf : min_profitable_iters;
3291 /* Because the condition we create is:
3292 if (niters <= min_profitable_iters)
3293 then skip the vectorized loop. */
3294 min_profitable_iters--;
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_NOTE, vect_location,
3298 " Runtime profitability threshold = %d\n",
3299 min_profitable_iters);
3301 *ret_min_profitable_niters = min_profitable_iters;
3303 /* Calculate number of iterations required to make the vector version
3304 profitable, relative to the loop bodies only.
3306 Non-vectorized variant is SIC * niters and it must win over vector
3307 variant on the expected loop trip count. The following condition must hold true:
3308 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3310 if (vec_outside_cost <= 0)
3311 min_profitable_estimate = 1;
3312 else
3314 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3315 - vec_inside_cost * peel_iters_prologue
3316 - vec_inside_cost * peel_iters_epilogue)
3317 / ((scalar_single_iter_cost * vf)
3318 - vec_inside_cost);
3320 min_profitable_estimate --;
3321 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_NOTE, vect_location,
3324 " Static estimate profitability threshold = %d\n",
3325 min_profitable_iters);
3327 *ret_min_profitable_estimate = min_profitable_estimate;
3330 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3331 vector elements (not bits) for a vector of mode MODE. */
3332 static void
3333 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3334 unsigned char *sel)
3336 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3338 for (i = 0; i < nelt; i++)
3339 sel[i] = (i + offset) & (2*nelt - 1);
3342 /* Checks whether the target supports whole-vector shifts for vectors of mode
3343 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3344 it supports vec_perm_const with masks for all necessary shift amounts. */
3345 static bool
3346 have_whole_vector_shift (enum machine_mode mode)
3348 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3349 return true;
3351 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3352 return false;
3354 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3355 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3357 for (i = nelt/2; i >= 1; i/=2)
3359 calc_vec_perm_mask_for_shift (mode, i, sel);
3360 if (!can_vec_perm_p (mode, false, sel))
3361 return false;
3363 return true;
3366 /* Return the reduction operand (with index REDUC_INDEX) of STMT. */
3368 static tree
3369 get_reduction_op (gimple *stmt, int reduc_index)
3371 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3373 case GIMPLE_SINGLE_RHS:
3374 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3375 == ternary_op);
3376 return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3377 case GIMPLE_UNARY_RHS:
3378 return gimple_assign_rhs1 (stmt);
3379 case GIMPLE_BINARY_RHS:
3380 return (reduc_index
3381 ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3382 case GIMPLE_TERNARY_RHS:
3383 return gimple_op (stmt, reduc_index + 1);
3384 default:
3385 gcc_unreachable ();
3389 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3390 functions. Design better to avoid maintenance issues. */
3392 /* Function vect_model_reduction_cost.
3394 Models cost for a reduction operation, including the vector ops
3395 generated within the strip-mine loop, the initial definition before
3396 the loop, and the epilogue code that must be generated. */
3398 static bool
3399 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3400 int ncopies, int reduc_index)
3402 int prologue_cost = 0, epilogue_cost = 0;
3403 enum tree_code code;
3404 optab optab;
3405 tree vectype;
3406 gimple *stmt, *orig_stmt;
3407 tree reduction_op;
3408 machine_mode mode;
3409 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3410 struct loop *loop = NULL;
3411 void *target_cost_data;
3413 if (loop_vinfo)
3415 loop = LOOP_VINFO_LOOP (loop_vinfo);
3416 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3418 else
3419 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3421 /* Condition reductions generate two reductions in the loop. */
3422 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3423 ncopies *= 2;
3425 /* Cost of reduction op inside loop. */
3426 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3427 stmt_info, 0, vect_body);
3428 stmt = STMT_VINFO_STMT (stmt_info);
3430 reduction_op = get_reduction_op (stmt, reduc_index);
3432 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3433 if (!vectype)
3435 if (dump_enabled_p ())
3437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3438 "unsupported data-type ");
3439 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3440 TREE_TYPE (reduction_op));
3441 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3443 return false;
3446 mode = TYPE_MODE (vectype);
3447 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3449 if (!orig_stmt)
3450 orig_stmt = STMT_VINFO_STMT (stmt_info);
3452 code = gimple_assign_rhs_code (orig_stmt);
3454 /* Add in cost for initial definition.
3455 For cond reduction we have four vectors: initial index, step, initial
3456 result of the data reduction, initial value of the index reduction. */
3457 int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3458 == COND_REDUCTION ? 4 : 1;
3459 prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3460 scalar_to_vec, stmt_info, 0,
3461 vect_prologue);
3463 /* Determine cost of epilogue code.
3465 We have a reduction operator that will reduce the vector in one statement.
3466 Also requires scalar extract. */
3468 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3470 if (reduc_code != ERROR_MARK)
3472 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3474 /* An EQ stmt and an COND_EXPR stmt. */
3475 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3476 vector_stmt, stmt_info, 0,
3477 vect_epilogue);
3478 /* Reduction of the max index and a reduction of the found
3479 values. */
3480 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3481 vec_to_scalar, stmt_info, 0,
3482 vect_epilogue);
3483 /* A broadcast of the max value. */
3484 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3485 scalar_to_vec, stmt_info, 0,
3486 vect_epilogue);
3488 else
3490 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3491 stmt_info, 0, vect_epilogue);
3492 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3493 vec_to_scalar, stmt_info, 0,
3494 vect_epilogue);
3497 else
3499 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3500 tree bitsize =
3501 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3502 int element_bitsize = tree_to_uhwi (bitsize);
3503 int nelements = vec_size_in_bits / element_bitsize;
3505 optab = optab_for_tree_code (code, vectype, optab_default);
3507 /* We have a whole vector shift available. */
3508 if (VECTOR_MODE_P (mode)
3509 && optab_handler (optab, mode) != CODE_FOR_nothing
3510 && have_whole_vector_shift (mode))
3512 /* Final reduction via vector shifts and the reduction operator.
3513 Also requires scalar extract. */
3514 epilogue_cost += add_stmt_cost (target_cost_data,
3515 exact_log2 (nelements) * 2,
3516 vector_stmt, stmt_info, 0,
3517 vect_epilogue);
3518 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3519 vec_to_scalar, stmt_info, 0,
3520 vect_epilogue);
3522 else
3523 /* Use extracts and reduction op for final reduction. For N
3524 elements, we have N extracts and N-1 reduction ops. */
3525 epilogue_cost += add_stmt_cost (target_cost_data,
3526 nelements + nelements - 1,
3527 vector_stmt, stmt_info, 0,
3528 vect_epilogue);
3532 if (dump_enabled_p ())
3533 dump_printf (MSG_NOTE,
3534 "vect_model_reduction_cost: inside_cost = %d, "
3535 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3536 prologue_cost, epilogue_cost);
3538 return true;
3542 /* Function vect_model_induction_cost.
3544 Models cost for induction operations. */
3546 static void
3547 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3549 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3550 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3551 unsigned inside_cost, prologue_cost;
3553 /* loop cost for vec_loop. */
3554 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3555 stmt_info, 0, vect_body);
3557 /* prologue cost for vec_init and vec_step. */
3558 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3559 stmt_info, 0, vect_prologue);
3561 if (dump_enabled_p ())
3562 dump_printf_loc (MSG_NOTE, vect_location,
3563 "vect_model_induction_cost: inside_cost = %d, "
3564 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3568 /* Function get_initial_def_for_induction
3570 Input:
3571 STMT - a stmt that performs an induction operation in the loop.
3572 IV_PHI - the initial value of the induction variable
3574 Output:
3575 Return a vector variable, initialized with the first VF values of
3576 the induction variable. E.g., for an iv with IV_PHI='X' and
3577 evolution S, for a vector of 4 units, we want to return:
3578 [X, X + S, X + 2*S, X + 3*S]. */
3580 static tree
3581 get_initial_def_for_induction (gimple *iv_phi)
3583 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3584 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3585 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3586 tree vectype;
3587 int nunits;
3588 edge pe = loop_preheader_edge (loop);
3589 struct loop *iv_loop;
3590 basic_block new_bb;
3591 tree new_vec, vec_init, vec_step, t;
3592 tree new_name;
3593 gimple *new_stmt;
3594 gphi *induction_phi;
3595 tree induc_def, vec_def, vec_dest;
3596 tree init_expr, step_expr;
3597 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3598 int i;
3599 int ncopies;
3600 tree expr;
3601 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3602 bool nested_in_vect_loop = false;
3603 gimple_seq stmts;
3604 imm_use_iterator imm_iter;
3605 use_operand_p use_p;
3606 gimple *exit_phi;
3607 edge latch_e;
3608 tree loop_arg;
3609 gimple_stmt_iterator si;
3610 basic_block bb = gimple_bb (iv_phi);
3611 tree stepvectype;
3612 tree resvectype;
3614 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3615 if (nested_in_vect_loop_p (loop, iv_phi))
3617 nested_in_vect_loop = true;
3618 iv_loop = loop->inner;
3620 else
3621 iv_loop = loop;
3622 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3624 latch_e = loop_latch_edge (iv_loop);
3625 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3627 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3628 gcc_assert (step_expr != NULL_TREE);
3630 pe = loop_preheader_edge (iv_loop);
3631 init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3632 loop_preheader_edge (iv_loop));
3634 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3635 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3636 gcc_assert (vectype);
3637 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3638 ncopies = vf / nunits;
3640 gcc_assert (phi_info);
3641 gcc_assert (ncopies >= 1);
3643 /* Convert the step to the desired type. */
3644 stmts = NULL;
3645 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3646 if (stmts)
3648 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3649 gcc_assert (!new_bb);
3652 /* Find the first insertion point in the BB. */
3653 si = gsi_after_labels (bb);
3655 /* Create the vector that holds the initial_value of the induction. */
3656 if (nested_in_vect_loop)
3658 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3659 been created during vectorization of previous stmts. We obtain it
3660 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3661 vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3662 /* If the initial value is not of proper type, convert it. */
3663 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3665 new_stmt
3666 = gimple_build_assign (vect_get_new_ssa_name (vectype,
3667 vect_simple_var,
3668 "vec_iv_"),
3669 VIEW_CONVERT_EXPR,
3670 build1 (VIEW_CONVERT_EXPR, vectype,
3671 vec_init));
3672 vec_init = gimple_assign_lhs (new_stmt);
3673 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3674 new_stmt);
3675 gcc_assert (!new_bb);
3676 set_vinfo_for_stmt (new_stmt,
3677 new_stmt_vec_info (new_stmt, loop_vinfo));
3680 else
3682 vec<constructor_elt, va_gc> *v;
3684 /* iv_loop is the loop to be vectorized. Create:
3685 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3686 stmts = NULL;
3687 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3689 vec_alloc (v, nunits);
3690 bool constant_p = is_gimple_min_invariant (new_name);
3691 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3692 for (i = 1; i < nunits; i++)
3694 /* Create: new_name_i = new_name + step_expr */
3695 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3696 new_name, step_expr);
3697 if (!is_gimple_min_invariant (new_name))
3698 constant_p = false;
3699 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3701 if (stmts)
3703 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3704 gcc_assert (!new_bb);
3707 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3708 if (constant_p)
3709 new_vec = build_vector_from_ctor (vectype, v);
3710 else
3711 new_vec = build_constructor (vectype, v);
3712 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3716 /* Create the vector that holds the step of the induction. */
3717 if (nested_in_vect_loop)
3718 /* iv_loop is nested in the loop to be vectorized. Generate:
3719 vec_step = [S, S, S, S] */
3720 new_name = step_expr;
3721 else
3723 /* iv_loop is the loop to be vectorized. Generate:
3724 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3725 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3727 expr = build_int_cst (integer_type_node, vf);
3728 expr = fold_convert (TREE_TYPE (step_expr), expr);
3730 else
3731 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3732 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3733 expr, step_expr);
3734 if (TREE_CODE (step_expr) == SSA_NAME)
3735 new_name = vect_init_vector (iv_phi, new_name,
3736 TREE_TYPE (step_expr), NULL);
3739 t = unshare_expr (new_name);
3740 gcc_assert (CONSTANT_CLASS_P (new_name)
3741 || TREE_CODE (new_name) == SSA_NAME);
3742 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3743 gcc_assert (stepvectype);
3744 new_vec = build_vector_from_val (stepvectype, t);
3745 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3748 /* Create the following def-use cycle:
3749 loop prolog:
3750 vec_init = ...
3751 vec_step = ...
3752 loop:
3753 vec_iv = PHI <vec_init, vec_loop>
3755 STMT
3757 vec_loop = vec_iv + vec_step; */
3759 /* Create the induction-phi that defines the induction-operand. */
3760 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3761 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3762 set_vinfo_for_stmt (induction_phi,
3763 new_stmt_vec_info (induction_phi, loop_vinfo));
3764 induc_def = PHI_RESULT (induction_phi);
3766 /* Create the iv update inside the loop */
3767 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3768 vec_def = make_ssa_name (vec_dest, new_stmt);
3769 gimple_assign_set_lhs (new_stmt, vec_def);
3770 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3771 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
3773 /* Set the arguments of the phi node: */
3774 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3775 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3776 UNKNOWN_LOCATION);
3779 /* In case that vectorization factor (VF) is bigger than the number
3780 of elements that we can fit in a vectype (nunits), we have to generate
3781 more than one vector stmt - i.e - we need to "unroll" the
3782 vector stmt by a factor VF/nunits. For more details see documentation
3783 in vectorizable_operation. */
3785 if (ncopies > 1)
3787 stmt_vec_info prev_stmt_vinfo;
3788 /* FORNOW. This restriction should be relaxed. */
3789 gcc_assert (!nested_in_vect_loop);
3791 /* Create the vector that holds the step of the induction. */
3792 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3794 expr = build_int_cst (integer_type_node, nunits);
3795 expr = fold_convert (TREE_TYPE (step_expr), expr);
3797 else
3798 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3799 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3800 expr, step_expr);
3801 if (TREE_CODE (step_expr) == SSA_NAME)
3802 new_name = vect_init_vector (iv_phi, new_name,
3803 TREE_TYPE (step_expr), NULL);
3804 t = unshare_expr (new_name);
3805 gcc_assert (CONSTANT_CLASS_P (new_name)
3806 || TREE_CODE (new_name) == SSA_NAME);
3807 new_vec = build_vector_from_val (stepvectype, t);
3808 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3810 vec_def = induc_def;
3811 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3812 for (i = 1; i < ncopies; i++)
3814 /* vec_i = vec_prev + vec_step */
3815 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3816 vec_def, vec_step);
3817 vec_def = make_ssa_name (vec_dest, new_stmt);
3818 gimple_assign_set_lhs (new_stmt, vec_def);
3820 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3821 if (!useless_type_conversion_p (resvectype, vectype))
3823 new_stmt
3824 = gimple_build_assign
3825 (vect_get_new_vect_var (resvectype, vect_simple_var,
3826 "vec_iv_"),
3827 VIEW_CONVERT_EXPR,
3828 build1 (VIEW_CONVERT_EXPR, resvectype,
3829 gimple_assign_lhs (new_stmt)));
3830 gimple_assign_set_lhs (new_stmt,
3831 make_ssa_name
3832 (gimple_assign_lhs (new_stmt), new_stmt));
3833 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3835 set_vinfo_for_stmt (new_stmt,
3836 new_stmt_vec_info (new_stmt, loop_vinfo));
3837 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3838 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3842 if (nested_in_vect_loop)
3844 /* Find the loop-closed exit-phi of the induction, and record
3845 the final vector of induction results: */
3846 exit_phi = NULL;
3847 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3849 gimple *use_stmt = USE_STMT (use_p);
3850 if (is_gimple_debug (use_stmt))
3851 continue;
3853 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3855 exit_phi = use_stmt;
3856 break;
3859 if (exit_phi)
3861 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3862 /* FORNOW. Currently not supporting the case that an inner-loop induction
3863 is not used in the outer-loop (i.e. only outside the outer-loop). */
3864 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3865 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3867 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3868 if (dump_enabled_p ())
3870 dump_printf_loc (MSG_NOTE, vect_location,
3871 "vector of inductions after inner-loop:");
3872 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3873 dump_printf (MSG_NOTE, "\n");
3879 if (dump_enabled_p ())
3881 dump_printf_loc (MSG_NOTE, vect_location,
3882 "transform induction: created def-use cycle: ");
3883 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3884 dump_printf (MSG_NOTE, "\n");
3885 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3886 SSA_NAME_DEF_STMT (vec_def), 0);
3887 dump_printf (MSG_NOTE, "\n");
3890 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3891 if (!useless_type_conversion_p (resvectype, vectype))
3893 new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3894 vect_simple_var,
3895 "vec_iv_"),
3896 VIEW_CONVERT_EXPR,
3897 build1 (VIEW_CONVERT_EXPR, resvectype,
3898 induc_def));
3899 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3900 gimple_assign_set_lhs (new_stmt, induc_def);
3901 si = gsi_after_labels (bb);
3902 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3903 set_vinfo_for_stmt (new_stmt,
3904 new_stmt_vec_info (new_stmt, loop_vinfo));
3905 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3906 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3909 return induc_def;
3913 /* Function get_initial_def_for_reduction
3915 Input:
3916 STMT - a stmt that performs a reduction operation in the loop.
3917 INIT_VAL - the initial value of the reduction variable
3919 Output:
3920 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3921 of the reduction (used for adjusting the epilog - see below).
3922 Return a vector variable, initialized according to the operation that STMT
3923 performs. This vector will be used as the initial value of the
3924 vector of partial results.
3926 Option1 (adjust in epilog): Initialize the vector as follows:
3927 add/bit or/xor: [0,0,...,0,0]
3928 mult/bit and: [1,1,...,1,1]
3929 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3930 and when necessary (e.g. add/mult case) let the caller know
3931 that it needs to adjust the result by init_val.
3933 Option2: Initialize the vector as follows:
3934 add/bit or/xor: [init_val,0,0,...,0]
3935 mult/bit and: [init_val,1,1,...,1]
3936 min/max/cond_expr: [init_val,init_val,...,init_val]
3937 and no adjustments are needed.
3939 For example, for the following code:
3941 s = init_val;
3942 for (i=0;i<n;i++)
3943 s = s + a[i];
3945 STMT is 's = s + a[i]', and the reduction variable is 's'.
3946 For a vector of 4 units, we want to return either [0,0,0,init_val],
3947 or [0,0,0,0] and let the caller know that it needs to adjust
3948 the result at the end by 'init_val'.
3950 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3951 initialization vector is simpler (same element in all entries), if
3952 ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3954 A cost model should help decide between these two schemes. */
3956 tree
3957 get_initial_def_for_reduction (gimple *stmt, tree init_val,
3958 tree *adjustment_def)
3960 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3961 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3962 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3963 tree scalar_type = TREE_TYPE (init_val);
3964 tree vectype = get_vectype_for_scalar_type (scalar_type);
3965 int nunits;
3966 enum tree_code code = gimple_assign_rhs_code (stmt);
3967 tree def_for_init;
3968 tree init_def;
3969 tree *elts;
3970 int i;
3971 bool nested_in_vect_loop = false;
3972 tree init_value;
3973 REAL_VALUE_TYPE real_init_val = dconst0;
3974 int int_init_val = 0;
3975 gimple *def_stmt = NULL;
3977 gcc_assert (vectype);
3978 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3980 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3981 || SCALAR_FLOAT_TYPE_P (scalar_type));
3983 if (nested_in_vect_loop_p (loop, stmt))
3984 nested_in_vect_loop = true;
3985 else
3986 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3988 /* In case of double reduction we only create a vector variable to be put
3989 in the reduction phi node. The actual statement creation is done in
3990 vect_create_epilog_for_reduction. */
3991 if (adjustment_def && nested_in_vect_loop
3992 && TREE_CODE (init_val) == SSA_NAME
3993 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3994 && gimple_code (def_stmt) == GIMPLE_PHI
3995 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3996 && vinfo_for_stmt (def_stmt)
3997 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3998 == vect_double_reduction_def)
4000 *adjustment_def = NULL;
4001 return vect_create_destination_var (init_val, vectype);
4004 if (TREE_CONSTANT (init_val))
4006 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4007 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
4008 else
4009 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
4011 else
4012 init_value = init_val;
4014 switch (code)
4016 case WIDEN_SUM_EXPR:
4017 case DOT_PROD_EXPR:
4018 case SAD_EXPR:
4019 case PLUS_EXPR:
4020 case MINUS_EXPR:
4021 case BIT_IOR_EXPR:
4022 case BIT_XOR_EXPR:
4023 case MULT_EXPR:
4024 case BIT_AND_EXPR:
4025 /* ADJUSMENT_DEF is NULL when called from
4026 vect_create_epilog_for_reduction to vectorize double reduction. */
4027 if (adjustment_def)
4029 if (nested_in_vect_loop)
4030 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
4031 else
4032 *adjustment_def = init_val;
4035 if (code == MULT_EXPR)
4037 real_init_val = dconst1;
4038 int_init_val = 1;
4041 if (code == BIT_AND_EXPR)
4042 int_init_val = -1;
4044 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4045 def_for_init = build_real (scalar_type, real_init_val);
4046 else
4047 def_for_init = build_int_cst (scalar_type, int_init_val);
4049 /* Create a vector of '0' or '1' except the first element. */
4050 elts = XALLOCAVEC (tree, nunits);
4051 for (i = nunits - 2; i >= 0; --i)
4052 elts[i + 1] = def_for_init;
4054 /* Option1: the first element is '0' or '1' as well. */
4055 if (adjustment_def)
4057 elts[0] = def_for_init;
4058 init_def = build_vector (vectype, elts);
4059 break;
4062 /* Option2: the first element is INIT_VAL. */
4063 elts[0] = init_val;
4064 if (TREE_CONSTANT (init_val))
4065 init_def = build_vector (vectype, elts);
4066 else
4068 vec<constructor_elt, va_gc> *v;
4069 vec_alloc (v, nunits);
4070 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4071 for (i = 1; i < nunits; ++i)
4072 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4073 init_def = build_constructor (vectype, v);
4076 break;
4078 case MIN_EXPR:
4079 case MAX_EXPR:
4080 case COND_EXPR:
4081 if (adjustment_def)
4083 *adjustment_def = NULL_TREE;
4084 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4086 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4087 break;
4090 init_def = build_vector_from_val (vectype, init_value);
4091 break;
4093 default:
4094 gcc_unreachable ();
4097 return init_def;
4100 /* Function vect_create_epilog_for_reduction
4102 Create code at the loop-epilog to finalize the result of a reduction
4103 computation.
4105 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4106 reduction statements.
4107 STMT is the scalar reduction stmt that is being vectorized.
4108 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4109 number of elements that we can fit in a vectype (nunits). In this case
4110 we have to generate more than one vector stmt - i.e - we need to "unroll"
4111 the vector stmt by a factor VF/nunits. For more details see documentation
4112 in vectorizable_operation.
4113 REDUC_CODE is the tree-code for the epilog reduction.
4114 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4115 computation.
4116 REDUC_INDEX is the index of the operand in the right hand side of the
4117 statement that is defined by REDUCTION_PHI.
4118 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4119 SLP_NODE is an SLP node containing a group of reduction statements. The
4120 first one in this group is STMT.
4121 INDUCTION_INDEX is the index of the loop for condition reductions.
4122 Otherwise it is undefined.
4124 This function:
4125 1. Creates the reduction def-use cycles: sets the arguments for
4126 REDUCTION_PHIS:
4127 The loop-entry argument is the vectorized initial-value of the reduction.
4128 The loop-latch argument is taken from VECT_DEFS - the vector of partial
4129 sums.
4130 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4131 by applying the operation specified by REDUC_CODE if available, or by
4132 other means (whole-vector shifts or a scalar loop).
4133 The function also creates a new phi node at the loop exit to preserve
4134 loop-closed form, as illustrated below.
4136 The flow at the entry to this function:
4138 loop:
4139 vec_def = phi <null, null> # REDUCTION_PHI
4140 VECT_DEF = vector_stmt # vectorized form of STMT
4141 s_loop = scalar_stmt # (scalar) STMT
4142 loop_exit:
4143 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4144 use <s_out0>
4145 use <s_out0>
4147 The above is transformed by this function into:
4149 loop:
4150 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4151 VECT_DEF = vector_stmt # vectorized form of STMT
4152 s_loop = scalar_stmt # (scalar) STMT
4153 loop_exit:
4154 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4155 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4156 v_out2 = reduce <v_out1>
4157 s_out3 = extract_field <v_out2, 0>
4158 s_out4 = adjust_result <s_out3>
4159 use <s_out4>
4160 use <s_out4>
4163 static void
4164 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4165 int ncopies, enum tree_code reduc_code,
4166 vec<gimple *> reduction_phis,
4167 int reduc_index, bool double_reduc,
4168 slp_tree slp_node, tree induction_index)
4170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4171 stmt_vec_info prev_phi_info;
4172 tree vectype;
4173 machine_mode mode;
4174 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4175 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4176 basic_block exit_bb;
4177 tree scalar_dest;
4178 tree scalar_type;
4179 gimple *new_phi = NULL, *phi;
4180 gimple_stmt_iterator exit_gsi;
4181 tree vec_dest;
4182 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4183 gimple *epilog_stmt = NULL;
4184 enum tree_code code = gimple_assign_rhs_code (stmt);
4185 gimple *exit_phi;
4186 tree bitsize;
4187 tree adjustment_def = NULL;
4188 tree vec_initial_def = NULL;
4189 tree reduction_op, expr, def, initial_def = NULL;
4190 tree orig_name, scalar_result;
4191 imm_use_iterator imm_iter, phi_imm_iter;
4192 use_operand_p use_p, phi_use_p;
4193 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4194 bool nested_in_vect_loop = false;
4195 auto_vec<gimple *> new_phis;
4196 auto_vec<gimple *> inner_phis;
4197 enum vect_def_type dt = vect_unknown_def_type;
4198 int j, i;
4199 auto_vec<tree> scalar_results;
4200 unsigned int group_size = 1, k, ratio;
4201 auto_vec<tree> vec_initial_defs;
4202 auto_vec<gimple *> phis;
4203 bool slp_reduc = false;
4204 tree new_phi_result;
4205 gimple *inner_phi = NULL;
4207 if (slp_node)
4208 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4210 if (nested_in_vect_loop_p (loop, stmt))
4212 outer_loop = loop;
4213 loop = loop->inner;
4214 nested_in_vect_loop = true;
4215 gcc_assert (!slp_node);
4218 reduction_op = get_reduction_op (stmt, reduc_index);
4220 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4221 gcc_assert (vectype);
4222 mode = TYPE_MODE (vectype);
4224 /* 1. Create the reduction def-use cycle:
4225 Set the arguments of REDUCTION_PHIS, i.e., transform
4227 loop:
4228 vec_def = phi <null, null> # REDUCTION_PHI
4229 VECT_DEF = vector_stmt # vectorized form of STMT
4232 into:
4234 loop:
4235 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4236 VECT_DEF = vector_stmt # vectorized form of STMT
4239 (in case of SLP, do it for all the phis). */
4241 /* Get the loop-entry arguments. */
4242 if (slp_node)
4243 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4244 NULL, slp_node, reduc_index);
4245 else
4247 /* Get at the scalar def before the loop, that defines the initial value
4248 of the reduction variable. */
4249 gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4250 initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4251 loop_preheader_edge (loop));
4252 vec_initial_defs.create (1);
4253 vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4254 &adjustment_def);
4255 vec_initial_defs.quick_push (vec_initial_def);
4258 /* Set phi nodes arguments. */
4259 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4261 tree vec_init_def, def;
4262 gimple_seq stmts;
4263 vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4264 true, NULL_TREE);
4265 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4266 def = vect_defs[i];
4267 for (j = 0; j < ncopies; j++)
4269 /* Set the loop-entry arg of the reduction-phi. */
4271 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4272 == INTEGER_INDUC_COND_REDUCTION)
4274 /* Initialise the reduction phi to zero. This prevents initial
4275 values of non-zero interferring with the reduction op. */
4276 gcc_assert (ncopies == 1);
4277 gcc_assert (i == 0);
4279 tree vec_init_def_type = TREE_TYPE (vec_init_def);
4280 tree zero_vec = build_zero_cst (vec_init_def_type);
4282 add_phi_arg (as_a <gphi *> (phi), zero_vec,
4283 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4285 else
4286 add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4287 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4289 /* Set the loop-latch arg for the reduction-phi. */
4290 if (j > 0)
4291 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4293 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4294 UNKNOWN_LOCATION);
4296 if (dump_enabled_p ())
4298 dump_printf_loc (MSG_NOTE, vect_location,
4299 "transform reduction: created def-use cycle: ");
4300 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4301 dump_printf (MSG_NOTE, "\n");
4302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4303 dump_printf (MSG_NOTE, "\n");
4306 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4310 /* 2. Create epilog code.
4311 The reduction epilog code operates across the elements of the vector
4312 of partial results computed by the vectorized loop.
4313 The reduction epilog code consists of:
4315 step 1: compute the scalar result in a vector (v_out2)
4316 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4317 step 3: adjust the scalar result (s_out3) if needed.
4319 Step 1 can be accomplished using one the following three schemes:
4320 (scheme 1) using reduc_code, if available.
4321 (scheme 2) using whole-vector shifts, if available.
4322 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4323 combined.
4325 The overall epilog code looks like this:
4327 s_out0 = phi <s_loop> # original EXIT_PHI
4328 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4329 v_out2 = reduce <v_out1> # step 1
4330 s_out3 = extract_field <v_out2, 0> # step 2
4331 s_out4 = adjust_result <s_out3> # step 3
4333 (step 3 is optional, and steps 1 and 2 may be combined).
4334 Lastly, the uses of s_out0 are replaced by s_out4. */
4337 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4338 v_out1 = phi <VECT_DEF>
4339 Store them in NEW_PHIS. */
4341 exit_bb = single_exit (loop)->dest;
4342 prev_phi_info = NULL;
4343 new_phis.create (vect_defs.length ());
4344 FOR_EACH_VEC_ELT (vect_defs, i, def)
4346 for (j = 0; j < ncopies; j++)
4348 tree new_def = copy_ssa_name (def);
4349 phi = create_phi_node (new_def, exit_bb);
4350 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4351 if (j == 0)
4352 new_phis.quick_push (phi);
4353 else
4355 def = vect_get_vec_def_for_stmt_copy (dt, def);
4356 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4359 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4360 prev_phi_info = vinfo_for_stmt (phi);
4364 /* The epilogue is created for the outer-loop, i.e., for the loop being
4365 vectorized. Create exit phis for the outer loop. */
4366 if (double_reduc)
4368 loop = outer_loop;
4369 exit_bb = single_exit (loop)->dest;
4370 inner_phis.create (vect_defs.length ());
4371 FOR_EACH_VEC_ELT (new_phis, i, phi)
4373 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4374 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4375 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4376 PHI_RESULT (phi));
4377 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4378 loop_vinfo));
4379 inner_phis.quick_push (phi);
4380 new_phis[i] = outer_phi;
4381 prev_phi_info = vinfo_for_stmt (outer_phi);
4382 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4384 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4385 new_result = copy_ssa_name (PHI_RESULT (phi));
4386 outer_phi = create_phi_node (new_result, exit_bb);
4387 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4388 PHI_RESULT (phi));
4389 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4390 loop_vinfo));
4391 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4392 prev_phi_info = vinfo_for_stmt (outer_phi);
4397 exit_gsi = gsi_after_labels (exit_bb);
4399 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4400 (i.e. when reduc_code is not available) and in the final adjustment
4401 code (if needed). Also get the original scalar reduction variable as
4402 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4403 represents a reduction pattern), the tree-code and scalar-def are
4404 taken from the original stmt that the pattern-stmt (STMT) replaces.
4405 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4406 are taken from STMT. */
4408 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4409 if (!orig_stmt)
4411 /* Regular reduction */
4412 orig_stmt = stmt;
4414 else
4416 /* Reduction pattern */
4417 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4418 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4419 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4422 code = gimple_assign_rhs_code (orig_stmt);
4423 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4424 partial results are added and not subtracted. */
4425 if (code == MINUS_EXPR)
4426 code = PLUS_EXPR;
4428 scalar_dest = gimple_assign_lhs (orig_stmt);
4429 scalar_type = TREE_TYPE (scalar_dest);
4430 scalar_results.create (group_size);
4431 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4432 bitsize = TYPE_SIZE (scalar_type);
4434 /* In case this is a reduction in an inner-loop while vectorizing an outer
4435 loop - we don't need to extract a single scalar result at the end of the
4436 inner-loop (unless it is double reduction, i.e., the use of reduction is
4437 outside the outer-loop). The final vector of partial results will be used
4438 in the vectorized outer-loop, or reduced to a scalar result at the end of
4439 the outer-loop. */
4440 if (nested_in_vect_loop && !double_reduc)
4441 goto vect_finalize_reduction;
4443 /* SLP reduction without reduction chain, e.g.,
4444 # a1 = phi <a2, a0>
4445 # b1 = phi <b2, b0>
4446 a2 = operation (a1)
4447 b2 = operation (b1) */
4448 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4450 /* In case of reduction chain, e.g.,
4451 # a1 = phi <a3, a0>
4452 a2 = operation (a1)
4453 a3 = operation (a2),
4455 we may end up with more than one vector result. Here we reduce them to
4456 one vector. */
4457 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4459 tree first_vect = PHI_RESULT (new_phis[0]);
4460 tree tmp;
4461 gassign *new_vec_stmt = NULL;
4463 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4464 for (k = 1; k < new_phis.length (); k++)
4466 gimple *next_phi = new_phis[k];
4467 tree second_vect = PHI_RESULT (next_phi);
4469 tmp = build2 (code, vectype, first_vect, second_vect);
4470 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4471 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4472 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4473 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4476 new_phi_result = first_vect;
4477 if (new_vec_stmt)
4479 new_phis.truncate (0);
4480 new_phis.safe_push (new_vec_stmt);
4483 else
4484 new_phi_result = PHI_RESULT (new_phis[0]);
4486 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4488 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4489 various data values where the condition matched and another vector
4490 (INDUCTION_INDEX) containing all the indexes of those matches. We
4491 need to extract the last matching index (which will be the index with
4492 highest value) and use this to index into the data vector.
4493 For the case where there were no matches, the data vector will contain
4494 all default values and the index vector will be all zeros. */
4496 /* Get various versions of the type of the vector of indexes. */
4497 tree index_vec_type = TREE_TYPE (induction_index);
4498 gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4499 tree index_scalar_type = TREE_TYPE (index_vec_type);
4500 tree index_vec_cmp_type = build_same_sized_truth_vector_type
4501 (index_vec_type);
4503 /* Get an unsigned integer version of the type of the data vector. */
4504 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4505 tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4506 tree vectype_unsigned = build_vector_type
4507 (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4509 /* First we need to create a vector (ZERO_VEC) of zeros and another
4510 vector (MAX_INDEX_VEC) filled with the last matching index, which we
4511 can create using a MAX reduction and then expanding.
4512 In the case where the loop never made any matches, the max index will
4513 be zero. */
4515 /* Vector of {0, 0, 0,...}. */
4516 tree zero_vec = make_ssa_name (vectype);
4517 tree zero_vec_rhs = build_zero_cst (vectype);
4518 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4519 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4521 /* Find maximum value from the vector of found indexes. */
4522 tree max_index = make_ssa_name (index_scalar_type);
4523 gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4524 induction_index);
4525 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4527 /* Vector of {max_index, max_index, max_index,...}. */
4528 tree max_index_vec = make_ssa_name (index_vec_type);
4529 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4530 max_index);
4531 gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4532 max_index_vec_rhs);
4533 gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4535 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4536 with the vector (INDUCTION_INDEX) of found indexes, choosing values
4537 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4538 otherwise. Only one value should match, resulting in a vector
4539 (VEC_COND) with one data value and the rest zeros.
4540 In the case where the loop never made any matches, every index will
4541 match, resulting in a vector with all data values (which will all be
4542 the default value). */
4544 /* Compare the max index vector to the vector of found indexes to find
4545 the position of the max value. */
4546 tree vec_compare = make_ssa_name (index_vec_cmp_type);
4547 gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4548 induction_index,
4549 max_index_vec);
4550 gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4552 /* Use the compare to choose either values from the data vector or
4553 zero. */
4554 tree vec_cond = make_ssa_name (vectype);
4555 gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4556 vec_compare, new_phi_result,
4557 zero_vec);
4558 gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4560 /* Finally we need to extract the data value from the vector (VEC_COND)
4561 into a scalar (MATCHED_DATA_REDUC). Logically we want to do a OR
4562 reduction, but because this doesn't exist, we can use a MAX reduction
4563 instead. The data value might be signed or a float so we need to cast
4564 it first.
4565 In the case where the loop never made any matches, the data values are
4566 all identical, and so will reduce down correctly. */
4568 /* Make the matched data values unsigned. */
4569 tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4570 tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4571 vec_cond);
4572 gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4573 VIEW_CONVERT_EXPR,
4574 vec_cond_cast_rhs);
4575 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4577 /* Reduce down to a scalar value. */
4578 tree data_reduc = make_ssa_name (scalar_type_unsigned);
4579 optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4580 optab_default);
4581 gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4582 != CODE_FOR_nothing);
4583 gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4584 REDUC_MAX_EXPR,
4585 vec_cond_cast);
4586 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4588 /* Convert the reduced value back to the result type and set as the
4589 result. */
4590 tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4591 data_reduc);
4592 epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4593 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4594 gimple_assign_set_lhs (epilog_stmt, new_temp);
4595 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4596 scalar_results.safe_push (new_temp);
4599 /* 2.3 Create the reduction code, using one of the three schemes described
4600 above. In SLP we simply need to extract all the elements from the
4601 vector (without reducing them), so we use scalar shifts. */
4602 else if (reduc_code != ERROR_MARK && !slp_reduc)
4604 tree tmp;
4605 tree vec_elem_type;
4607 /*** Case 1: Create:
4608 v_out2 = reduc_expr <v_out1> */
4610 if (dump_enabled_p ())
4611 dump_printf_loc (MSG_NOTE, vect_location,
4612 "Reduce using direct vector reduction.\n");
4614 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4615 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4617 tree tmp_dest =
4618 vect_create_destination_var (scalar_dest, vec_elem_type);
4619 tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4620 epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4621 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4622 gimple_assign_set_lhs (epilog_stmt, new_temp);
4623 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4625 tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4627 else
4628 tmp = build1 (reduc_code, scalar_type, new_phi_result);
4630 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4631 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4632 gimple_assign_set_lhs (epilog_stmt, new_temp);
4633 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4635 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4636 == INTEGER_INDUC_COND_REDUCTION)
4638 /* Earlier we set the initial value to be zero. Check the result
4639 and if it is zero then replace with the original initial
4640 value. */
4641 tree zero = build_zero_cst (scalar_type);
4642 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4644 tmp = make_ssa_name (new_scalar_dest);
4645 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4646 initial_def, new_temp);
4647 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4648 new_temp = tmp;
4651 scalar_results.safe_push (new_temp);
4653 else
4655 bool reduce_with_shift = have_whole_vector_shift (mode);
4656 int element_bitsize = tree_to_uhwi (bitsize);
4657 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4658 tree vec_temp;
4660 /* Regardless of whether we have a whole vector shift, if we're
4661 emulating the operation via tree-vect-generic, we don't want
4662 to use it. Only the first round of the reduction is likely
4663 to still be profitable via emulation. */
4664 /* ??? It might be better to emit a reduction tree code here, so that
4665 tree-vect-generic can expand the first round via bit tricks. */
4666 if (!VECTOR_MODE_P (mode))
4667 reduce_with_shift = false;
4668 else
4670 optab optab = optab_for_tree_code (code, vectype, optab_default);
4671 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4672 reduce_with_shift = false;
4675 if (reduce_with_shift && !slp_reduc)
4677 int nelements = vec_size_in_bits / element_bitsize;
4678 unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4680 int elt_offset;
4682 tree zero_vec = build_zero_cst (vectype);
4683 /*** Case 2: Create:
4684 for (offset = nelements/2; offset >= 1; offset/=2)
4686 Create: va' = vec_shift <va, offset>
4687 Create: va = vop <va, va'>
4688 } */
4690 tree rhs;
4692 if (dump_enabled_p ())
4693 dump_printf_loc (MSG_NOTE, vect_location,
4694 "Reduce using vector shifts\n");
4696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4697 new_temp = new_phi_result;
4698 for (elt_offset = nelements / 2;
4699 elt_offset >= 1;
4700 elt_offset /= 2)
4702 calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4703 tree mask = vect_gen_perm_mask_any (vectype, sel);
4704 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4705 new_temp, zero_vec, mask);
4706 new_name = make_ssa_name (vec_dest, epilog_stmt);
4707 gimple_assign_set_lhs (epilog_stmt, new_name);
4708 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4710 epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4711 new_temp);
4712 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4713 gimple_assign_set_lhs (epilog_stmt, new_temp);
4714 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4717 /* 2.4 Extract the final scalar result. Create:
4718 s_out3 = extract_field <v_out2, bitpos> */
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_NOTE, vect_location,
4722 "extract scalar result\n");
4724 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4725 bitsize, bitsize_zero_node);
4726 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4727 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4728 gimple_assign_set_lhs (epilog_stmt, new_temp);
4729 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4730 scalar_results.safe_push (new_temp);
4732 else
4734 /*** Case 3: Create:
4735 s = extract_field <v_out2, 0>
4736 for (offset = element_size;
4737 offset < vector_size;
4738 offset += element_size;)
4740 Create: s' = extract_field <v_out2, offset>
4741 Create: s = op <s, s'> // For non SLP cases
4742 } */
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_NOTE, vect_location,
4746 "Reduce using scalar code.\n");
4748 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4749 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4751 int bit_offset;
4752 if (gimple_code (new_phi) == GIMPLE_PHI)
4753 vec_temp = PHI_RESULT (new_phi);
4754 else
4755 vec_temp = gimple_assign_lhs (new_phi);
4756 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4757 bitsize_zero_node);
4758 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4759 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4760 gimple_assign_set_lhs (epilog_stmt, new_temp);
4761 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4763 /* In SLP we don't need to apply reduction operation, so we just
4764 collect s' values in SCALAR_RESULTS. */
4765 if (slp_reduc)
4766 scalar_results.safe_push (new_temp);
4768 for (bit_offset = element_bitsize;
4769 bit_offset < vec_size_in_bits;
4770 bit_offset += element_bitsize)
4772 tree bitpos = bitsize_int (bit_offset);
4773 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4774 bitsize, bitpos);
4776 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4777 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4778 gimple_assign_set_lhs (epilog_stmt, new_name);
4779 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4781 if (slp_reduc)
4783 /* In SLP we don't need to apply reduction operation, so
4784 we just collect s' values in SCALAR_RESULTS. */
4785 new_temp = new_name;
4786 scalar_results.safe_push (new_name);
4788 else
4790 epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4791 new_name, new_temp);
4792 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4793 gimple_assign_set_lhs (epilog_stmt, new_temp);
4794 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4799 /* The only case where we need to reduce scalar results in SLP, is
4800 unrolling. If the size of SCALAR_RESULTS is greater than
4801 GROUP_SIZE, we reduce them combining elements modulo
4802 GROUP_SIZE. */
4803 if (slp_reduc)
4805 tree res, first_res, new_res;
4806 gimple *new_stmt;
4808 /* Reduce multiple scalar results in case of SLP unrolling. */
4809 for (j = group_size; scalar_results.iterate (j, &res);
4810 j++)
4812 first_res = scalar_results[j % group_size];
4813 new_stmt = gimple_build_assign (new_scalar_dest, code,
4814 first_res, res);
4815 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4816 gimple_assign_set_lhs (new_stmt, new_res);
4817 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4818 scalar_results[j % group_size] = new_res;
4821 else
4822 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4823 scalar_results.safe_push (new_temp);
4827 vect_finalize_reduction:
4829 if (double_reduc)
4830 loop = loop->inner;
4832 /* 2.5 Adjust the final result by the initial value of the reduction
4833 variable. (When such adjustment is not needed, then
4834 'adjustment_def' is zero). For example, if code is PLUS we create:
4835 new_temp = loop_exit_def + adjustment_def */
4837 if (adjustment_def)
4839 gcc_assert (!slp_reduc);
4840 if (nested_in_vect_loop)
4842 new_phi = new_phis[0];
4843 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4844 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4845 new_dest = vect_create_destination_var (scalar_dest, vectype);
4847 else
4849 new_temp = scalar_results[0];
4850 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4851 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4852 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4855 epilog_stmt = gimple_build_assign (new_dest, expr);
4856 new_temp = make_ssa_name (new_dest, epilog_stmt);
4857 gimple_assign_set_lhs (epilog_stmt, new_temp);
4858 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4859 if (nested_in_vect_loop)
4861 set_vinfo_for_stmt (epilog_stmt,
4862 new_stmt_vec_info (epilog_stmt, loop_vinfo));
4863 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4864 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4866 if (!double_reduc)
4867 scalar_results.quick_push (new_temp);
4868 else
4869 scalar_results[0] = new_temp;
4871 else
4872 scalar_results[0] = new_temp;
4874 new_phis[0] = epilog_stmt;
4877 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4878 phis with new adjusted scalar results, i.e., replace use <s_out0>
4879 with use <s_out4>.
4881 Transform:
4882 loop_exit:
4883 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4884 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4885 v_out2 = reduce <v_out1>
4886 s_out3 = extract_field <v_out2, 0>
4887 s_out4 = adjust_result <s_out3>
4888 use <s_out0>
4889 use <s_out0>
4891 into:
4893 loop_exit:
4894 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4895 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4896 v_out2 = reduce <v_out1>
4897 s_out3 = extract_field <v_out2, 0>
4898 s_out4 = adjust_result <s_out3>
4899 use <s_out4>
4900 use <s_out4> */
4903 /* In SLP reduction chain we reduce vector results into one vector if
4904 necessary, hence we set here GROUP_SIZE to 1. SCALAR_DEST is the LHS of
4905 the last stmt in the reduction chain, since we are looking for the loop
4906 exit phi node. */
4907 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4909 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4910 /* Handle reduction patterns. */
4911 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4912 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4914 scalar_dest = gimple_assign_lhs (dest_stmt);
4915 group_size = 1;
4918 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4919 case that GROUP_SIZE is greater than vectorization factor). Therefore, we
4920 need to match SCALAR_RESULTS with corresponding statements. The first
4921 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4922 the first vector stmt, etc.
4923 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
4924 if (group_size > new_phis.length ())
4926 ratio = group_size / new_phis.length ();
4927 gcc_assert (!(group_size % new_phis.length ()));
4929 else
4930 ratio = 1;
4932 for (k = 0; k < group_size; k++)
4934 if (k % ratio == 0)
4936 epilog_stmt = new_phis[k / ratio];
4937 reduction_phi = reduction_phis[k / ratio];
4938 if (double_reduc)
4939 inner_phi = inner_phis[k / ratio];
4942 if (slp_reduc)
4944 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4946 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4947 /* SLP statements can't participate in patterns. */
4948 gcc_assert (!orig_stmt);
4949 scalar_dest = gimple_assign_lhs (current_stmt);
4952 phis.create (3);
4953 /* Find the loop-closed-use at the loop exit of the original scalar
4954 result. (The reduction result is expected to have two immediate uses -
4955 one at the latch block, and one at the loop exit). */
4956 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4957 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4958 && !is_gimple_debug (USE_STMT (use_p)))
4959 phis.safe_push (USE_STMT (use_p));
4961 /* While we expect to have found an exit_phi because of loop-closed-ssa
4962 form we can end up without one if the scalar cycle is dead. */
4964 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4966 if (outer_loop)
4968 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4969 gphi *vect_phi;
4971 /* FORNOW. Currently not supporting the case that an inner-loop
4972 reduction is not used in the outer-loop (but only outside the
4973 outer-loop), unless it is double reduction. */
4974 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4975 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4976 || double_reduc);
4978 if (double_reduc)
4979 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
4980 else
4981 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4982 if (!double_reduc
4983 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4984 != vect_double_reduction_def)
4985 continue;
4987 /* Handle double reduction:
4989 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4990 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4991 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4992 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4994 At that point the regular reduction (stmt2 and stmt3) is
4995 already vectorized, as well as the exit phi node, stmt4.
4996 Here we vectorize the phi node of double reduction, stmt1, and
4997 update all relevant statements. */
4999 /* Go through all the uses of s2 to find double reduction phi
5000 node, i.e., stmt1 above. */
5001 orig_name = PHI_RESULT (exit_phi);
5002 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5004 stmt_vec_info use_stmt_vinfo;
5005 stmt_vec_info new_phi_vinfo;
5006 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5007 basic_block bb = gimple_bb (use_stmt);
5008 gimple *use;
5010 /* Check that USE_STMT is really double reduction phi
5011 node. */
5012 if (gimple_code (use_stmt) != GIMPLE_PHI
5013 || gimple_phi_num_args (use_stmt) != 2
5014 || bb->loop_father != outer_loop)
5015 continue;
5016 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5017 if (!use_stmt_vinfo
5018 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5019 != vect_double_reduction_def)
5020 continue;
5022 /* Create vector phi node for double reduction:
5023 vs1 = phi <vs0, vs2>
5024 vs1 was created previously in this function by a call to
5025 vect_get_vec_def_for_operand and is stored in
5026 vec_initial_def;
5027 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5028 vs0 is created here. */
5030 /* Create vector phi node. */
5031 vect_phi = create_phi_node (vec_initial_def, bb);
5032 new_phi_vinfo = new_stmt_vec_info (vect_phi,
5033 loop_vec_info_for_loop (outer_loop));
5034 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5036 /* Create vs0 - initial def of the double reduction phi. */
5037 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5038 loop_preheader_edge (outer_loop));
5039 init_def = get_initial_def_for_reduction (stmt,
5040 preheader_arg, NULL);
5041 vect_phi_init = vect_init_vector (use_stmt, init_def,
5042 vectype, NULL);
5044 /* Update phi node arguments with vs0 and vs2. */
5045 add_phi_arg (vect_phi, vect_phi_init,
5046 loop_preheader_edge (outer_loop),
5047 UNKNOWN_LOCATION);
5048 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5049 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5050 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_NOTE, vect_location,
5053 "created double reduction phi node: ");
5054 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5055 dump_printf (MSG_NOTE, "\n");
5058 vect_phi_res = PHI_RESULT (vect_phi);
5060 /* Replace the use, i.e., set the correct vs1 in the regular
5061 reduction phi node. FORNOW, NCOPIES is always 1, so the
5062 loop is redundant. */
5063 use = reduction_phi;
5064 for (j = 0; j < ncopies; j++)
5066 edge pr_edge = loop_preheader_edge (loop);
5067 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5068 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5074 phis.release ();
5075 if (nested_in_vect_loop)
5077 if (double_reduc)
5078 loop = outer_loop;
5079 else
5080 continue;
5083 phis.create (3);
5084 /* Find the loop-closed-use at the loop exit of the original scalar
5085 result. (The reduction result is expected to have two immediate uses,
5086 one at the latch block, and one at the loop exit). For double
5087 reductions we are looking for exit phis of the outer loop. */
5088 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5090 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5092 if (!is_gimple_debug (USE_STMT (use_p)))
5093 phis.safe_push (USE_STMT (use_p));
5095 else
5097 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5099 tree phi_res = PHI_RESULT (USE_STMT (use_p));
5101 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5103 if (!flow_bb_inside_loop_p (loop,
5104 gimple_bb (USE_STMT (phi_use_p)))
5105 && !is_gimple_debug (USE_STMT (phi_use_p)))
5106 phis.safe_push (USE_STMT (phi_use_p));
5112 FOR_EACH_VEC_ELT (phis, i, exit_phi)
5114 /* Replace the uses: */
5115 orig_name = PHI_RESULT (exit_phi);
5116 scalar_result = scalar_results[k];
5117 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5118 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5119 SET_USE (use_p, scalar_result);
5122 phis.release ();
5127 /* Function is_nonwrapping_integer_induction.
5129 Check if STMT (which is part of loop LOOP) both increments and
5130 does not cause overflow. */
5132 static bool
5133 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5136 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5137 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5138 tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5139 widest_int ni, max_loop_value, lhs_max;
5140 bool overflow = false;
5142 /* Make sure the loop is integer based. */
5143 if (TREE_CODE (base) != INTEGER_CST
5144 || TREE_CODE (step) != INTEGER_CST)
5145 return false;
5147 /* Check that the induction increments. */
5148 if (tree_int_cst_sgn (step) == -1)
5149 return false;
5151 /* Check that the max size of the loop will not wrap. */
5153 if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5154 return true;
5156 if (! max_stmt_executions (loop, &ni))
5157 return false;
5159 max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5160 &overflow);
5161 if (overflow)
5162 return false;
5164 max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5165 TYPE_SIGN (lhs_type), &overflow);
5166 if (overflow)
5167 return false;
5169 return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5170 <= TYPE_PRECISION (lhs_type));
5173 /* Function vectorizable_reduction.
5175 Check if STMT performs a reduction operation that can be vectorized.
5176 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5178 Return FALSE if not a vectorizable STMT, TRUE otherwise.
5180 This function also handles reduction idioms (patterns) that have been
5181 recognized in advance during vect_pattern_recog. In this case, STMT may be
5182 of this form:
5183 X = pattern_expr (arg0, arg1, ..., X)
5184 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
5185 sequence that had been detected and replaced by the pattern-stmt (STMT).
5187 This function also handles reduction of condition expressions, for example:
5188 for (int i = 0; i < N; i++)
5189 if (a[i] < value)
5190 last = a[i];
5191 This is handled by vectorising the loop and creating an additional vector
5192 containing the loop indexes for which "a[i] < value" was true. In the
5193 function epilogue this is reduced to a single max value and then used to
5194 index into the vector of results.
5196 In some cases of reduction patterns, the type of the reduction variable X is
5197 different than the type of the other arguments of STMT.
5198 In such cases, the vectype that is used when transforming STMT into a vector
5199 stmt is different than the vectype that is used to determine the
5200 vectorization factor, because it consists of a different number of elements
5201 than the actual number of elements that are being operated upon in parallel.
5203 For example, consider an accumulation of shorts into an int accumulator.
5204 On some targets it's possible to vectorize this pattern operating on 8
5205 shorts at a time (hence, the vectype for purposes of determining the
5206 vectorization factor should be V8HI); on the other hand, the vectype that
5207 is used to create the vector form is actually V4SI (the type of the result).
5209 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5210 indicates what is the actual level of parallelism (V8HI in the example), so
5211 that the right vectorization factor would be derived. This vectype
5212 corresponds to the type of arguments to the reduction stmt, and should *NOT*
5213 be used to create the vectorized stmt. The right vectype for the vectorized
5214 stmt is obtained from the type of the result X:
5215 get_vectype_for_scalar_type (TREE_TYPE (X))
5217 This means that, contrary to "regular" reductions (or "regular" stmts in
5218 general), the following equation:
5219 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5220 does *NOT* necessarily hold for reduction patterns. */
5222 bool
5223 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5224 gimple **vec_stmt, slp_tree slp_node)
5226 tree vec_dest;
5227 tree scalar_dest;
5228 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5229 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5230 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5231 tree vectype_in = NULL_TREE;
5232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5234 enum tree_code code, orig_code, epilog_reduc_code;
5235 machine_mode vec_mode;
5236 int op_type;
5237 optab optab, reduc_optab;
5238 tree new_temp = NULL_TREE;
5239 gimple *def_stmt;
5240 enum vect_def_type dt;
5241 gphi *new_phi = NULL;
5242 tree scalar_type;
5243 bool is_simple_use;
5244 gimple *orig_stmt;
5245 stmt_vec_info orig_stmt_info;
5246 tree expr = NULL_TREE;
5247 int i;
5248 int ncopies;
5249 int epilog_copies;
5250 stmt_vec_info prev_stmt_info, prev_phi_info;
5251 bool single_defuse_cycle = false;
5252 tree reduc_def = NULL_TREE;
5253 gimple *new_stmt = NULL;
5254 int j;
5255 tree ops[3];
5256 bool nested_cycle = false, found_nested_cycle_def = false;
5257 gimple *reduc_def_stmt = NULL;
5258 bool double_reduc = false, dummy;
5259 basic_block def_bb;
5260 struct loop * def_stmt_loop, *outer_loop = NULL;
5261 tree def_arg;
5262 gimple *def_arg_stmt;
5263 auto_vec<tree> vec_oprnds0;
5264 auto_vec<tree> vec_oprnds1;
5265 auto_vec<tree> vect_defs;
5266 auto_vec<gimple *> phis;
5267 int vec_num;
5268 tree def0, def1, tem, op0, op1 = NULL_TREE;
5269 bool first_p = true;
5270 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5271 gimple *cond_expr_induction_def_stmt = NULL;
5273 /* In case of reduction chain we switch to the first stmt in the chain, but
5274 we don't update STMT_INFO, since only the last stmt is marked as reduction
5275 and has reduction properties. */
5276 if (GROUP_FIRST_ELEMENT (stmt_info)
5277 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5279 stmt = GROUP_FIRST_ELEMENT (stmt_info);
5280 first_p = false;
5283 if (nested_in_vect_loop_p (loop, stmt))
5285 outer_loop = loop;
5286 loop = loop->inner;
5287 nested_cycle = true;
5290 /* 1. Is vectorizable reduction? */
5291 /* Not supportable if the reduction variable is used in the loop, unless
5292 it's a reduction chain. */
5293 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5294 && !GROUP_FIRST_ELEMENT (stmt_info))
5295 return false;
5297 /* Reductions that are not used even in an enclosing outer-loop,
5298 are expected to be "live" (used out of the loop). */
5299 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5300 && !STMT_VINFO_LIVE_P (stmt_info))
5301 return false;
5303 /* Make sure it was already recognized as a reduction computation. */
5304 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5305 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5306 return false;
5308 /* 2. Has this been recognized as a reduction pattern?
5310 Check if STMT represents a pattern that has been recognized
5311 in earlier analysis stages. For stmts that represent a pattern,
5312 the STMT_VINFO_RELATED_STMT field records the last stmt in
5313 the original sequence that constitutes the pattern. */
5315 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5316 if (orig_stmt)
5318 orig_stmt_info = vinfo_for_stmt (orig_stmt);
5319 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5320 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5323 /* 3. Check the operands of the operation. The first operands are defined
5324 inside the loop body. The last operand is the reduction variable,
5325 which is defined by the loop-header-phi. */
5327 gcc_assert (is_gimple_assign (stmt));
5329 /* Flatten RHS. */
5330 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5332 case GIMPLE_SINGLE_RHS:
5333 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5334 if (op_type == ternary_op)
5336 tree rhs = gimple_assign_rhs1 (stmt);
5337 ops[0] = TREE_OPERAND (rhs, 0);
5338 ops[1] = TREE_OPERAND (rhs, 1);
5339 ops[2] = TREE_OPERAND (rhs, 2);
5340 code = TREE_CODE (rhs);
5342 else
5343 return false;
5344 break;
5346 case GIMPLE_BINARY_RHS:
5347 code = gimple_assign_rhs_code (stmt);
5348 op_type = TREE_CODE_LENGTH (code);
5349 gcc_assert (op_type == binary_op);
5350 ops[0] = gimple_assign_rhs1 (stmt);
5351 ops[1] = gimple_assign_rhs2 (stmt);
5352 break;
5354 case GIMPLE_TERNARY_RHS:
5355 code = gimple_assign_rhs_code (stmt);
5356 op_type = TREE_CODE_LENGTH (code);
5357 gcc_assert (op_type == ternary_op);
5358 ops[0] = gimple_assign_rhs1 (stmt);
5359 ops[1] = gimple_assign_rhs2 (stmt);
5360 ops[2] = gimple_assign_rhs3 (stmt);
5361 break;
5363 case GIMPLE_UNARY_RHS:
5364 return false;
5366 default:
5367 gcc_unreachable ();
5369 /* The default is that the reduction variable is the last in statement. */
5370 int reduc_index = op_type - 1;
5371 if (code == MINUS_EXPR)
5372 reduc_index = 0;
5374 if (code == COND_EXPR && slp_node)
5375 return false;
5377 scalar_dest = gimple_assign_lhs (stmt);
5378 scalar_type = TREE_TYPE (scalar_dest);
5379 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5380 && !SCALAR_FLOAT_TYPE_P (scalar_type))
5381 return false;
5383 /* Do not try to vectorize bit-precision reductions. */
5384 if ((TYPE_PRECISION (scalar_type)
5385 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5386 return false;
5388 /* All uses but the last are expected to be defined in the loop.
5389 The last use is the reduction variable. In case of nested cycle this
5390 assumption is not true: we use reduc_index to record the index of the
5391 reduction variable. */
5392 for (i = 0; i < op_type; i++)
5394 if (i == reduc_index)
5395 continue;
5397 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5398 if (i == 0 && code == COND_EXPR)
5399 continue;
5401 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5402 &def_stmt, &dt, &tem);
5403 if (!vectype_in)
5404 vectype_in = tem;
5405 gcc_assert (is_simple_use);
5407 if (dt != vect_internal_def
5408 && dt != vect_external_def
5409 && dt != vect_constant_def
5410 && dt != vect_induction_def
5411 && !(dt == vect_nested_cycle && nested_cycle))
5412 return false;
5414 if (dt == vect_nested_cycle)
5416 found_nested_cycle_def = true;
5417 reduc_def_stmt = def_stmt;
5418 reduc_index = i;
5421 if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
5422 cond_expr_induction_def_stmt = def_stmt;
5425 is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5426 &def_stmt, &dt, &tem);
5427 if (!vectype_in)
5428 vectype_in = tem;
5429 gcc_assert (is_simple_use);
5430 if (!found_nested_cycle_def)
5431 reduc_def_stmt = def_stmt;
5433 if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5434 return false;
5436 if (!(dt == vect_reduction_def
5437 || dt == vect_nested_cycle
5438 || ((dt == vect_internal_def || dt == vect_external_def
5439 || dt == vect_constant_def || dt == vect_induction_def)
5440 && nested_cycle && found_nested_cycle_def)))
5442 /* For pattern recognized stmts, orig_stmt might be a reduction,
5443 but some helper statements for the pattern might not, or
5444 might be COND_EXPRs with reduction uses in the condition. */
5445 gcc_assert (orig_stmt);
5446 return false;
5449 enum vect_reduction_type v_reduc_type;
5450 gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5451 !nested_cycle, &dummy, false,
5452 &v_reduc_type);
5454 /* If we have a condition reduction, see if we can simplify it further. */
5455 if (v_reduc_type == COND_REDUCTION
5456 && cond_expr_induction_def_stmt != NULL
5457 && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt, loop))
5459 if (dump_enabled_p ())
5460 dump_printf_loc (MSG_NOTE, vect_location,
5461 "condition expression based on integer induction.\n");
5462 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
5464 else
5465 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5467 if (orig_stmt)
5468 gcc_assert (tmp == orig_stmt
5469 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5470 else
5471 /* We changed STMT to be the first stmt in reduction chain, hence we
5472 check that in this case the first element in the chain is STMT. */
5473 gcc_assert (stmt == tmp
5474 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5476 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5477 return false;
5479 if (slp_node || PURE_SLP_STMT (stmt_info))
5480 ncopies = 1;
5481 else
5482 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5483 / TYPE_VECTOR_SUBPARTS (vectype_in));
5485 gcc_assert (ncopies >= 1);
5487 vec_mode = TYPE_MODE (vectype_in);
5489 if (code == COND_EXPR)
5491 /* Only call during the analysis stage, otherwise we'll lose
5492 STMT_VINFO_TYPE. */
5493 if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5494 ops[reduc_index], 0, NULL))
5496 if (dump_enabled_p ())
5497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5498 "unsupported condition in reduction\n");
5499 return false;
5502 else
5504 /* 4. Supportable by target? */
5506 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5507 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5509 /* Shifts and rotates are only supported by vectorizable_shifts,
5510 not vectorizable_reduction. */
5511 if (dump_enabled_p ())
5512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5513 "unsupported shift or rotation.\n");
5514 return false;
5517 /* 4.1. check support for the operation in the loop */
5518 optab = optab_for_tree_code (code, vectype_in, optab_default);
5519 if (!optab)
5521 if (dump_enabled_p ())
5522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5523 "no optab.\n");
5525 return false;
5528 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5530 if (dump_enabled_p ())
5531 dump_printf (MSG_NOTE, "op not supported by target.\n");
5533 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5534 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5535 < vect_min_worthwhile_factor (code))
5536 return false;
5538 if (dump_enabled_p ())
5539 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5542 /* Worthwhile without SIMD support? */
5543 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5544 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5545 < vect_min_worthwhile_factor (code))
5547 if (dump_enabled_p ())
5548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5549 "not worthwhile without SIMD support.\n");
5551 return false;
5555 /* 4.2. Check support for the epilog operation.
5557 If STMT represents a reduction pattern, then the type of the
5558 reduction variable may be different than the type of the rest
5559 of the arguments. For example, consider the case of accumulation
5560 of shorts into an int accumulator; The original code:
5561 S1: int_a = (int) short_a;
5562 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5564 was replaced with:
5565 STMT: int_acc = widen_sum <short_a, int_acc>
5567 This means that:
5568 1. The tree-code that is used to create the vector operation in the
5569 epilog code (that reduces the partial results) is not the
5570 tree-code of STMT, but is rather the tree-code of the original
5571 stmt from the pattern that STMT is replacing. I.e, in the example
5572 above we want to use 'widen_sum' in the loop, but 'plus' in the
5573 epilog.
5574 2. The type (mode) we use to check available target support
5575 for the vector operation to be created in the *epilog*, is
5576 determined by the type of the reduction variable (in the example
5577 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
5578 However the type (mode) we use to check available target support
5579 for the vector operation to be created *inside the loop*, is
5580 determined by the type of the other arguments to STMT (in the
5581 example we'd check this: optab_handler (widen_sum_optab,
5582 vect_short_mode)).
5584 This is contrary to "regular" reductions, in which the types of all
5585 the arguments are the same as the type of the reduction variable.
5586 For "regular" reductions we can therefore use the same vector type
5587 (and also the same tree-code) when generating the epilog code and
5588 when generating the code inside the loop. */
5590 if (orig_stmt)
5592 /* This is a reduction pattern: get the vectype from the type of the
5593 reduction variable, and get the tree-code from orig_stmt. */
5594 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5595 == TREE_CODE_REDUCTION);
5596 orig_code = gimple_assign_rhs_code (orig_stmt);
5597 gcc_assert (vectype_out);
5598 vec_mode = TYPE_MODE (vectype_out);
5600 else
5602 /* Regular reduction: use the same vectype and tree-code as used for
5603 the vector code inside the loop can be used for the epilog code. */
5604 orig_code = code;
5606 if (code == MINUS_EXPR)
5607 orig_code = PLUS_EXPR;
5609 /* For simple condition reductions, replace with the actual expression
5610 we want to base our reduction around. */
5611 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5612 == INTEGER_INDUC_COND_REDUCTION)
5613 orig_code = MAX_EXPR;
5616 if (nested_cycle)
5618 def_bb = gimple_bb (reduc_def_stmt);
5619 def_stmt_loop = def_bb->loop_father;
5620 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5621 loop_preheader_edge (def_stmt_loop));
5622 if (TREE_CODE (def_arg) == SSA_NAME
5623 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5624 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5625 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5626 && vinfo_for_stmt (def_arg_stmt)
5627 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5628 == vect_double_reduction_def)
5629 double_reduc = true;
5632 epilog_reduc_code = ERROR_MARK;
5634 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
5635 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5636 == INTEGER_INDUC_COND_REDUCTION)
5638 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5640 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5641 optab_default);
5642 if (!reduc_optab)
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5646 "no optab for reduction.\n");
5648 epilog_reduc_code = ERROR_MARK;
5650 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5652 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5653 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5655 if (dump_enabled_p ())
5656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5657 "reduc op not supported by target.\n");
5659 epilog_reduc_code = ERROR_MARK;
5663 /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5664 generated in the epilog using multiple expressions. This does not
5665 work for condition reductions. */
5666 if (epilog_reduc_code == ERROR_MARK
5667 && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5668 == INTEGER_INDUC_COND_REDUCTION)
5670 if (dump_enabled_p ())
5671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5672 "no reduc code for scalar code.\n");
5673 return false;
5676 else
5678 if (!nested_cycle || double_reduc)
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5682 "no reduc code for scalar code.\n");
5684 return false;
5688 else
5690 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5691 cr_index_scalar_type = make_unsigned_type (scalar_precision);
5692 cr_index_vector_type = build_vector_type
5693 (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5695 epilog_reduc_code = REDUC_MAX_EXPR;
5696 optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5697 optab_default);
5698 if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5699 == CODE_FOR_nothing)
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "reduc max op not supported by target.\n");
5704 return false;
5708 if ((double_reduc
5709 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
5710 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5711 == INTEGER_INDUC_COND_REDUCTION)
5712 && ncopies > 1)
5714 if (dump_enabled_p ())
5715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5716 "multiple types in double reduction or condition "
5717 "reduction.\n");
5718 return false;
5721 /* In case of widenning multiplication by a constant, we update the type
5722 of the constant to be the type of the other operand. We check that the
5723 constant fits the type in the pattern recognition pass. */
5724 if (code == DOT_PROD_EXPR
5725 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5727 if (TREE_CODE (ops[0]) == INTEGER_CST)
5728 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5729 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5730 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5731 else
5733 if (dump_enabled_p ())
5734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5735 "invalid types in dot-prod\n");
5737 return false;
5741 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5743 widest_int ni;
5745 if (! max_loop_iterations (loop, &ni))
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_NOTE, vect_location,
5749 "loop count not known, cannot create cond "
5750 "reduction.\n");
5751 return false;
5753 /* Convert backedges to iterations. */
5754 ni += 1;
5756 /* The additional index will be the same type as the condition. Check
5757 that the loop can fit into this less one (because we'll use up the
5758 zero slot for when there are no matches). */
5759 tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
5760 if (wi::geu_p (ni, wi::to_widest (max_index)))
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_NOTE, vect_location,
5764 "loop size is greater than data size.\n");
5765 return false;
5769 if (!vec_stmt) /* transformation not required. */
5771 if (first_p
5772 && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5773 reduc_index))
5774 return false;
5775 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5776 return true;
5779 /** Transform. **/
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5784 /* FORNOW: Multiple types are not supported for condition. */
5785 if (code == COND_EXPR)
5786 gcc_assert (ncopies == 1);
5788 /* Create the destination vector */
5789 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5791 /* In case the vectorization factor (VF) is bigger than the number
5792 of elements that we can fit in a vectype (nunits), we have to generate
5793 more than one vector stmt - i.e - we need to "unroll" the
5794 vector stmt by a factor VF/nunits. For more details see documentation
5795 in vectorizable_operation. */
5797 /* If the reduction is used in an outer loop we need to generate
5798 VF intermediate results, like so (e.g. for ncopies=2):
5799 r0 = phi (init, r0)
5800 r1 = phi (init, r1)
5801 r0 = x0 + r0;
5802 r1 = x1 + r1;
5803 (i.e. we generate VF results in 2 registers).
5804 In this case we have a separate def-use cycle for each copy, and therefore
5805 for each copy we get the vector def for the reduction variable from the
5806 respective phi node created for this copy.
5808 Otherwise (the reduction is unused in the loop nest), we can combine
5809 together intermediate results, like so (e.g. for ncopies=2):
5810 r = phi (init, r)
5811 r = x0 + r;
5812 r = x1 + r;
5813 (i.e. we generate VF/2 results in a single register).
5814 In this case for each copy we get the vector def for the reduction variable
5815 from the vectorized reduction operation generated in the previous iteration.
5818 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5820 single_defuse_cycle = true;
5821 epilog_copies = 1;
5823 else
5824 epilog_copies = ncopies;
5826 prev_stmt_info = NULL;
5827 prev_phi_info = NULL;
5828 if (slp_node)
5829 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5830 else
5832 vec_num = 1;
5833 vec_oprnds0.create (1);
5834 if (op_type == ternary_op)
5835 vec_oprnds1.create (1);
5838 phis.create (vec_num);
5839 vect_defs.create (vec_num);
5840 if (!slp_node)
5841 vect_defs.quick_push (NULL_TREE);
5843 for (j = 0; j < ncopies; j++)
5845 if (j == 0 || !single_defuse_cycle)
5847 for (i = 0; i < vec_num; i++)
5849 /* Create the reduction-phi that defines the reduction
5850 operand. */
5851 new_phi = create_phi_node (vec_dest, loop->header);
5852 set_vinfo_for_stmt (new_phi,
5853 new_stmt_vec_info (new_phi, loop_vinfo));
5854 if (j == 0 || slp_node)
5855 phis.quick_push (new_phi);
5859 if (code == COND_EXPR)
5861 gcc_assert (!slp_node);
5862 vectorizable_condition (stmt, gsi, vec_stmt,
5863 PHI_RESULT (phis[0]),
5864 reduc_index, NULL);
5865 /* Multiple types are not supported for condition. */
5866 break;
5869 /* Handle uses. */
5870 if (j == 0)
5872 op0 = ops[!reduc_index];
5873 if (op_type == ternary_op)
5875 if (reduc_index == 0)
5876 op1 = ops[2];
5877 else
5878 op1 = ops[1];
5881 if (slp_node)
5882 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5883 slp_node, -1);
5884 else
5886 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5887 stmt);
5888 vec_oprnds0.quick_push (loop_vec_def0);
5889 if (op_type == ternary_op)
5891 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
5892 vec_oprnds1.quick_push (loop_vec_def1);
5896 else
5898 if (!slp_node)
5900 enum vect_def_type dt;
5901 gimple *dummy_stmt;
5903 vect_is_simple_use (ops[!reduc_index], loop_vinfo,
5904 &dummy_stmt, &dt);
5905 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5906 loop_vec_def0);
5907 vec_oprnds0[0] = loop_vec_def0;
5908 if (op_type == ternary_op)
5910 vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
5911 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5912 loop_vec_def1);
5913 vec_oprnds1[0] = loop_vec_def1;
5917 if (single_defuse_cycle)
5918 reduc_def = gimple_assign_lhs (new_stmt);
5920 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5923 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5925 if (slp_node)
5926 reduc_def = PHI_RESULT (phis[i]);
5927 else
5929 if (!single_defuse_cycle || j == 0)
5930 reduc_def = PHI_RESULT (new_phi);
5933 def1 = ((op_type == ternary_op)
5934 ? vec_oprnds1[i] : NULL);
5935 if (op_type == binary_op)
5937 if (reduc_index == 0)
5938 expr = build2 (code, vectype_out, reduc_def, def0);
5939 else
5940 expr = build2 (code, vectype_out, def0, reduc_def);
5942 else
5944 if (reduc_index == 0)
5945 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5946 else
5948 if (reduc_index == 1)
5949 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5950 else
5951 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5955 new_stmt = gimple_build_assign (vec_dest, expr);
5956 new_temp = make_ssa_name (vec_dest, new_stmt);
5957 gimple_assign_set_lhs (new_stmt, new_temp);
5958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5960 if (slp_node)
5962 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5963 vect_defs.quick_push (new_temp);
5965 else
5966 vect_defs[0] = new_temp;
5969 if (slp_node)
5970 continue;
5972 if (j == 0)
5973 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5974 else
5975 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5977 prev_stmt_info = vinfo_for_stmt (new_stmt);
5978 prev_phi_info = vinfo_for_stmt (new_phi);
5981 tree indx_before_incr, indx_after_incr, cond_name = NULL;
5983 /* Finalize the reduction-phi (set its arguments) and create the
5984 epilog reduction code. */
5985 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5987 new_temp = gimple_assign_lhs (*vec_stmt);
5988 vect_defs[0] = new_temp;
5990 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
5991 which is updated with the current index of the loop for every match of
5992 the original loop's cond_expr (VEC_STMT). This results in a vector
5993 containing the last time the condition passed for that vector lane.
5994 The first match will be a 1 to allow 0 to be used for non-matching
5995 indexes. If there are no matches at all then the vector will be all
5996 zeroes. */
5997 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5999 int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6000 int k;
6002 gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6004 /* First we create a simple vector induction variable which starts
6005 with the values {1,2,3,...} (SERIES_VECT) and increments by the
6006 vector size (STEP). */
6008 /* Create a {1,2,3,...} vector. */
6009 tree *vtemp = XALLOCAVEC (tree, nunits_out);
6010 for (k = 0; k < nunits_out; ++k)
6011 vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6012 tree series_vect = build_vector (cr_index_vector_type, vtemp);
6014 /* Create a vector of the step value. */
6015 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6016 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
6018 /* Create an induction variable. */
6019 gimple_stmt_iterator incr_gsi;
6020 bool insert_after;
6021 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6022 create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6023 insert_after, &indx_before_incr, &indx_after_incr);
6025 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6026 filled with zeros (VEC_ZERO). */
6028 /* Create a vector of 0s. */
6029 tree zero = build_zero_cst (cr_index_scalar_type);
6030 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6032 /* Create a vector phi node. */
6033 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6034 new_phi = create_phi_node (new_phi_tree, loop->header);
6035 set_vinfo_for_stmt (new_phi,
6036 new_stmt_vec_info (new_phi, loop_vinfo));
6037 add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6038 UNKNOWN_LOCATION);
6040 /* Now take the condition from the loops original cond_expr
6041 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6042 every match uses values from the induction variable
6043 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6044 (NEW_PHI_TREE).
6045 Finally, we update the phi (NEW_PHI_TREE) to take the value of
6046 the new cond_expr (INDEX_COND_EXPR). */
6048 /* Turn the condition from vec_stmt into an ssa name. */
6049 gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
6050 tree ccompare = gimple_assign_rhs1 (*vec_stmt);
6051 tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
6052 gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
6053 ccompare);
6054 gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
6055 gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
6056 update_stmt (*vec_stmt);
6058 /* Create a conditional, where the condition is taken from vec_stmt
6059 (CCOMPARE_NAME), then is the induction index (INDEX_BEFORE_INCR)
6060 and else is the phi (NEW_PHI_TREE). */
6061 tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6062 ccompare_name, indx_before_incr,
6063 new_phi_tree);
6064 cond_name = make_ssa_name (cr_index_vector_type);
6065 gimple *index_condition = gimple_build_assign (cond_name,
6066 index_cond_expr);
6067 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6068 stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6069 loop_vinfo);
6070 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6071 set_vinfo_for_stmt (index_condition, index_vec_info);
6073 /* Update the phi with the vec cond. */
6074 add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6075 UNKNOWN_LOCATION);
6079 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6080 epilog_reduc_code, phis, reduc_index,
6081 double_reduc, slp_node, cond_name);
6083 return true;
6086 /* Function vect_min_worthwhile_factor.
6088 For a loop where we could vectorize the operation indicated by CODE,
6089 return the minimum vectorization factor that makes it worthwhile
6090 to use generic vectors. */
6092 vect_min_worthwhile_factor (enum tree_code code)
6094 switch (code)
6096 case PLUS_EXPR:
6097 case MINUS_EXPR:
6098 case NEGATE_EXPR:
6099 return 4;
6101 case BIT_AND_EXPR:
6102 case BIT_IOR_EXPR:
6103 case BIT_XOR_EXPR:
6104 case BIT_NOT_EXPR:
6105 return 2;
6107 default:
6108 return INT_MAX;
6113 /* Function vectorizable_induction
6115 Check if PHI performs an induction computation that can be vectorized.
6116 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6117 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6118 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6120 bool
6121 vectorizable_induction (gimple *phi,
6122 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6123 gimple **vec_stmt)
6125 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6126 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6128 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6129 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6130 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6131 tree vec_def;
6133 gcc_assert (ncopies >= 1);
6134 /* FORNOW. These restrictions should be relaxed. */
6135 if (nested_in_vect_loop_p (loop, phi))
6137 imm_use_iterator imm_iter;
6138 use_operand_p use_p;
6139 gimple *exit_phi;
6140 edge latch_e;
6141 tree loop_arg;
6143 if (ncopies > 1)
6145 if (dump_enabled_p ())
6146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147 "multiple types in nested loop.\n");
6148 return false;
6151 exit_phi = NULL;
6152 latch_e = loop_latch_edge (loop->inner);
6153 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6154 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6156 gimple *use_stmt = USE_STMT (use_p);
6157 if (is_gimple_debug (use_stmt))
6158 continue;
6160 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6162 exit_phi = use_stmt;
6163 break;
6166 if (exit_phi)
6168 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
6169 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6170 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6172 if (dump_enabled_p ())
6173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6174 "inner-loop induction only used outside "
6175 "of the outer vectorized loop.\n");
6176 return false;
6181 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6182 return false;
6184 /* FORNOW: SLP not supported. */
6185 if (STMT_SLP_TYPE (stmt_info))
6186 return false;
6188 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6190 if (gimple_code (phi) != GIMPLE_PHI)
6191 return false;
6193 if (!vec_stmt) /* transformation not required. */
6195 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6196 if (dump_enabled_p ())
6197 dump_printf_loc (MSG_NOTE, vect_location,
6198 "=== vectorizable_induction ===\n");
6199 vect_model_induction_cost (stmt_info, ncopies);
6200 return true;
6203 /** Transform. **/
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6208 vec_def = get_initial_def_for_induction (phi);
6209 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6210 return true;
6213 /* Function vectorizable_live_operation.
6215 STMT computes a value that is used outside the loop. Check if
6216 it can be supported. */
6218 bool
6219 vectorizable_live_operation (gimple *stmt,
6220 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6221 gimple **vec_stmt)
6223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6225 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6226 tree op;
6227 gimple *def_stmt;
6228 ssa_op_iter iter;
6230 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6232 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6233 return false;
6235 if (!is_gimple_assign (stmt))
6237 if (gimple_call_internal_p (stmt)
6238 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
6239 && gimple_call_lhs (stmt)
6240 && loop->simduid
6241 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
6242 && loop->simduid
6243 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
6245 edge e = single_exit (loop);
6246 basic_block merge_bb = e->dest;
6247 imm_use_iterator imm_iter;
6248 use_operand_p use_p;
6249 tree lhs = gimple_call_lhs (stmt);
6251 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
6253 gimple *use_stmt = USE_STMT (use_p);
6254 if (gimple_code (use_stmt) == GIMPLE_PHI
6255 && gimple_bb (use_stmt) == merge_bb)
6257 if (vec_stmt)
6259 tree vfm1
6260 = build_int_cst (unsigned_type_node,
6261 loop_vinfo->vectorization_factor - 1);
6262 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
6264 return true;
6269 return false;
6272 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6273 return false;
6275 /* FORNOW. CHECKME. */
6276 if (nested_in_vect_loop_p (loop, stmt))
6277 return false;
6279 /* FORNOW: support only if all uses are invariant. This means
6280 that the scalar operations can remain in place, unvectorized.
6281 The original last scalar value that they compute will be used. */
6282 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
6284 enum vect_def_type dt = vect_uninitialized_def;
6286 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "use not simple.\n");
6291 return false;
6294 if (dt != vect_external_def && dt != vect_constant_def)
6295 return false;
6298 /* No transformation is required for the cases we currently support. */
6299 return true;
6302 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
6304 static void
6305 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6307 ssa_op_iter op_iter;
6308 imm_use_iterator imm_iter;
6309 def_operand_p def_p;
6310 gimple *ustmt;
6312 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6314 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6316 basic_block bb;
6318 if (!is_gimple_debug (ustmt))
6319 continue;
6321 bb = gimple_bb (ustmt);
6323 if (!flow_bb_inside_loop_p (loop, bb))
6325 if (gimple_debug_bind_p (ustmt))
6327 if (dump_enabled_p ())
6328 dump_printf_loc (MSG_NOTE, vect_location,
6329 "killing debug use\n");
6331 gimple_debug_bind_reset_value (ustmt);
6332 update_stmt (ustmt);
6334 else
6335 gcc_unreachable ();
6342 /* This function builds ni_name = number of iterations. Statements
6343 are emitted on the loop preheader edge. */
6345 static tree
6346 vect_build_loop_niters (loop_vec_info loop_vinfo)
6348 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6349 if (TREE_CODE (ni) == INTEGER_CST)
6350 return ni;
6351 else
6353 tree ni_name, var;
6354 gimple_seq stmts = NULL;
6355 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6357 var = create_tmp_var (TREE_TYPE (ni), "niters");
6358 ni_name = force_gimple_operand (ni, &stmts, false, var);
6359 if (stmts)
6360 gsi_insert_seq_on_edge_immediate (pe, stmts);
6362 return ni_name;
6367 /* This function generates the following statements:
6369 ni_name = number of iterations loop executes
6370 ratio = ni_name / vf
6371 ratio_mult_vf_name = ratio * vf
6373 and places them on the loop preheader edge. */
6375 static void
6376 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6377 tree ni_name,
6378 tree *ratio_mult_vf_name_ptr,
6379 tree *ratio_name_ptr)
6381 tree ni_minus_gap_name;
6382 tree var;
6383 tree ratio_name;
6384 tree ratio_mult_vf_name;
6385 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6386 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6387 tree log_vf;
6389 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
6391 /* If epilogue loop is required because of data accesses with gaps, we
6392 subtract one iteration from the total number of iterations here for
6393 correct calculation of RATIO. */
6394 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6396 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6397 ni_name,
6398 build_one_cst (TREE_TYPE (ni_name)));
6399 if (!is_gimple_val (ni_minus_gap_name))
6401 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
6402 gimple *stmts = NULL;
6403 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
6404 true, var);
6405 gsi_insert_seq_on_edge_immediate (pe, stmts);
6408 else
6409 ni_minus_gap_name = ni_name;
6411 /* Create: ratio = ni >> log2(vf) */
6412 /* ??? As we have ni == number of latch executions + 1, ni could
6413 have overflown to zero. So avoid computing ratio based on ni
6414 but compute it using the fact that we know ratio will be at least
6415 one, thus via (ni - vf) >> log2(vf) + 1. */
6416 ratio_name
6417 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
6418 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
6419 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6420 ni_minus_gap_name,
6421 build_int_cst
6422 (TREE_TYPE (ni_name), vf)),
6423 log_vf),
6424 build_int_cst (TREE_TYPE (ni_name), 1));
6425 if (!is_gimple_val (ratio_name))
6427 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
6428 gimple *stmts = NULL;
6429 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
6430 gsi_insert_seq_on_edge_immediate (pe, stmts);
6432 *ratio_name_ptr = ratio_name;
6434 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6436 if (ratio_mult_vf_name_ptr)
6438 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6439 ratio_name, log_vf);
6440 if (!is_gimple_val (ratio_mult_vf_name))
6442 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
6443 gimple *stmts = NULL;
6444 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
6445 true, var);
6446 gsi_insert_seq_on_edge_immediate (pe, stmts);
6448 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6451 return;
6455 /* Function vect_transform_loop.
6457 The analysis phase has determined that the loop is vectorizable.
6458 Vectorize the loop - created vectorized stmts to replace the scalar
6459 stmts in the loop, and update the loop exit condition. */
6461 void
6462 vect_transform_loop (loop_vec_info loop_vinfo)
6464 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6465 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6466 int nbbs = loop->num_nodes;
6467 int i;
6468 tree ratio = NULL;
6469 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6470 bool grouped_store;
6471 bool slp_scheduled = false;
6472 gimple *stmt, *pattern_stmt;
6473 gimple_seq pattern_def_seq = NULL;
6474 gimple_stmt_iterator pattern_def_si = gsi_none ();
6475 bool transform_pattern_stmt = false;
6476 bool check_profitability = false;
6477 int th;
6478 /* Record number of iterations before we started tampering with the profile. */
6479 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
6481 if (dump_enabled_p ())
6482 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6484 /* If profile is inprecise, we have chance to fix it up. */
6485 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6486 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
6488 /* Use the more conservative vectorization threshold. If the number
6489 of iterations is constant assume the cost check has been performed
6490 by our caller. If the threshold makes all loops profitable that
6491 run at least the vectorization factor number of times checking
6492 is pointless, too. */
6493 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6494 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6495 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6497 if (dump_enabled_p ())
6498 dump_printf_loc (MSG_NOTE, vect_location,
6499 "Profitability threshold is %d loop iterations.\n",
6500 th);
6501 check_profitability = true;
6504 /* Version the loop first, if required, so the profitability check
6505 comes first. */
6507 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
6508 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
6510 vect_loop_versioning (loop_vinfo, th, check_profitability);
6511 check_profitability = false;
6514 tree ni_name = vect_build_loop_niters (loop_vinfo);
6515 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
6517 /* Peel the loop if there are data refs with unknown alignment.
6518 Only one data ref with unknown store is allowed. */
6520 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
6522 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
6523 th, check_profitability);
6524 check_profitability = false;
6525 /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
6526 be re-computed. */
6527 ni_name = NULL_TREE;
6530 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6531 compile time constant), or it is a constant that doesn't divide by the
6532 vectorization factor, then an epilog loop needs to be created.
6533 We therefore duplicate the loop: the original loop will be vectorized,
6534 and will compute the first (n/VF) iterations. The second copy of the loop
6535 will remain scalar and will compute the remaining (n%VF) iterations.
6536 (VF is the vectorization factor). */
6538 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6539 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6541 tree ratio_mult_vf;
6542 if (!ni_name)
6543 ni_name = vect_build_loop_niters (loop_vinfo);
6544 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6545 &ratio);
6546 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6547 th, check_profitability);
6549 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6550 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6551 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6552 else
6554 if (!ni_name)
6555 ni_name = vect_build_loop_niters (loop_vinfo);
6556 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6559 /* 1) Make sure the loop header has exactly two entries
6560 2) Make sure we have a preheader basic block. */
6562 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6564 split_edge (loop_preheader_edge (loop));
6566 /* FORNOW: the vectorizer supports only loops which body consist
6567 of one basic block (header + empty latch). When the vectorizer will
6568 support more involved loop forms, the order by which the BBs are
6569 traversed need to be reconsidered. */
6571 for (i = 0; i < nbbs; i++)
6573 basic_block bb = bbs[i];
6574 stmt_vec_info stmt_info;
6576 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6577 gsi_next (&si))
6579 gphi *phi = si.phi ();
6580 if (dump_enabled_p ())
6582 dump_printf_loc (MSG_NOTE, vect_location,
6583 "------>vectorizing phi: ");
6584 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6585 dump_printf (MSG_NOTE, "\n");
6587 stmt_info = vinfo_for_stmt (phi);
6588 if (!stmt_info)
6589 continue;
6591 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6592 vect_loop_kill_debug_uses (loop, phi);
6594 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6595 && !STMT_VINFO_LIVE_P (stmt_info))
6596 continue;
6598 if (STMT_VINFO_VECTYPE (stmt_info)
6599 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6600 != (unsigned HOST_WIDE_INT) vectorization_factor)
6601 && dump_enabled_p ())
6602 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6604 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6606 if (dump_enabled_p ())
6607 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6608 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6612 pattern_stmt = NULL;
6613 for (gimple_stmt_iterator si = gsi_start_bb (bb);
6614 !gsi_end_p (si) || transform_pattern_stmt;)
6616 bool is_store;
6618 if (transform_pattern_stmt)
6619 stmt = pattern_stmt;
6620 else
6622 stmt = gsi_stmt (si);
6623 /* During vectorization remove existing clobber stmts. */
6624 if (gimple_clobber_p (stmt))
6626 unlink_stmt_vdef (stmt);
6627 gsi_remove (&si, true);
6628 release_defs (stmt);
6629 continue;
6633 if (dump_enabled_p ())
6635 dump_printf_loc (MSG_NOTE, vect_location,
6636 "------>vectorizing statement: ");
6637 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6638 dump_printf (MSG_NOTE, "\n");
6641 stmt_info = vinfo_for_stmt (stmt);
6643 /* vector stmts created in the outer-loop during vectorization of
6644 stmts in an inner-loop may not have a stmt_info, and do not
6645 need to be vectorized. */
6646 if (!stmt_info)
6648 gsi_next (&si);
6649 continue;
6652 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6653 vect_loop_kill_debug_uses (loop, stmt);
6655 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6656 && !STMT_VINFO_LIVE_P (stmt_info))
6658 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6659 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6660 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6661 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6663 stmt = pattern_stmt;
6664 stmt_info = vinfo_for_stmt (stmt);
6666 else
6668 gsi_next (&si);
6669 continue;
6672 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6673 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6674 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6675 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6676 transform_pattern_stmt = true;
6678 /* If pattern statement has def stmts, vectorize them too. */
6679 if (is_pattern_stmt_p (stmt_info))
6681 if (pattern_def_seq == NULL)
6683 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6684 pattern_def_si = gsi_start (pattern_def_seq);
6686 else if (!gsi_end_p (pattern_def_si))
6687 gsi_next (&pattern_def_si);
6688 if (pattern_def_seq != NULL)
6690 gimple *pattern_def_stmt = NULL;
6691 stmt_vec_info pattern_def_stmt_info = NULL;
6693 while (!gsi_end_p (pattern_def_si))
6695 pattern_def_stmt = gsi_stmt (pattern_def_si);
6696 pattern_def_stmt_info
6697 = vinfo_for_stmt (pattern_def_stmt);
6698 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6699 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6700 break;
6701 gsi_next (&pattern_def_si);
6704 if (!gsi_end_p (pattern_def_si))
6706 if (dump_enabled_p ())
6708 dump_printf_loc (MSG_NOTE, vect_location,
6709 "==> vectorizing pattern def "
6710 "stmt: ");
6711 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6712 pattern_def_stmt, 0);
6713 dump_printf (MSG_NOTE, "\n");
6716 stmt = pattern_def_stmt;
6717 stmt_info = pattern_def_stmt_info;
6719 else
6721 pattern_def_si = gsi_none ();
6722 transform_pattern_stmt = false;
6725 else
6726 transform_pattern_stmt = false;
6729 if (STMT_VINFO_VECTYPE (stmt_info))
6731 unsigned int nunits
6732 = (unsigned int)
6733 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6734 if (!STMT_SLP_TYPE (stmt_info)
6735 && nunits != (unsigned int) vectorization_factor
6736 && dump_enabled_p ())
6737 /* For SLP VF is set according to unrolling factor, and not
6738 to vector size, hence for SLP this print is not valid. */
6739 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6742 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6743 reached. */
6744 if (STMT_SLP_TYPE (stmt_info))
6746 if (!slp_scheduled)
6748 slp_scheduled = true;
6750 if (dump_enabled_p ())
6751 dump_printf_loc (MSG_NOTE, vect_location,
6752 "=== scheduling SLP instances ===\n");
6754 vect_schedule_slp (loop_vinfo);
6757 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
6758 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6760 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6762 pattern_def_seq = NULL;
6763 gsi_next (&si);
6765 continue;
6769 /* -------- vectorize statement ------------ */
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6773 grouped_store = false;
6774 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6775 if (is_store)
6777 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6779 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6780 interleaving chain was completed - free all the stores in
6781 the chain. */
6782 gsi_next (&si);
6783 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6785 else
6787 /* Free the attached stmt_vec_info and remove the stmt. */
6788 gimple *store = gsi_stmt (si);
6789 free_stmt_vec_info (store);
6790 unlink_stmt_vdef (store);
6791 gsi_remove (&si, true);
6792 release_defs (store);
6795 /* Stores can only appear at the end of pattern statements. */
6796 gcc_assert (!transform_pattern_stmt);
6797 pattern_def_seq = NULL;
6799 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6801 pattern_def_seq = NULL;
6802 gsi_next (&si);
6804 } /* stmts in BB */
6805 } /* BBs in loop */
6807 slpeel_make_loop_iterate_ntimes (loop, ratio);
6809 /* Reduce loop iterations by the vectorization factor. */
6810 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6811 expected_iterations / vectorization_factor);
6812 loop->nb_iterations_upper_bound
6813 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6814 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6815 && loop->nb_iterations_upper_bound != 0)
6816 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6817 if (loop->any_estimate)
6819 loop->nb_iterations_estimate
6820 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6821 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6822 && loop->nb_iterations_estimate != 0)
6823 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6826 if (dump_enabled_p ())
6828 dump_printf_loc (MSG_NOTE, vect_location,
6829 "LOOP VECTORIZED\n");
6830 if (loop->inner)
6831 dump_printf_loc (MSG_NOTE, vect_location,
6832 "OUTER LOOP VECTORIZED\n");
6833 dump_printf (MSG_NOTE, "\n");