/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

static bool vect_can_advance_ivs_p (loop_vec_info);

/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always take
   invariants out of the loop, and so in the case of promotion we also have to
   check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
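
/* Worked example (illustrative, not part of the original sources): for a
   promotion such as

        int_x = (int) short_y;

   gimple_expr_type yields 'int' (4 bytes), but the rhs type 'short'
   (2 bytes) is smaller, so 'short' is returned; with 16-byte vectors this
   makes the VF 8 rather than 4.  */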

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with vector size (VS) 16 bytes, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                           &dummy);
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).  However,
   the actual number of vector stmts for every SLP node depends on VF which is
   set later in vect_analyze_operations ().  Hence, SLP costs should be updated.
   In this function we assume that the inside costs calculated in
   vect_model_xxx_cost are linear in ncopies.  */

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
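
/* Worked example (illustrative): suppose an SLP instance was built with an
   unrolling factor of 2, and the loop's VF is later set to 8.  Each vector
   stmt of the instance is then actually created 8 / 2 = 4 times, so the
   inside-of-loop cost recorded for the instance is scaled by that factor.  */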

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* Inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e., that are used outside the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop; cross-iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the user-specified
     threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
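
  /* Worked example (illustrative, with assumed parameter values): if
     PARAM_MIN_VECT_LOOP_BOUND is 2 and VF is 4, min_scalar_loop_bound is
     2 * 4 - 1 = 7.  If the cost model returns min_profitable_iters = 10,
     the threshold TH becomes 10 (the more conservative value), and a loop
     whose known iteration count is at most 10 is rejected below.  */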

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  if (!gimple_assign_copy_p (stmt))
    return false;
  operand = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dummy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
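
/* Example (illustrative): for an interleaving chain
   a[i] -> a[i+1] -> a[i+2] headed by the store to a[i], the place of the
   store to a[i+2] is 2.  A stmt whose chain starts at a different
   FIRST_STMT yields -1.  */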

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. Both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. Both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
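
/* Worked example of case 4 (illustrative): the chain of DRA has inits
   {4, 12} and the chain of DRB has inits {0, 8}.  The init of DRA's
   FIRST_DR (4) is greater than that of DRB's (0), so the nodes of DRA's
   chain are spliced into DRB's chain by ascending init, giving a single
   chain {0, 4, 8, 12} whose FIRST_DR is the old head of DRB's chain.  */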

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of an interleaving.  In case they are,
   insert DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either stores or loads (not one load and one store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is not smaller than the difference between the
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
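
/* Worked example (illustrative): two 4-byte accesses with the same base,
   offset and step, where DR_STEP = 8, DR_INIT (dra) = 4, DR_INIT (drb) = 0.
   Then init_a > init_b, init_a - init_b = 4 <= step, and 4 % 4 == 0, so the
   two refs are recognized as an interleaved pair (e.g. b[2i] and b[2i+1]),
   with DRB accessed first and therefore heading the chain.  */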

/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias can check the
   dependence at run-time, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked above that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
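
/* Worked example (illustrative): with VF = 4 and equal-sized refs, a
   dependence distance of 4 satisfies dist % VF == 0, so the two accesses
   are recorded as having the same alignment and the dependence is harmless;
   a distance of 5 (>= VF) also preserves correctness; a distance of 3,
   however, falls in neither category and the loop is rejected.  */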

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     evenly divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                    TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
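
/* Worked example (illustrative): for a V4SI access, 'alignment' is 16
   bytes.  If the base is known to be 16-byte aligned and DR_INIT is 20,
   the computed misalignment is 20 % 16 = 4 bytes.  */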

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
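
/* Worked example (illustrative): for a 4-byte data-ref with misalignment 4
   and a 16-byte vector type, peeling npeel = 3 iterations gives
   (4 + 3 * 4) % 16 = 0, i.e. the ref becomes aligned in the vector loop.  */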

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved accesses we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the number
         of interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
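
/* Worked example (illustrative): for V4SI (nelements = 4, elem_size = 4)
   with DR_MISALIGNMENT = 8, mis_in_elements is 2, so the prolog loop runs
   4 - 2 = 2 iterations; a group size of 2 divides this and peeling is
   possible, whereas a group size of 3 does not.  */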

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
1813 static bool
1814 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1816 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1817 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1818 enum dr_alignment_support supportable_dr_alignment;
1819 struct data_reference *dr0 = NULL;
1820 struct data_reference *dr;
1821 unsigned int i;
1822 bool do_peeling = false;
1823 bool do_versioning = false;
1824 bool stat;
1825 gimple stmt;
1826 stmt_vec_info stmt_info;
1827 int vect_versioning_for_alias_required;
1829 if (vect_print_dump_info (REPORT_DETAILS))
1830 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1832 /* While cost model enhancements are expected in the future, the high level
1833 view of the code at this time is as follows:
1835 A) If there is a misaligned write then see if peeling to align this write
1836 can make all data references satisfy vect_supportable_dr_alignment.
1837 If so, update data structures as needed and return true. Note that
1838 at this time vect_supportable_dr_alignment is known to return false
1839 for a misaligned write.
1841 B) If peeling wasn't possible and there is a data reference with an
1842 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1843 then see if loop versioning checks can be used to make all data
1844 references satisfy vect_supportable_dr_alignment. If so, update
1845 data structures as needed and return true.
1847 C) If neither peeling nor versioning was successful then return false if
1848 any data reference does not satisfy vect_supportable_dr_alignment.
1850 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1852 Note, Possibility 3 above (which is peeling and versioning together) is not
1853 being done at this time. */
1855 /* (1) Peeling to force alignment. */
1857 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1858 Considerations:
1859 + How many accesses will become aligned due to the peeling
1860 - How many accesses will become unaligned due to the peeling,
1861 and the cost of misaligned accesses.
1862 - The cost of peeling (the extra runtime checks, the increase
1863 in code size).
1865 The scheme we use FORNOW: peel to force the alignment of the first
1866 misaligned store in the loop.
1867 Rationale: misaligned stores are not yet supported.
1869 TODO: Use a cost model. */
1871 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1873 stmt = DR_STMT (dr);
1874 stmt_info = vinfo_for_stmt (stmt);
1876 /* For interleaving, only the alignment of the first access
1877 matters. */
1878 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1879 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1880 continue;
1882 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1884 do_peeling = vector_alignment_reachable_p (dr);
1885 if (do_peeling)
1886 dr0 = dr;
1887 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1888 fprintf (vect_dump, "vector alignment may not be reachable");
1889 break;
1893 vect_versioning_for_alias_required =
1894 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1896 /* Temporarily, if versioning for alias is required, we disable peeling
1897 until we support peeling and versioning. Often peeling for alignment
1898 will require peeling for loop-bound, which in turn requires that we
1899 know how to adjust the loop ivs after the loop. */
1900 if (vect_versioning_for_alias_required
1901 || !vect_can_advance_ivs_p (loop_vinfo)
1902 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1903 do_peeling = false;
1905 if (do_peeling)
1907 int mis;
1908 int npeel = 0;
1909 gimple stmt = DR_STMT (dr0);
1910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1911 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1912 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1914 if (known_alignment_for_access_p (dr0))
1916 /* Since it's known at compile time, compute the number of iterations
1917 in the peeled loop (the peeling factor) for use in updating
1918 DR_MISALIGNMENT values. The peeling factor is the vectorization
1919 factor minus the misalignment as an element count. */
1920 mis = DR_MISALIGNMENT (dr0);
1921 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1922 npeel = nelements - mis;
1924 /* For interleaved data access every iteration accesses all the
1925 members of the group, therefore we divide the number of iterations
1926 by the group size. */
1927 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1928 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1929 npeel /= DR_GROUP_SIZE (stmt_info);
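/* A worked example with illustrative values: assume V4SI vectors
   (nelements = 4), 4-byte elements, and DR_MISALIGNMENT (dr0) = 8.
   Then mis = 8 / 4 = 2 elements and npeel = 4 - 2 = 2 iterations.
   If dr0 belongs to an interleaving group of size 2, each peeled
   iteration covers the whole group, so npeel = 2 / 2 = 1.  */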
1931 if (vect_print_dump_info (REPORT_DETAILS))
1932 fprintf (vect_dump, "Try peeling by %d", npeel);
1935 /* Ensure that all data refs can be vectorized after the peel. */
1936 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1938 int save_misalignment;
1940 if (dr == dr0)
1941 continue;
1943 stmt = DR_STMT (dr);
1944 stmt_info = vinfo_for_stmt (stmt);
1945 /* For interleaving, only the alignment of the first access
1946 matters. */
1947 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1948 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1949 continue;
1951 save_misalignment = DR_MISALIGNMENT (dr);
1952 vect_update_misalignment_for_peel (dr, dr0, npeel);
1953 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1954 SET_DR_MISALIGNMENT (dr, save_misalignment);
1956 if (!supportable_dr_alignment)
1958 do_peeling = false;
1959 break;
1963 if (do_peeling)
1965 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1966 If the misalignment of DR_i is identical to that of dr0 then set
1967 DR_MISALIGNMENT (DR_i) to zero. If the misalignments of DR_i and
1968 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1969 by the peeling factor times the element size of DR_i (MOD the
1970 vectorization factor times the size). Otherwise, the
1971 misalignment of DR_i must be set to unknown. */
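/* For instance (illustrative values): with npeel = 2, 4-byte elements
   and VF = 4, a data-ref with known misalignment 4 becomes
   (4 + 2*4) % (4*4) = 12 after the peel, whereas a data-ref whose
   misalignment is unknown simply stays unknown.  */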
1972 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1973 if (dr != dr0)
1974 vect_update_misalignment_for_peel (dr, dr0, npeel);
1976 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1977 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1978 SET_DR_MISALIGNMENT (dr0, 0);
1979 if (vect_print_dump_info (REPORT_ALIGNMENT))
1980 fprintf (vect_dump, "Alignment of access forced using peeling.");
1982 if (vect_print_dump_info (REPORT_DETAILS))
1983 fprintf (vect_dump, "Peeling for alignment will be applied.");
1985 stat = vect_verify_datarefs_alignment (loop_vinfo);
1986 gcc_assert (stat);
1987 return stat;
1992 /* (2) Versioning to force alignment. */
1994 /* Try versioning if:
1995 1) flag_tree_vect_loop_version is TRUE
1996 2) optimize loop for speed
1997 3) there is at least one unsupported misaligned data ref with an unknown
1998 misalignment, and
1999 4) all misaligned data refs with a known misalignment are supported, and
2000 5) the number of runtime alignment checks is within reason. */
2002 do_versioning =
2003 flag_tree_vect_loop_version
2004 && optimize_loop_nest_for_speed_p (loop)
2005 && (!loop->inner); /* FORNOW */
2007 if (do_versioning)
2009 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2011 stmt = DR_STMT (dr);
2012 stmt_info = vinfo_for_stmt (stmt);
2014 /* For interleaving, only the alignment of the first access
2015 matters. */
2016 if (aligned_access_p (dr)
2017 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2018 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
2019 continue;
2021 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
2023 if (!supportable_dr_alignment)
2025 gimple stmt;
2026 int mask;
2027 tree vectype;
2029 if (known_alignment_for_access_p (dr)
2030 || VEC_length (gimple,
2031 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2032 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2034 do_versioning = false;
2035 break;
2038 stmt = DR_STMT (dr);
2039 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2040 gcc_assert (vectype);
2042 /* The rightmost bits of an aligned address must be zeros.
2043 Construct the mask needed for this test. For example,
2044 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2045 mask must be 15 = 0xf. */
2046 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2048 /* FORNOW: use the same mask to test all potentially unaligned
2049 references in the loop. The vectorizer currently supports
2050 a single vector size, see the reference to
2051 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2052 vectorization factor is computed. */
2053 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2054 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2055 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2056 VEC_safe_push (gimple, heap,
2057 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2058 DR_STMT (dr));
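/* Conceptually, the stmts collected here give rise to one runtime guard
   at transform time.  A sketch, not the exact code that is generated:

     if ((((ptrsize_int) addr_1 | ... | (ptrsize_int) addr_n) & mask) == 0)
       ... run the aligned (vectorized) loop version ...
     else
       ... run the unvectorized version ...

   ORing the addresses lets a single AND with MASK test all of them.  */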
2062 /* Versioning requires at least one misaligned data reference. */
2063 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2064 do_versioning = false;
2065 else if (!do_versioning)
2066 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2069 if (do_versioning)
2071 VEC(gimple,heap) *may_misalign_stmts
2072 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2073 gimple stmt;
2075 /* It can now be assumed that the data references in the statements
2076 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2077 of the loop being vectorized. */
2078 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
2080 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2081 dr = STMT_VINFO_DATA_REF (stmt_info);
2082 SET_DR_MISALIGNMENT (dr, 0);
2083 if (vect_print_dump_info (REPORT_ALIGNMENT))
2084 fprintf (vect_dump, "Alignment of access forced using versioning.");
2087 if (vect_print_dump_info (REPORT_DETAILS))
2088 fprintf (vect_dump, "Versioning for alignment will be applied.");
2090 /* Peeling and versioning can't be done together at this time. */
2091 gcc_assert (! (do_peeling && do_versioning));
2093 stat = vect_verify_datarefs_alignment (loop_vinfo);
2094 gcc_assert (stat);
2095 return stat;
2098 /* This point is reached if neither peeling nor versioning is being done. */
2099 gcc_assert (! (do_peeling || do_versioning));
2101 stat = vect_verify_datarefs_alignment (loop_vinfo);
2102 return stat;
2106 /* Function vect_analyze_data_refs_alignment
2108 Analyze the alignment of the data-references in the loop.
2109 Return FALSE if a data reference is found that cannot be vectorized. */
2111 static bool
2112 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2114 if (vect_print_dump_info (REPORT_DETAILS))
2115 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2117 if (!vect_compute_data_refs_alignment (loop_vinfo))
2119 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2120 fprintf (vect_dump,
2121 "not vectorized: can't calculate alignment for data ref.");
2122 return false;
2125 return true;
2129 /* Analyze groups of strided accesses: check that DR belongs to a group of
2130 strided accesses of legal size, step, etc. Detect gaps, single element
2131 interleaving, and other special cases. Set strided access info.
2132 Collect groups of strided stores for further use in SLP analysis. */
2134 static bool
2135 vect_analyze_group_access (struct data_reference *dr)
2137 tree step = DR_STEP (dr);
2138 tree scalar_type = TREE_TYPE (DR_REF (dr));
2139 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2140 gimple stmt = DR_STMT (dr);
2141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2143 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2144 HOST_WIDE_INT stride;
2145 bool slp_impossible = false;
2147 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2148 interleaving group (including gaps). */
2149 stride = dr_step / type_size;
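/* E.g. (illustration): for a 4-byte element type and a DR_STEP of 16
   bytes, STRIDE is 4 -- each iteration advances over a group of four
   elements, whether or not all four are actually accessed.  */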
2151 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2152 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2154 /* Check if this DR is part of an interleaving group, and is the single
2155 element of the group that is accessed in the loop. */
2157 /* Gaps are supported only for loads. STEP must be a multiple of the type
2158 size. The size of the group must be a power of 2. */
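/* For example (illustrative):

     for (i = 0; i < N; i++)
       x += a[2*i];

   with 4-byte elements has DR_STEP 8, so STRIDE is 2 (a power of 2),
   and the access is treated as a group of size 2 with a gap.  */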
2159 if (DR_IS_READ (dr)
2160 && (dr_step % type_size) == 0
2161 && stride > 0
2162 && exact_log2 (stride) != -1)
2164 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2165 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2166 if (vect_print_dump_info (REPORT_DR_DETAILS))
2168 fprintf (vect_dump, "Detected single element interleaving of size %d: ",
2169 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2170 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2171 fprintf (vect_dump, " step ");
2172 print_generic_expr (vect_dump, step, TDF_SLIM);
2174 return true;
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "not consecutive access");
2178 return false;
2181 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2183 /* First stmt in the interleaving chain. Check the chain. */
2184 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2185 struct data_reference *data_ref = dr;
2186 unsigned int count = 1;
2187 tree next_step;
2188 tree prev_init = DR_INIT (data_ref);
2189 gimple prev = stmt;
2190 HOST_WIDE_INT diff, count_in_bytes;
2192 while (next)
2194 /* Skip same data-refs. In case two or more stmts share a data-ref
2195 (supported only for loads), we vectorize only the first stmt, and
2196 the rest get their vectorized loads from the first one. */
2197 if (!tree_int_cst_compare (DR_INIT (data_ref),
2198 DR_INIT (STMT_VINFO_DATA_REF (
2199 vinfo_for_stmt (next)))))
2201 if (!DR_IS_READ (data_ref))
2203 if (vect_print_dump_info (REPORT_DETAILS))
2204 fprintf (vect_dump, "Two store stmts share the same dr.");
2205 return false;
2208 /* Check that there are no load-store dependencies for these loads,
2209 to prevent a load-store-load sequence to the same location. */
2210 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2211 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2213 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump,
2215 "READ_WRITE dependence in interleaving.");
2216 return false;
2219 /* For loads, record the stmt whose vectorized load NEXT will reuse. */
2220 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2222 prev = next;
2223 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2224 continue;
2226 prev = next;
2228 /* Check that all the accesses have the same STEP. */
2229 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2230 if (tree_int_cst_compare (step, next_step))
2232 if (vect_print_dump_info (REPORT_DETAILS))
2233 fprintf (vect_dump, "not consecutive access in interleaving");
2234 return false;
2237 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2238 /* Check that the distance between two accesses is equal to the type
2239 size. Otherwise, we have gaps. */
2240 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2241 - TREE_INT_CST_LOW (prev_init)) / type_size;
2242 if (diff != 1)
2244 /* FORNOW: SLP of accesses with gaps is not supported. */
2245 slp_impossible = true;
2246 if (!DR_IS_READ (data_ref))
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "interleaved store with gaps");
2250 return false;
2254 /* Store the gap from the previous member of the group. If there is no
2255 gap in the access, DR_GROUP_GAP is always 1. */
2256 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2258 prev_init = DR_INIT (data_ref);
2259 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2260 /* Count the number of data-refs in the chain. */
2261 count++;
2264 /* COUNT is the number of accesses found; we multiply it by the size of
2265 the type to get COUNT_IN_BYTES. */
2266 count_in_bytes = type_size * count;
2268 /* Check that the size of the interleaving is not greater than STEP. */
2269 if (dr_step < count_in_bytes)
2271 if (vect_print_dump_info (REPORT_DETAILS))
2273 fprintf (vect_dump, "interleaving size is greater than step for ");
2274 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2276 return false;
2279 /* Check that the size of the interleaving is equal to STEP for stores,
2280 i.e., that there are no gaps. */
2281 if (dr_step != count_in_bytes)
2283 if (DR_IS_READ (dr))
2285 slp_impossible = true;
2286 /* There is a gap after the last load in the group. This gap is the
2287 difference between the stride and the number of elements. When
2288 there is no gap, this difference should be 0. */
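/* E.g. (illustration): loads of a[4*i], a[4*i + 1] and a[4*i + 2]
   give STRIDE = 4 and COUNT = 3, hence a trailing gap of 1.  */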
2289 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2291 else
2293 if (vect_print_dump_info (REPORT_DETAILS))
2294 fprintf (vect_dump, "interleaved store with gaps");
2295 return false;
2299 /* Check that STEP is a multiple of type size. */
2300 if ((dr_step % type_size) != 0)
2302 if (vect_print_dump_info (REPORT_DETAILS))
2304 fprintf (vect_dump, "step is not a multiple of type size: step ");
2305 print_generic_expr (vect_dump, step, TDF_SLIM);
2306 fprintf (vect_dump, " size ");
2307 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2308 TDF_SLIM);
2310 return false;
2313 /* FORNOW: we handle only interleaving that is a power of 2.
2314 We don't fail here if it may be still possible to vectorize the
2315 group using SLP. If not, the size of the group will be checked in
2316 vect_analyze_operations, and the vectorization will fail. */
2317 if (exact_log2 (stride) == -1)
2319 if (vect_print_dump_info (REPORT_DETAILS))
2320 fprintf (vect_dump, "interleaving is not a power of 2");
2322 if (slp_impossible)
2323 return false;
2325 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2326 if (vect_print_dump_info (REPORT_DETAILS))
2327 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2329 /* SLP: create an SLP data structure for every interleaving group of
2330 stores for further analysis in vect_analyze_slp. */
2331 if (!DR_IS_READ (dr) && !slp_impossible)
2332 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2335 return true;
2339 /* Analyze the access pattern of the data-reference DR.
2340 In case of non-consecutive accesses call vect_analyze_group_access() to
2341 analyze groups of strided accesses. */
2343 static bool
2344 vect_analyze_data_ref_access (struct data_reference *dr)
2346 tree step = DR_STEP (dr);
2347 tree scalar_type = TREE_TYPE (DR_REF (dr));
2348 gimple stmt = DR_STMT (dr);
2349 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2350 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2351 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2352 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2354 if (!step)
2356 if (vect_print_dump_info (REPORT_DETAILS))
2357 fprintf (vect_dump, "bad data-ref access");
2358 return false;
2361 /* Don't allow invariant accesses. */
2362 if (dr_step == 0)
2363 return false;
2365 if (nested_in_vect_loop_p (loop, stmt))
2367 /* Interleaved accesses are not yet supported within outer-loop
2368 vectorization for references in the inner-loop. */
2369 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2371 /* For the rest of the analysis we use the outer-loop step. */
2372 step = STMT_VINFO_DR_STEP (stmt_info);
2373 dr_step = TREE_INT_CST_LOW (step);
2375 if (dr_step == 0)
2377 if (vect_print_dump_info (REPORT_ALIGNMENT))
2378 fprintf (vect_dump, "zero step in outer loop.");
2379 if (DR_IS_READ (dr))
2380 return true;
2381 else
2382 return false;
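/* Illustration: an access like a[i] with 4-byte 'int' elements has
   DR_STEP 4, equal to TYPE_SIZE_UNIT (int), so the check below
   classifies it as consecutive (not interleaved).  */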
2386 /* Consecutive? */
2387 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2389 /* Mark that it is not interleaving. */
2390 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2391 return true;
2394 if (nested_in_vect_loop_p (loop, stmt))
2396 if (vect_print_dump_info (REPORT_ALIGNMENT))
2397 fprintf (vect_dump, "strided access in outer loop.");
2398 return false;
2401 /* Not a consecutive access - check if it is part of an interleaving group. */
2402 return vect_analyze_group_access (dr);
2406 /* Function vect_analyze_data_ref_accesses.
2408 Analyze the access pattern of all the data references in the loop.
2410 FORNOW: the only access pattern that is considered vectorizable is a
2411 simple step 1 (consecutive) access.
2413 FORNOW: handle only arrays and pointer accesses. */
2415 static bool
2416 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2418 unsigned int i;
2419 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2420 struct data_reference *dr;
2422 if (vect_print_dump_info (REPORT_DETAILS))
2423 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2425 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2426 if (!vect_analyze_data_ref_access (dr))
2428 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2429 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2430 return false;
2433 return true;
2436 /* Function vect_prune_runtime_alias_test_list.
2438 Prune a list of ddrs to be tested at run-time by versioning for alias.
2439 Return FALSE if the resulting list of ddrs is longer than allowed by
2440 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2442 static bool
2443 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2445 VEC (ddr_p, heap) * ddrs =
2446 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2447 unsigned i, j;
2449 if (vect_print_dump_info (REPORT_DETAILS))
2450 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2452 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2454 bool found;
2455 ddr_p ddr_i;
2457 ddr_i = VEC_index (ddr_p, ddrs, i);
2458 found = false;
2460 for (j = 0; j < i; j++)
2462 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2464 if (vect_vfa_range_equal (ddr_i, ddr_j))
2466 if (vect_print_dump_info (REPORT_DR_DETAILS))
2468 fprintf (vect_dump, "found equal ranges ");
2469 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2470 fprintf (vect_dump, ", ");
2471 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2472 fprintf (vect_dump, " and ");
2473 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2474 fprintf (vect_dump, ", ");
2475 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2477 found = true;
2478 break;
2482 if (found)
2484 VEC_ordered_remove (ddr_p, ddrs, i);
2485 continue;
2487 i++;
2490 if (VEC_length (ddr_p, ddrs) >
2491 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2493 if (vect_print_dump_info (REPORT_DR_DETAILS))
2495 fprintf (vect_dump,
2496 "disable versioning for alias - max number of generated "
2497 "checks exceeded.");
2500 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2502 return false;
2505 return true;
2508 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2510 static void
2511 vect_free_slp_tree (slp_tree node)
2513 if (!node)
2514 return;
2516 if (SLP_TREE_LEFT (node))
2517 vect_free_slp_tree (SLP_TREE_LEFT (node));
2519 if (SLP_TREE_RIGHT (node))
2520 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2522 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
2524 if (SLP_TREE_VEC_STMTS (node))
2525 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
2527 free (node);
2531 /* Free the memory allocated for the SLP instance. */
2533 void
2534 vect_free_slp_instance (slp_instance instance)
2536 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
2537 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
2538 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
2542 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
2543 they are of a legal type and that they match the defs of the first stmt of
2544 the SLP group (stored in FIRST_STMT_...). */
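/* For example (a sketch of the invariant being checked): in the group

     a[i]   = b[i]   + x;
     a[i+1] = b[i+1] + x;

   op0 must be a loop def (the load from 'b') and op1 an invariant (x)
   in both stmts, with matching types; mixing, say, a constant in one
   stmt with a loop def in the other makes the group non-isomorphic.  */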
2546 static bool
2547 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2548 gimple stmt, VEC (gimple, heap) **def_stmts0,
2549 VEC (gimple, heap) **def_stmts1,
2550 enum vect_def_type *first_stmt_dt0,
2551 enum vect_def_type *first_stmt_dt1,
2552 tree *first_stmt_def0_type,
2553 tree *first_stmt_def1_type,
2554 tree *first_stmt_const_oprnd,
2555 int ncopies_for_cost,
2556 bool *pattern0, bool *pattern1)
2558 tree oprnd;
2559 unsigned int i, number_of_oprnds;
2560 tree def;
2561 gimple def_stmt;
2562 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2563 stmt_vec_info stmt_info =
2564 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2565 enum gimple_rhs_class rhs_class;
2566 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2568 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
2569 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
2571 for (i = 0; i < number_of_oprnds; i++)
2573 oprnd = gimple_op (stmt, i + 1);
2575 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2576 || (!def_stmt && dt[i] != vect_constant_def))
2578 if (vect_print_dump_info (REPORT_SLP))
2580 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2581 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2584 return false;
2587 /* Check if DEF_STMT is a part of a pattern and get the def stmt from
2588 the pattern. Check that all the stmts of the node are in the
2589 pattern. */
2590 if (def_stmt && gimple_bb (def_stmt)
2591 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2592 && vinfo_for_stmt (def_stmt)
2593 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
2595 if (!*first_stmt_dt0)
2596 *pattern0 = true;
2597 else
2599 if (i == 1 && !*first_stmt_dt1)
2600 *pattern1 = true;
2601 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
2603 if (vect_print_dump_info (REPORT_DETAILS))
2605 fprintf (vect_dump, "Build SLP failed: some of the stmts"
2606 " are in a pattern, and others are not ");
2607 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2610 return false;
2614 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
2615 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
2617 if (*dt == vect_unknown_def_type)
2619 if (vect_print_dump_info (REPORT_DETAILS))
2620 fprintf (vect_dump, "Unsupported pattern.");
2621 return false;
2624 switch (gimple_code (def_stmt))
2626 case GIMPLE_PHI:
2627 def = gimple_phi_result (def_stmt);
2628 break;
2630 case GIMPLE_ASSIGN:
2631 def = gimple_assign_lhs (def_stmt);
2632 break;
2634 default:
2635 if (vect_print_dump_info (REPORT_DETAILS))
2636 fprintf (vect_dump, "unsupported defining stmt: ");
2637 return false;
2641 if (!*first_stmt_dt0)
2643 /* op0 of the first stmt of the group - store its info. */
2644 *first_stmt_dt0 = dt[i];
2645 if (def)
2646 *first_stmt_def0_type = TREE_TYPE (def);
2647 else
2648 *first_stmt_const_oprnd = oprnd;
2650 /* Analyze costs (for the first stmt of the group only). */
2651 if (rhs_class != GIMPLE_SINGLE_RHS)
2652 /* Not a memory operation (we don't call this function for loads). */
2653 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2654 else
2655 /* Store. */
2656 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2659 else
2661 if (!*first_stmt_dt1 && i == 1)
2663 /* op1 of the first stmt of the group - store its info. */
2664 *first_stmt_dt1 = dt[i];
2665 if (def)
2666 *first_stmt_def1_type = TREE_TYPE (def);
2667 else
2669 /* We assume that the stmt contains only one constant
2670 operand. We fail otherwise, to be on the safe side. */
2671 if (*first_stmt_const_oprnd)
2673 if (vect_print_dump_info (REPORT_SLP))
2674 fprintf (vect_dump, "Build SLP failed: two constant "
2675 "oprnds in stmt");
2676 return false;
2678 *first_stmt_const_oprnd = oprnd;
2681 else
2683 /* Not first stmt of the group, check that the def-stmt/s match
2684 the def-stmt/s of the first stmt. */
2685 if ((i == 0
2686 && (*first_stmt_dt0 != dt[i]
2687 || (*first_stmt_def0_type && def
2688 && *first_stmt_def0_type != TREE_TYPE (def))))
2689 || (i == 1
2690 && (*first_stmt_dt1 != dt[i]
2691 || (*first_stmt_def1_type && def
2692 && *first_stmt_def1_type != TREE_TYPE (def))))
2693 || (!def
2694 && TREE_TYPE (*first_stmt_const_oprnd)
2695 != TREE_TYPE (oprnd)))
2697 if (vect_print_dump_info (REPORT_SLP))
2698 fprintf (vect_dump, "Build SLP failed: different types ");
2700 return false;
2705 /* Check the types of the definitions. */
2706 switch (dt[i])
2708 case vect_constant_def:
2709 case vect_invariant_def:
2710 break;
2712 case vect_loop_def:
2713 if (i == 0)
2714 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
2715 else
2716 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
2717 break;
2719 default:
2720 /* FORNOW: Not supported. */
2721 if (vect_print_dump_info (REPORT_SLP))
2723 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2724 print_generic_expr (vect_dump, def, TDF_SLIM);
2727 return false;
2731 return true;
2735 /* Recursively build an SLP tree starting from NODE.
2736 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2737 permutation or are of unsupported types of operation. Otherwise, return
2738 TRUE. */
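/* For instance (illustrative): for the strided store group

     a[2*i]     = b[2*i]     + c[2*i];
     a[2*i + 1] = b[2*i + 1] + c[2*i + 1];

   the root node holds the two stores, its left child holds the two
   loads from 'b', its right child the two loads from 'c', and the
   recursion stops at the load nodes.  */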
2740 static bool
2741 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2742 unsigned int group_size,
2743 int *inside_cost, int *outside_cost,
2744 int ncopies_for_cost, unsigned int *max_nunits,
2745 VEC (int, heap) **load_permutation,
2746 VEC (slp_tree, heap) **loads)
2748 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
2749 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
2750 unsigned int i;
2751 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2752 gimple stmt = VEC_index (gimple, stmts, 0);
2753 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2754 enum tree_code first_stmt_code = 0, rhs_code;
2755 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2756 tree lhs;
2757 bool stop_recursion = false, need_same_oprnds = false;
2758 tree vectype, scalar_type, first_op1 = NULL_TREE;
2759 unsigned int vectorization_factor = 0, ncopies;
2760 optab optab;
2761 int icode;
2762 enum machine_mode optab_op2_mode;
2763 enum machine_mode vec_mode;
2764 tree first_stmt_const_oprnd = NULL_TREE;
2765 struct data_reference *first_dr;
2766 bool pattern0 = false, pattern1 = false;
2767 HOST_WIDE_INT dummy;
2768 bool permutation = false;
2769 unsigned int load_place;
2770 gimple first_load;
2772 /* For every stmt in NODE find its def stmt/s. */
2773 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
2775 if (vect_print_dump_info (REPORT_SLP))
2777 fprintf (vect_dump, "Build SLP for ");
2778 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2781 lhs = gimple_get_lhs (stmt);
2782 if (lhs == NULL_TREE)
2784 if (vect_print_dump_info (REPORT_SLP))
2786 fprintf (vect_dump,
2787 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
2788 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2791 return false;
2794 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
2795 vectype = get_vectype_for_scalar_type (scalar_type);
2796 if (!vectype)
2798 if (vect_print_dump_info (REPORT_SLP))
2800 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2801 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2803 return false;
2806 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2807 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2808 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2809 if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
2810 fprintf (vect_dump, "SLP with multiple types ");
2812 /* In case of multiple types we need to detect the smallest type. */
2813 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
2814 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
2816 if (is_gimple_call (stmt))
2817 rhs_code = CALL_EXPR;
2818 else
2819 rhs_code = gimple_assign_rhs_code (stmt);
2821 /* Check the operation. */
2822 if (i == 0)
2824 first_stmt_code = rhs_code;
2826 /* Shift arguments should be equal in all the packed stmts for a
2827 vector shift with a scalar shift operand. */
2828 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
2829 || rhs_code == LROTATE_EXPR
2830 || rhs_code == RROTATE_EXPR)
2832 vec_mode = TYPE_MODE (vectype);
2834 /* First see if we have a vector/vector shift. */
2835 optab = optab_for_tree_code (rhs_code, vectype,
2836 optab_vector);
2838 if (!optab
2839 || (optab->handlers[(int) vec_mode].insn_code
2840 == CODE_FOR_nothing))
2842 /* No vector/vector shift, try for a vector/scalar shift. */
2843 optab = optab_for_tree_code (rhs_code, vectype,
2844 optab_scalar);
2846 if (!optab)
2848 if (vect_print_dump_info (REPORT_SLP))
2849 fprintf (vect_dump, "Build SLP failed: no optab.");
2850 return false;
2852 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2853 if (icode == CODE_FOR_nothing)
2855 if (vect_print_dump_info (REPORT_SLP))
2856 fprintf (vect_dump, "Build SLP failed: "
2857 "op not supported by target.");
2858 return false;
2860 optab_op2_mode = insn_data[icode].operand[2].mode;
2861 if (!VECTOR_MODE_P (optab_op2_mode))
2863 need_same_oprnds = true;
2864 first_op1 = gimple_assign_rhs2 (stmt);
2869 else
2871 if (first_stmt_code != rhs_code
2872 && (first_stmt_code != IMAGPART_EXPR
2873 || rhs_code != REALPART_EXPR)
2874 && (first_stmt_code != REALPART_EXPR
2875 || rhs_code != IMAGPART_EXPR))
2877 if (vect_print_dump_info (REPORT_SLP))
2879 fprintf (vect_dump,
2880 "Build SLP failed: different operation in stmt ");
2881 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2884 return false;
2887 if (need_same_oprnds
2888 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
2890 if (vect_print_dump_info (REPORT_SLP))
2892 fprintf (vect_dump,
2893 "Build SLP failed: different shift arguments in ");
2894 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2897 return false;
2901 /* Strided store or load. */
2902 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2904 if (REFERENCE_CLASS_P (lhs))
2906 /* Store. */
2907 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2908 &def_stmts0, &def_stmts1,
2909 &first_stmt_dt0,
2910 &first_stmt_dt1,
2911 &first_stmt_def0_type,
2912 &first_stmt_def1_type,
2913 &first_stmt_const_oprnd,
2914 ncopies_for_cost,
2915 &pattern0, &pattern1))
2916 return false;
2918 else
2920 /* Load. */
2921 /* FORNOW: Check that there is no gap between the loads. */
2922 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
2923 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2924 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2925 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
2927 if (vect_print_dump_info (REPORT_SLP))
2929 fprintf (vect_dump, "Build SLP failed: strided "
2930 "loads have gaps ");
2931 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2934 return false;
2937 /* Check that the size of the interleaved loads group is not
2938 greater than the SLP group size. */
2939 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
2940 > ncopies * group_size)
2942 if (vect_print_dump_info (REPORT_SLP))
2944 fprintf (vect_dump, "Build SLP failed: the number of "
2945 "interleaved loads is greater than"
2946 " the SLP group size ");
2947 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2950 return false;
2953 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
2955 if (first_load == stmt)
2957 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2958 if (vect_supportable_dr_alignment (first_dr)
2959 == dr_unaligned_unsupported)
2961 if (vect_print_dump_info (REPORT_SLP))
2963 fprintf (vect_dump, "Build SLP failed: unsupported "
2964 "unaligned load ");
2965 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2968 return false;
2971 /* Analyze costs (for the first stmt in the group). */
2972 vect_model_load_cost (vinfo_for_stmt (stmt),
2973 ncopies_for_cost, *node);
2976 /* Store the place of this load in the interleaving chain. In case
2977 a permutation is needed, we later decide whether that specific
2978 permutation is supported. */
2979 load_place = vect_get_place_in_interleaving_chain (stmt,
2980 first_load);
2981 if (load_place != i)
2982 permutation = true;
2984 VEC_safe_push (int, heap, *load_permutation, load_place);
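/* E.g. (illustration): if the scalar stmts use b[2*i + 1] first and
   b[2*i] second, the recorded places are {1, 0}, which differ from
   the order of the stmts, so PERMUTATION is set.  */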
2986 /* We stop building the tree when we reach a group of loads. */
2987 stop_recursion = true;
2988 continue;
2990 } /* Strided access. */
2991 else
2993 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
2995 /* Not a strided load. */
2996 if (vect_print_dump_info (REPORT_SLP))
2998 fprintf (vect_dump, "Build SLP failed: not strided load ");
2999 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3002 /* FORNOW: non-strided loads are not supported. */
3003 return false;
3006 /* Not a memory operation. */
3007 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
3008 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
3010 if (vect_print_dump_info (REPORT_SLP))
3012 fprintf (vect_dump, "Build SLP failed: operation");
3013 fprintf (vect_dump, " unsupported ");
3014 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3017 return false;
3020 /* Find the def-stmts. */
3021 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
3022 &def_stmts0, &def_stmts1,
3023 &first_stmt_dt0, &first_stmt_dt1,
3024 &first_stmt_def0_type,
3025 &first_stmt_def1_type,
3026 &first_stmt_const_oprnd,
3027 ncopies_for_cost,
3028 &pattern0, &pattern1))
3029 return false;
3033 /* Add the costs of the node to the overall instance costs. */
3034 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
3035 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
3037 /* Strided loads were reached - stop the recursion. */
3038 if (stop_recursion)
3040 if (permutation)
3042 VEC_safe_push (slp_tree, heap, *loads, *node);
3043 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
3046 return true;
3049 /* Create SLP_TREE nodes for the definition node/s. */
3050 if (first_stmt_dt0 == vect_loop_def)
3052 slp_tree left_node = XNEW (struct _slp_tree);
3053 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
3054 SLP_TREE_VEC_STMTS (left_node) = NULL;
3055 SLP_TREE_LEFT (left_node) = NULL;
3056 SLP_TREE_RIGHT (left_node) = NULL;
3057 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
3058 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
3059 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
3060 inside_cost, outside_cost, ncopies_for_cost,
3061 max_nunits, load_permutation, loads))
3062 return false;
3064 SLP_TREE_LEFT (*node) = left_node;
3067 if (first_stmt_dt1 == vect_loop_def)
3069 slp_tree right_node = XNEW (struct _slp_tree);
3070 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
3071 SLP_TREE_VEC_STMTS (right_node) = NULL;
3072 SLP_TREE_LEFT (right_node) = NULL;
3073 SLP_TREE_RIGHT (right_node) = NULL;
3074 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
3075 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
3076 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
3077 inside_cost, outside_cost, ncopies_for_cost,
3078 max_nunits, load_permutation, loads))
3079 return false;
3081 SLP_TREE_RIGHT (*node) = right_node;
3084 return true;
3088 static void
3089 vect_print_slp_tree (slp_tree node)
3091 int i;
3092 gimple stmt;
3094 if (!node)
3095 return;
3097 fprintf (vect_dump, "node ");
3098 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3100 fprintf (vect_dump, "\n\tstmt %d ", i);
3101 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3103 fprintf (vect_dump, "\n");
3105 vect_print_slp_tree (SLP_TREE_LEFT (node));
3106 vect_print_slp_tree (SLP_TREE_RIGHT (node));
3110 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
3111 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
3112 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
3113 stmts in NODE are to be marked. */
3115 static void
3116 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
3118 int i;
3119 gimple stmt;
3121 if (!node)
3122 return;
3124 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3125 if (j < 0 || i == j)
3126 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
3128 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
3129 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
3133 /* Check if the permutation required by the SLP INSTANCE is supported.
3134 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
3136 static bool
3137 vect_supported_slp_permutation_p (slp_instance instance)
3139 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
3140 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
3141 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
3142 VEC (slp_tree, heap) *sorted_loads = NULL;
3143 int index;
3144 slp_tree *tmp_loads = NULL;
3145 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
3146 slp_tree load;
3148 /* FORNOW: The only supported load permutation is the one in which all
3149 the loads in a node access the same location, and the data-refs in
3150 the nodes of LOADS constitute an interleaving chain.
3151 Sort the nodes according to the order of accesses in the chain. */
3152 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
3153 for (i = 0, j = 0;
3154 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
3155 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
3156 i += group_size, j++)
3158 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
3159 /* Check that the loads are all in the same interleaving chain. */
3160 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
3162 if (vect_print_dump_info (REPORT_DETAILS))
3164 fprintf (vect_dump, "Build SLP failed: unsupported data "
3165 "permutation ");
3166 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
3169 free (tmp_loads);
3170 return false;
3173 tmp_loads[index] = load;
3176 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
3177 for (i = 0; i < group_size; i++)
3178 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
3180 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
3181 SLP_INSTANCE_LOADS (instance) = sorted_loads;
3182 free (tmp_loads);
3184 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
3185 SLP_INSTANCE_UNROLLING_FACTOR (instance),
3186 instance, true))
3187 return false;
3189 return true;
3193 /* Check if the required load permutation is supported.
3194 LOAD_PERMUTATION contains a list of indices of the loads.
3195 In SLP this permutation is relative to the order of strided stores that are
3196 the base of the SLP instance. */
3198 static bool
3199 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
3200 VEC (int, heap) *load_permutation)
3202 int i = 0, j, prev = -1, next, k;
3203 bool supported;
3205 /* FORNOW: permutations are only supported for loop-aware SLP. */
3206 if (!slp_instn)
3207 return false;
3209 if (vect_print_dump_info (REPORT_SLP))
3211 fprintf (vect_dump, "Load permutation ");
3212 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
3213 fprintf (vect_dump, "%d ", next);
3216 /* FORNOW: the only supported permutation is of the form 0..0 1..1 2..2,
3217 of total length GROUP_SIZE * GROUP_SIZE, where each run of identical
3218 dr indices has length GROUP_SIZE as well. */
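/* E.g. (illustration): for GROUP_SIZE = 2 the only accepted shape is
   {0, 0, 1, 1} -- all loads of the first node at place 0 of the
   interleaving chain, all loads of the second node at place 1 --
   while, say, {0, 1, 1, 0} is rejected.  */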
3219 if (VEC_length (int, load_permutation)
3220 != (unsigned int) (group_size * group_size))
3221 return false;
3223 supported = true;
3224 for (j = 0; j < group_size; j++)
3226 for (i = j * group_size, k = 0;
3227 VEC_iterate (int, load_permutation, i, next) && k < group_size;
3228 i++, k++)
3230 if (i != j * group_size && next != prev)
3232 supported = false;
3233 break;
3236 prev = next;
3240 if (supported && i == group_size * group_size
3241 && vect_supported_slp_permutation_p (slp_instn))
3242 return true;
3244 return false;
3248 /* Find the first load in the loop that belongs to INSTANCE.
3249 When loads are in several SLP nodes, there can be a case in which the first
3250 load does not appear in the first SLP node to be transformed, causing
3251 an incorrect order of statements. Since we generate all the loads together,
3252 they must be inserted before the first load of the SLP instance and not
3253 before the first load of the first node of the instance. */
3254 static gimple
3255 vect_find_first_load_in_slp_instance (slp_instance instance)
3257 int i, j;
3258 slp_tree load_node;
3259 gimple first_load = NULL, load;
3261 for (i = 0;
3262 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
3263 i++)
3264 for (j = 0;
3265 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
3266 j++)
3267 first_load = get_earlier_stmt (load, first_load);
3269 return first_load;
3273 /* Analyze an SLP instance starting from a group of strided stores. Call
3274 vect_build_slp_tree to build a tree of packed stmts if possible.
3275 Return FALSE if it's impossible to SLP any stmt in the loop. */
3277 static bool
3278 vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
3280 slp_instance new_instance;
3281 slp_tree node = XNEW (struct _slp_tree);
3282 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3283 unsigned int unrolling_factor = 1, nunits;
3284 tree vectype, scalar_type;
3285 gimple next;
3286 unsigned int vectorization_factor = 0, ncopies;
3287 bool slp_impossible = false;
3288 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3289 unsigned int max_nunits = 0;
3290 VEC (int, heap) *load_permutation;
3291 VEC (slp_tree, heap) *loads;
3293 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
3294 vinfo_for_stmt (stmt))));
3295 vectype = get_vectype_for_scalar_type (scalar_type);
3296 if (!vectype)
3298 if (vect_print_dump_info (REPORT_SLP))
3300 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3301 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3303 return false;
3306 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3307 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3308 ncopies = vectorization_factor / nunits;
3310 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3311 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
3312 next = stmt;
3313 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3314 while (next)
3316 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
3317 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3320 SLP_TREE_VEC_STMTS (node) = NULL;
3321 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3322 SLP_TREE_LEFT (node) = NULL;
3323 SLP_TREE_RIGHT (node) = NULL;
3324 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3325 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3327 /* Calculate the unrolling factor. */
3328 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3330 /* Calculate the number of vector stmts to create based on the unrolling
3331 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3332 GROUP_SIZE / NUNITS otherwise). */
3333 ncopies_for_cost = unrolling_factor * group_size / nunits;
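/* A worked example with illustrative values: with NUNITS = 4 and
   GROUP_SIZE = 6, UNROLLING_FACTOR is lcm (4, 6) / 6 = 2 and
   NCOPIES_FOR_COST is 2 * 6 / 4 = 3 vector stmts.  */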
3335 load_permutation = VEC_alloc (int, heap, group_size * group_size);
3336 loads = VEC_alloc (slp_tree, heap, group_size);
3338 /* Build the tree for the SLP instance. */
3339 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
3340 &outside_cost, ncopies_for_cost, &max_nunits,
3341 &load_permutation, &loads))
3343 /* Create a new SLP instance. */
3344 new_instance = XNEW (struct _slp_instance);
3345 SLP_INSTANCE_TREE (new_instance) = node;
3346 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3347 /* Calculate the unrolling factor based on the smallest type in the
3348 loop. */
3349 if (max_nunits > nunits)
3350 unrolling_factor = least_common_multiple (max_nunits, group_size)
3351 / group_size;
3353 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3354 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3355 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3356 SLP_INSTANCE_LOADS (new_instance) = loads;
3357 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
3358 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
3359 if (VEC_length (slp_tree, loads))
3361 if (!vect_supported_load_permutation_p (new_instance, group_size,
3362 load_permutation))
3364 if (vect_print_dump_info (REPORT_SLP))
3366 fprintf (vect_dump, "Build SLP failed: unsupported load "
3367 "permutation ");
3368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3371 vect_free_slp_instance (new_instance);
3372 return false;
3375 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
3376 = vect_find_first_load_in_slp_instance (new_instance);
3378 else
3379 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
3381 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3382 new_instance);
3383 if (vect_print_dump_info (REPORT_SLP))
3384 vect_print_slp_tree (node);
3386 return true;
3389 /* Failed to SLP. */
3390 /* Free the allocated memory. */
3391 vect_free_slp_tree (node);
3392 VEC_free (int, heap, load_permutation);
3393 VEC_free (slp_tree, heap, loads);
3395 if (slp_impossible)
3396 return false;
3398 /* SLP failed for this instance, but it is still possible to SLP other stmts
3399 in the loop. */
3400 return true;
3404 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
3405 trees of packed scalar stmts if SLP is possible. */
3407 static bool
3408 vect_analyze_slp (loop_vec_info loop_vinfo)
3410 unsigned int i;
3411 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3412 gimple store;
3414 if (vect_print_dump_info (REPORT_SLP))
3415 fprintf (vect_dump, "=== vect_analyze_slp ===");
3417 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
3418 if (!vect_analyze_slp_instance (loop_vinfo, store))
3420 /* SLP failed. No instance can be SLPed in the loop. */
3421 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3422 fprintf (vect_dump, "SLP failed.");
3424 return false;
3427 return true;
3431 /* For each possible SLP instance decide whether to SLP it and calculate the overall
3432 unrolling factor needed to SLP the loop. */
3434 static void
3435 vect_make_slp_decision (loop_vec_info loop_vinfo)
3437 unsigned int i, unrolling_factor = 1;
3438 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3439 slp_instance instance;
3440 int decided_to_slp = 0;
3442 if (vect_print_dump_info (REPORT_SLP))
3443 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3445 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3447 /* FORNOW: SLP if you can. */
3448 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3449 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3451 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3452 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3453 loop-based vectorization. Such stmts will be marked as HYBRID. */
3454 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3455 decided_to_slp++;
3458 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3460 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3461 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3462 decided_to_slp, unrolling_factor);
3466 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3467 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
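/* E.g. (illustration): if a PURE_SLP stmt computes a value that also
   feeds a relevant stmt outside any SLP instance (say, a summation
   used after the group of stores), it must be marked HYBRID so that
   it is also vectorized by the loop-based scheme.  */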
3469 static void
3470 vect_detect_hybrid_slp_stmts (slp_tree node)
3472 int i;
3473 gimple stmt;
3474 imm_use_iterator imm_iter;
3475 gimple use_stmt;
3477 if (!node)
3478 return;
3480 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3481 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3482 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
3483 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
3484 if (vinfo_for_stmt (use_stmt)
3485 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
3486 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
3487 vect_mark_slp_stmts (node, hybrid, i);
3489 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3490 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3494 /* Find stmts that must be both vectorized and SLPed. */
3496 static void
3497 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3499 unsigned int i;
3500 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3501 slp_instance instance;
3503 if (vect_print_dump_info (REPORT_SLP))
3504 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3506 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3507 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3511 /* Function vect_analyze_data_refs.
3513 Find all the data references in the loop.
3515 The general structure of the analysis of data refs in the vectorizer is as
3516 follows:
3517 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3518 find and analyze all data-refs in the loop and their dependences.
3519 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3520 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3521 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3525 static bool
3526 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3528 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3529 unsigned int i;
3530 VEC (data_reference_p, heap) *datarefs;
3531 struct data_reference *dr;
3532 tree scalar_type;
3534 if (vect_print_dump_info (REPORT_DETAILS))
3535 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3537 compute_data_dependences_for_loop (loop, true,
3538 &LOOP_VINFO_DATAREFS (loop_vinfo),
3539 &LOOP_VINFO_DDRS (loop_vinfo));
3541 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3542 from stmt_vec_info struct to DR and vectype. */
3543 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3545 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3547 gimple stmt;
3548 stmt_vec_info stmt_info;
3549 basic_block bb;
3550 tree base, offset, init;
3552 if (!dr || !DR_REF (dr))
3554 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3555 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3556 return false;
3559 stmt = DR_STMT (dr);
3560 stmt_info = vinfo_for_stmt (stmt);
3562 /* Check that analysis of the data-ref succeeded. */
3563 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3564 || !DR_STEP (dr))
3566 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3568 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3571 return false;
3574 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3576 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3577 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3578 "constant");
3579 return false;
3582 if (!DR_SYMBOL_TAG (dr))
3584 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3586 fprintf (vect_dump, "not vectorized: no memory tag for ");
3587 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3589 return false;
3592 base = unshare_expr (DR_BASE_ADDRESS (dr));
3593 offset = unshare_expr (DR_OFFSET (dr));
3594 init = unshare_expr (DR_INIT (dr));
3596 /* Update DR field in stmt_vec_info struct. */
3597 bb = gimple_bb (stmt);
3599 /* If the dataref is in an inner-loop of the loop that is considered
3600 for vectorization, we also want to analyze the access relative to
3601 the outer-loop (DR contains information only relative to the
3602 inner-most enclosing loop). We do that by building a reference to the
3603 first location accessed by the inner-loop, and analyze it relative to
3604 the outer-loop. */
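/* For example (illustrative): in

     for (i = 0; i < N; i++)        <-- loop being vectorized
       for (j = 0; j < M; j++)
         ... = a[i][j];

   with 4-byte elements the dataref's inner-loop step is 4, while
   relative to the outer loop the first location accessed is a[i][0]
   and STMT_VINFO_DR_STEP is 4 * M.  */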
3605 if (nested_in_vect_loop_p (loop, stmt))
3607 tree outer_step, outer_base, outer_init;
3608 HOST_WIDE_INT pbitsize, pbitpos;
3609 tree poffset;
3610 enum machine_mode pmode;
3611 int punsignedp, pvolatilep;
3612 affine_iv base_iv, offset_iv;
3613 tree dinit;
3615 /* Build a reference to the first location accessed by the
3616 inner-loop: *(BASE+INIT). (The first location is actually
3617 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3618 tree inner_base = build_fold_indirect_ref
3619 (fold_build2 (POINTER_PLUS_EXPR,
3620 TREE_TYPE (base), base,
3621 fold_convert (sizetype, init)));
3623 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "analyze in outer-loop: ");
3626 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3629 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3630 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3631 gcc_assert (outer_base != NULL_TREE);
3633 if (pbitpos % BITS_PER_UNIT != 0)
3635 if (vect_print_dump_info (REPORT_DETAILS))
3636 fprintf (vect_dump, "failed: bit offset alignment.\n");
3637 return false;
3640 outer_base = build_fold_addr_expr (outer_base);
3641 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3642 &base_iv, false))
3644 if (vect_print_dump_info (REPORT_DETAILS))
3645 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3646 return false;
3649 if (offset)
3651 if (poffset)
3652 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3653 else
3654 poffset = offset;
3657 if (!poffset)
3659 offset_iv.base = ssize_int (0);
3660 offset_iv.step = ssize_int (0);
3662 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3663 &offset_iv, false))
3665 if (vect_print_dump_info (REPORT_DETAILS))
3666 fprintf (vect_dump, "evolution of offset is not affine.\n");
3667 return false;
3670 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3671 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3672 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3673 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3674 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3676 outer_step = size_binop (PLUS_EXPR,
3677 fold_convert (ssizetype, base_iv.step),
3678 fold_convert (ssizetype, offset_iv.step));
3680 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3681 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3682 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3683 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3684 STMT_VINFO_DR_OFFSET (stmt_info) =
3685 fold_convert (ssizetype, offset_iv.base);
3686 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3687 size_int (highest_pow2_factor (offset_iv.base));
3689 if (vect_print_dump_info (REPORT_DETAILS))
3691 fprintf (vect_dump, "\touter base_address: ");
3692 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3693 fprintf (vect_dump, "\n\touter offset from base address: ");
3694 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3695 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3696 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3697 fprintf (vect_dump, "\n\touter step: ");
3698 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3699 fprintf (vect_dump, "\n\touter aligned to: ");
3700 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3704 if (STMT_VINFO_DATA_REF (stmt_info))
3706 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3708 fprintf (vect_dump,
3709 "not vectorized: more than one data ref in stmt: ");
3710 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3712 return false;
3714 STMT_VINFO_DATA_REF (stmt_info) = dr;
3716 /* Set vectype for STMT. */
3717 scalar_type = TREE_TYPE (DR_REF (dr));
3718 STMT_VINFO_VECTYPE (stmt_info) =
3719 get_vectype_for_scalar_type (scalar_type);
3720 if (!STMT_VINFO_VECTYPE (stmt_info))
3722 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3724 fprintf (vect_dump,
3725 "not vectorized: no vectype for stmt: ");
3726 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3727 fprintf (vect_dump, " scalar_type: ");
3728 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3730 return false;
3734 return true;
3738 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3740 /* Function vect_mark_relevant.
3742 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3744 static void
3745 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
3746 enum vect_relevant relevant, bool live_p)
3748 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3749 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3750 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3752 if (vect_print_dump_info (REPORT_DETAILS))
3753 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3755 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3757 gimple pattern_stmt;
3759 /* This is the last stmt in a sequence that was detected as a
3760 pattern that can potentially be vectorized. Don't mark the stmt
3761 as relevant/live because it's not going to be vectorized.
3762 Instead mark the pattern-stmt that replaces it. */
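	  /* Illustrative sketch (hypothetical sequence, not from this
	     code): for a widening multiplication such as

		 short s, t;  int prod;
		 prod = (int) s * (int) t;

	     pattern recognition may replace the final multiply with a
	     single pattern-stmt (e.g. a WIDEN_MULT_EXPR), and it is that
	     pattern-stmt which gets marked relevant/live here.  */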
3764 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3766 if (vect_print_dump_info (REPORT_DETAILS))
3767 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3768 stmt_info = vinfo_for_stmt (pattern_stmt);
3769 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3770 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3771 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3772 stmt = pattern_stmt;
3775 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3776 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3777 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3779 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3780 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3782 if (vect_print_dump_info (REPORT_DETAILS))
3783 fprintf (vect_dump, "already marked relevant/live.");
3784 return;
3787 VEC_safe_push (gimple, heap, *worklist, stmt);
3791 /* Function vect_stmt_relevant_p.
3793 Return true if STMT in loop that is represented by LOOP_VINFO is
3794 "relevant for vectorization".
3796 A stmt is considered "relevant for vectorization" if:
3797 - it has uses outside the loop.
3798 - it has vdefs (it alters memory).
3799 - it is a control stmt in the loop (other than the loop exit condition).
3801 CHECKME: what other side effects would the vectorizer allow? */
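/* Illustrative sketch (hypothetical loop):

       for (i = 0; i < n; i++)
	 {
	   sum = sum + a[i];   <-- no vdef; 'sum' is used after the loop
	   b[i] = 0;           <-- alters memory (vdef)
	 }

   The store to b[i] makes its stmt relevant (*RELEVANT), while the use
   of 'sum' in the loop-exit phi makes its defining stmt live (*LIVE_P).  */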
3803 static bool
3804 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
3805 enum vect_relevant *relevant, bool *live_p)
3807 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3808 ssa_op_iter op_iter;
3809 imm_use_iterator imm_iter;
3810 use_operand_p use_p;
3811 def_operand_p def_p;
3813 *relevant = vect_unused_in_loop;
3814 *live_p = false;
3816 /* cond stmt other than loop exit cond. */
3817 if (is_ctrl_stmt (stmt)
3818 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3819 *relevant = vect_used_in_loop;
3821 /* changing memory. */
3822 if (gimple_code (stmt) != GIMPLE_PHI)
3823 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3825 if (vect_print_dump_info (REPORT_DETAILS))
3826 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3827 *relevant = vect_used_in_loop;
3830 /* uses outside the loop. */
3831 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3833 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3835 basic_block bb = gimple_bb (USE_STMT (use_p));
3836 if (!flow_bb_inside_loop_p (loop, bb))
3838 if (vect_print_dump_info (REPORT_DETAILS))
3839 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3841 /* We expect all such uses to be in the loop exit phis
3842 (because the loop is in loop-closed SSA form). */
3843 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
3844 gcc_assert (bb == single_exit (loop)->dest);
3846 *live_p = true;
3851 return (*live_p || *relevant);
3856 /* Function process_use.
3858 Inputs:
3859 - a USE in STMT in a loop represented by LOOP_VINFO
3860 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3861 that defined USE. This is done by calling mark_relevant and passing it
3862 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3864 Outputs:
3865 Generally, LIVE_P and RELEVANT are used to define the liveness and
3866 relevance info of the DEF_STMT of this USE:
3867 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3868 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3869 Exceptions:
3870 - case 1: If USE is used only for address computations (e.g. array indexing),
3871 which does not need to be directly vectorized, then the liveness/relevance
3872 of the respective DEF_STMT is left unchanged.
3873 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3874 skip DEF_STMT because it has already been processed.
3875 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3876 be modified accordingly.
3878 Return true if everything is as expected. Return false otherwise. */
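/* Illustrative sketch for case 1 (hypothetical stmts):

       i_1 = i_0 + 1;
       x_2 = a[i_1];

   The use of i_1 in 'x_2 = a[i_1]' only feeds the address computation of
   the data-ref, so exist_non_indexing_operands_for_use_p returns false
   and the stmt defining i_1 is left unmarked through this use.  */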
3880 static bool
3881 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3882 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
3884 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3885 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3886 stmt_vec_info dstmt_vinfo;
3887 basic_block bb, def_bb;
3888 tree def;
3889 gimple def_stmt;
3890 enum vect_def_type dt;
3892 /* case 1: we are only interested in uses that need to be vectorized. Uses
3893 that are used for address computation are not considered relevant. */
3894 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3895 return true;
3897 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3899 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3900 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3901 return false;
3904 if (!def_stmt || gimple_nop_p (def_stmt))
3905 return true;
3907 def_bb = gimple_bb (def_stmt);
3908 if (!flow_bb_inside_loop_p (loop, def_bb))
3910 if (vect_print_dump_info (REPORT_DETAILS))
3911 fprintf (vect_dump, "def_stmt is out of loop.");
3912 return true;
3915 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3916 DEF_STMT must have already been processed, because this should be the
3917 only way that STMT, which is a reduction-phi, was put in the worklist,
3918 as there should be no other uses for DEF_STMT in the loop. So we just
3919 check that everything is as expected, and we are done. */
3920 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3921 bb = gimple_bb (stmt);
3922 if (gimple_code (stmt) == GIMPLE_PHI
3923 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3924 && gimple_code (def_stmt) != GIMPLE_PHI
3925 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3926 && bb->loop_father == def_bb->loop_father)
3928 if (vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3930 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3931 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3932 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3933 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3934 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3935 return true;
3938 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3939 outer-loop-header-bb:
3940 d = def_stmt
3941 inner-loop:
3942 stmt # use (d)
3943 outer-loop-tail-bb:
3944 ... */
3945 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3947 if (vect_print_dump_info (REPORT_DETAILS))
3948 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3949 switch (relevant)
3951 case vect_unused_in_loop:
3952 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3953 vect_used_by_reduction : vect_unused_in_loop;
3954 break;
3955 case vect_used_in_outer_by_reduction:
3956 relevant = vect_used_by_reduction;
3957 break;
3958 case vect_used_in_outer:
3959 relevant = vect_used_in_loop;
3960 break;
3961 case vect_used_by_reduction:
3962 case vect_used_in_loop:
3963 break;
3965 default:
3966 gcc_unreachable ();
3970 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3971 outer-loop-header-bb:
3973 inner-loop:
3974 d = def_stmt
3975 outer-loop-tail-bb:
3976 stmt # use (d) */
3977 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3979 if (vect_print_dump_info (REPORT_DETAILS))
3980 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3981 switch (relevant)
3983 case vect_unused_in_loop:
3984 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3985 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3986 break;
3988 case vect_used_in_outer_by_reduction:
3989 case vect_used_in_outer:
3990 break;
3992 case vect_used_by_reduction:
3993 relevant = vect_used_in_outer_by_reduction;
3994 break;
3996 case vect_used_in_loop:
3997 relevant = vect_used_in_outer;
3998 break;
4000 default:
4001 gcc_unreachable ();
4005 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
4006 return true;
4010 /* Function vect_mark_stmts_to_be_vectorized.
4012 Not all stmts in the loop need to be vectorized. For example:
4014 for i...
4015 for j...
4016 1. T0 = i + j
4017 2. T1 = a[T0]
4019 3. j = j + 1
4021 Stmts 1 and 3 do not need to be vectorized, because loop control and
4022 addressing of vectorized data-refs are handled differently.
4024 This pass detects such stmts. */
4026 static bool
4027 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
4029 VEC(gimple,heap) *worklist;
4030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4031 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4032 unsigned int nbbs = loop->num_nodes;
4033 gimple_stmt_iterator si;
4034 gimple stmt;
4035 unsigned int i;
4036 stmt_vec_info stmt_vinfo;
4037 basic_block bb;
4038 gimple phi;
4039 bool live_p;
4040 enum vect_relevant relevant;
4042 if (vect_print_dump_info (REPORT_DETAILS))
4043 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
4045 worklist = VEC_alloc (gimple, heap, 64);
4047 /* 1. Init worklist. */
4048 for (i = 0; i < nbbs; i++)
4050 bb = bbs[i];
4051 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
4053 phi = gsi_stmt (si);
4054 if (vect_print_dump_info (REPORT_DETAILS))
4056 fprintf (vect_dump, "init: phi relevant? ");
4057 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
4060 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
4061 vect_mark_relevant (&worklist, phi, relevant, live_p);
4063 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4065 stmt = gsi_stmt (si);
4066 if (vect_print_dump_info (REPORT_DETAILS))
4068 fprintf (vect_dump, "init: stmt relevant? ");
4069 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4072 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
4073 vect_mark_relevant (&worklist, stmt, relevant, live_p);
4077 /* 2. Process_worklist */
4078 while (VEC_length (gimple, worklist) > 0)
4080 use_operand_p use_p;
4081 ssa_op_iter iter;
4083 stmt = VEC_pop (gimple, worklist);
4084 if (vect_print_dump_info (REPORT_DETAILS))
4086 fprintf (vect_dump, "worklist: examine stmt: ");
4087 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4090 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
4091 (DEF_STMT) as relevant/irrelevant and live/dead according to the
4092 liveness and relevance properties of STMT. */
4093 stmt_vinfo = vinfo_for_stmt (stmt);
4094 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
4095 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
4097 /* Generally, the liveness and relevance properties of STMT are
4098 propagated as is to the DEF_STMTs of its USEs:
4099 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
4100 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
4102 One exception is when STMT has been identified as defining a reduction
4103 variable; in this case we set the liveness/relevance as follows:
4104 live_p = false
4105 relevant = vect_used_by_reduction
4106 This is because we distinguish between two kinds of relevant stmts -
4107 those that are used by a reduction computation, and those that are
4108 (also) used by a regular computation. This allows us later on to
4109 identify stmts that are used solely by a reduction, and therefore the
4110 order of the results that they produce does not have to be kept.
4112 Reduction phis are expected to be used by a reduction stmt, or by
4113 a stmt in an outer loop; other reduction stmts are expected to be
4114 in the loop, and possibly used by a stmt in an outer loop.
4115 Here are the expected values of "relevant" for reduction phis/stmts:
4117 relevance:                        phi    stmt
4118 vect_unused_in_loop                      ok
4119 vect_used_in_outer_by_reduction   ok     ok
4120 vect_used_in_outer                ok     ok
4121 vect_used_by_reduction            ok
4122 vect_used_in_loop                               */
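      /* Illustrative sketch (hypothetical reduction):

	     sum_1 = PHI <sum_0, sum_2>
	     ...
	     sum_2 = sum_1 + a[i];

	 stmts feeding sum_2 are marked relevant = vect_used_by_reduction
	 with live_p = false; only the final value of 'sum' is used after
	 the loop, so the order of the partial sums need not be kept.  */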
4124 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
4126 enum vect_relevant tmp_relevant = relevant;
4127 switch (tmp_relevant)
4129 case vect_unused_in_loop:
4130 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
4131 relevant = vect_used_by_reduction;
4132 break;
4134 case vect_used_in_outer_by_reduction:
4135 case vect_used_in_outer:
4136 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
4137 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
4138 && (gimple_assign_rhs_code (stmt)
4139 != DOT_PROD_EXPR)));
4140 break;
4142 case vect_used_by_reduction:
4143 if (gimple_code (stmt) == GIMPLE_PHI)
4144 break;
4145 /* fall through */
4146 case vect_used_in_loop:
4147 default:
4148 if (vect_print_dump_info (REPORT_DETAILS))
4149 fprintf (vect_dump, "unsupported use of reduction.");
4150 VEC_free (gimple, heap, worklist);
4151 return false;
4153 live_p = false;
4156 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
4158 tree op = USE_FROM_PTR (use_p);
4159 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
4161 VEC_free (gimple, heap, worklist);
4162 return false;
4165 } /* while worklist */
4167 VEC_free (gimple, heap, worklist);
4168 return true;
4172 /* Function vect_can_advance_ivs_p
4174 In case the number of iterations of LOOP is unknown at compile
4175 time, an epilog loop will be generated, and the loop induction variables
4176 (IVs) will be "advanced" to the value they are supposed to take just before
4177 the epilog loop. Here we check that the access function of the loop IVs
4178 and the expression that represents the loop bound are simple enough.
4179 These restrictions will be relaxed in the future. */
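/* Illustrative sketch (assumed chrec notation): an IV with access
   function {init, +, step}_loop is advanced to

       init + step * (niters - niters % VF)

   before the epilog loop is entered; evolutions that are themselves
   chrecs (polynomials of degree >= 2) are rejected below.  */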
4181 static bool
4182 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
4184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4185 basic_block bb = loop->header;
4186 gimple phi;
4187 gimple_stmt_iterator gsi;
4189 /* Analyze phi functions of the loop header. */
4191 if (vect_print_dump_info (REPORT_DETAILS))
4192 fprintf (vect_dump, "vect_can_advance_ivs_p:");
4194 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4196 tree access_fn = NULL;
4197 tree evolution_part;
4199 phi = gsi_stmt (gsi);
4200 if (vect_print_dump_info (REPORT_DETAILS))
4202 fprintf (vect_dump, "Analyze phi: ");
4203 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
4206 /* Skip virtual phis. The data dependences that are associated with
4207 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
4209 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4211 if (vect_print_dump_info (REPORT_DETAILS))
4212 fprintf (vect_dump, "virtual phi. skip.");
4213 continue;
4216 /* Skip reduction phis. */
4218 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
4220 if (vect_print_dump_info (REPORT_DETAILS))
4221 fprintf (vect_dump, "reduc phi. skip.");
4222 continue;
4225 /* Analyze the evolution function. */
4227 access_fn = instantiate_parameters
4228 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
4230 if (!access_fn)
4232 if (vect_print_dump_info (REPORT_DETAILS))
4233 fprintf (vect_dump, "No Access function.");
4234 return false;
4237 if (vect_print_dump_info (REPORT_DETAILS))
4239 fprintf (vect_dump, "Access function of PHI: ");
4240 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4243 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
4245 if (evolution_part == NULL_TREE)
4247 if (vect_print_dump_info (REPORT_DETAILS))
4248 fprintf (vect_dump, "No evolution.");
4249 return false;
4252 /* FORNOW: We do not transform initial conditions of IVs
4253 whose evolution functions are polynomials of degree >= 2. */
4255 if (tree_is_chrec (evolution_part))
4256 return false;
4259 return true;
4263 /* Function vect_get_loop_niters.
4265 Determine the number of iterations the loop executes.
4266 If an expression that represents the number of iterations
4267 can be constructed, place it in NUMBER_OF_ITERATIONS.
4268 Return the loop exit condition. */
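/* Illustrative sketch (hypothetical loop): for

       for (i = 0; i < n; i++) ...

   number_of_exit_cond_executions is expected to yield 'n', and the
   stmt returned is the GIMPLE_COND that tests 'i < n'.  */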
4270 static gimple
4271 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
4273 tree niters;
4275 if (vect_print_dump_info (REPORT_DETAILS))
4276 fprintf (vect_dump, "=== get_loop_niters ===");
4278 niters = number_of_exit_cond_executions (loop);
4280 if (niters != NULL_TREE
4281 && niters != chrec_dont_know)
4283 *number_of_iterations = niters;
4285 if (vect_print_dump_info (REPORT_DETAILS))
4287 fprintf (vect_dump, "==> get_loop_niters:" );
4288 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
4292 return get_loop_exit_condition (loop);
4296 /* Function vect_analyze_loop_1.
4298 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4299 for it. The different analyses will record information in the
4300 loop_vec_info struct. This is a subset of the analyses applied in
4301 vect_analyze_loop, to be applied on an inner-loop nested in the loop
4302 that is now considered for (outer-loop) vectorization. */
4304 static loop_vec_info
4305 vect_analyze_loop_1 (struct loop *loop)
4307 loop_vec_info loop_vinfo;
4309 if (vect_print_dump_info (REPORT_DETAILS))
4310 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
4312 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4314 loop_vinfo = vect_analyze_loop_form (loop);
4315 if (!loop_vinfo)
4317 if (vect_print_dump_info (REPORT_DETAILS))
4318 fprintf (vect_dump, "bad inner-loop form.");
4319 return NULL;
4322 return loop_vinfo;
4326 /* Function vect_analyze_loop_form.
4328 Verify that certain CFG restrictions hold, including:
4329 - the loop has a pre-header
4330 - the loop has a single entry and exit
4331 - the loop exit condition is simple enough, and the number of iterations
4332 can be analyzed (a countable loop). */
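/* Illustrative sketch (hypothetical loops): 'for (i = 0; i < n; i++)'
   is countable even when n is known only at run time (symbolic number
   of iterations), whereas a pointer-chasing loop such as
   'while (p = p->next) ...' has no analyzable iteration count and is
   rejected below.  */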
4334 loop_vec_info
4335 vect_analyze_loop_form (struct loop *loop)
4337 loop_vec_info loop_vinfo;
4338 gimple loop_cond;
4339 tree number_of_iterations = NULL;
4340 loop_vec_info inner_loop_vinfo = NULL;
4342 if (vect_print_dump_info (REPORT_DETAILS))
4343 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4345 /* Different restrictions apply when we are considering an inner-most loop,
4346 vs. an outer (nested) loop.
4347 (FORNOW; we may want to relax some of these restrictions in the future.) */
4349 if (!loop->inner)
4351 /* Inner-most loop. We currently require that the number of BBs is
4352 exactly 2 (the header and latch). Vectorizable inner-most loops
4353 look like this:
4355 (pre-header)
4357 header <--------+
4358 | | |
4359 | +--> latch --+
4361 (exit-bb) */
4363 if (loop->num_nodes != 2)
4365 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4366 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4367 return NULL;
4370 if (empty_block_p (loop->header))
4372 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4373 fprintf (vect_dump, "not vectorized: empty loop.");
4374 return NULL;
4377 else
4379 struct loop *innerloop = loop->inner;
4380 edge backedge, entryedge;
4382 /* Nested loop. We currently require that the loop is doubly-nested,
4383 contains a single inner loop, and the number of BBs is exactly 5.
4384 Vectorizable outer-loops look like this:
4386 (pre-header)
4388 header <---+
4390 inner-loop |
4392 tail ------+
4394 (exit-bb)
4396 The inner-loop has the properties expected of inner-most loops
4397 as described above. */
4399 if ((loop->inner)->inner || (loop->inner)->next)
4401 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4402 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4403 return NULL;
4406 /* Analyze the inner-loop. */
4407 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4408 if (!inner_loop_vinfo)
4410 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4411 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4412 return NULL;
4415 if (!expr_invariant_in_loop_p (loop,
4416 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4418 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4419 fprintf (vect_dump,
4420 "not vectorized: inner-loop count not invariant.");
4421 destroy_loop_vec_info (inner_loop_vinfo, true);
4422 return NULL;
4425 if (loop->num_nodes != 5)
4427 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4428 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4429 destroy_loop_vec_info (inner_loop_vinfo, true);
4430 return NULL;
4433 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4434 backedge = EDGE_PRED (innerloop->header, 1);
4435 entryedge = EDGE_PRED (innerloop->header, 0);
4436 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4438 backedge = EDGE_PRED (innerloop->header, 0);
4439 entryedge = EDGE_PRED (innerloop->header, 1);
4442 if (entryedge->src != loop->header
4443 || !single_exit (innerloop)
4444 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4446 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4447 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4448 destroy_loop_vec_info (inner_loop_vinfo, true);
4449 return NULL;
4452 if (vect_print_dump_info (REPORT_DETAILS))
4453 fprintf (vect_dump, "Considering outer-loop vectorization.");
4456 if (!single_exit (loop)
4457 || EDGE_COUNT (loop->header->preds) != 2)
4459 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4461 if (!single_exit (loop))
4462 fprintf (vect_dump, "not vectorized: multiple exits.");
4463 else if (EDGE_COUNT (loop->header->preds) != 2)
4464 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4466 if (inner_loop_vinfo)
4467 destroy_loop_vec_info (inner_loop_vinfo, true);
4468 return NULL;
4471 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4472 that the loop is represented as a do-while (with a proper if-guard
4473 before the loop if needed), where the loop header contains all the
4474 executable statements, and the latch is empty. */
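  /* Illustrative sketch of the expected shape (schematic):

	 header:  <all executable stmts>
		  if (exit_cond) goto exit_bb; else goto latch;
	 latch:   goto header;   <-- empty: no stmts, no phis  */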
4475 if (!empty_block_p (loop->latch)
4476 || phi_nodes (loop->latch))
4478 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4479 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4480 if (inner_loop_vinfo)
4481 destroy_loop_vec_info (inner_loop_vinfo, true);
4482 return NULL;
4485 /* Make sure there exists a single-predecessor exit bb: */
4486 if (!single_pred_p (single_exit (loop)->dest))
4488 edge e = single_exit (loop);
4489 if (!(e->flags & EDGE_ABNORMAL))
4491 split_loop_exit_edge (e);
4492 if (vect_print_dump_info (REPORT_DETAILS))
4493 fprintf (vect_dump, "split exit edge.");
4495 else
4497 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4498 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4499 if (inner_loop_vinfo)
4500 destroy_loop_vec_info (inner_loop_vinfo, true);
4501 return NULL;
4505 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4506 if (!loop_cond)
4508 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4509 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4510 if (inner_loop_vinfo)
4511 destroy_loop_vec_info (inner_loop_vinfo, true);
4512 return NULL;
4515 if (!number_of_iterations)
4517 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4518 fprintf (vect_dump,
4519 "not vectorized: number of iterations cannot be computed.");
4520 if (inner_loop_vinfo)
4521 destroy_loop_vec_info (inner_loop_vinfo, true);
4522 return NULL;
4525 if (chrec_contains_undetermined (number_of_iterations))
4527 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4528 fprintf (vect_dump, "Infinite number of iterations.");
4529 if (inner_loop_vinfo)
4530 destroy_loop_vec_info (inner_loop_vinfo, true);
4531 return NULL;
4534 if (!NITERS_KNOWN_P (number_of_iterations))
4536 if (vect_print_dump_info (REPORT_DETAILS))
4538 fprintf (vect_dump, "Symbolic number of iterations is ");
4539 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4542 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4545 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4546 if (inner_loop_vinfo)
4547 destroy_loop_vec_info (inner_loop_vinfo, false);
4548 return NULL;
4551 loop_vinfo = new_loop_vec_info (loop);
4552 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4553 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4555 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4557 /* CHECKME: We may want to keep it around in the future. */
4558 if (inner_loop_vinfo)
4559 destroy_loop_vec_info (inner_loop_vinfo, false);
4561 gcc_assert (!loop->aux);
4562 loop->aux = loop_vinfo;
4563 return loop_vinfo;
4567 /* Function vect_analyze_loop.
4569 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4570 for it. The different analyses will record information in the
4571 loop_vec_info struct. */
4572 loop_vec_info
4573 vect_analyze_loop (struct loop *loop)
4575 bool ok;
4576 loop_vec_info loop_vinfo;
4578 if (vect_print_dump_info (REPORT_DETAILS))
4579 fprintf (vect_dump, "===== analyze_loop_nest =====");
4581 if (loop_outer (loop)
4582 && loop_vec_info_for_loop (loop_outer (loop))
4583 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4585 if (vect_print_dump_info (REPORT_DETAILS))
4586 fprintf (vect_dump, "outer-loop already vectorized.");
4587 return NULL;
4590 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4592 loop_vinfo = vect_analyze_loop_form (loop);
4593 if (!loop_vinfo)
4595 if (vect_print_dump_info (REPORT_DETAILS))
4596 fprintf (vect_dump, "bad loop form.");
4597 return NULL;
4600 /* Find all data references in the loop (which correspond to vdefs/vuses)
4601 and analyze their evolution in the loop.
4603 FORNOW: Handle only simple array references whose
4604 alignment can be forced, and aligned pointer-references. */
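/* Illustrative sketch (hypothetical refs): 'a[i]' with 'int a[256]'
   (the alignment of 'a' can be forced) and '*p' with a suitably
   aligned pointer are the intended cases; a ref such as a[i*i],
   whose address evolution is not affine, fails the analysis below.  */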
4606 ok = vect_analyze_data_refs (loop_vinfo);
4607 if (!ok)
4609 if (vect_print_dump_info (REPORT_DETAILS))
4610 fprintf (vect_dump, "bad data references.");
4611 destroy_loop_vec_info (loop_vinfo, true);
4612 return NULL;
4615 /* Classify all cross-iteration scalar data-flow cycles.
4616 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4618 vect_analyze_scalar_cycles (loop_vinfo);
4620 vect_pattern_recog (loop_vinfo);
4622 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4624 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4625 if (!ok)
4627 if (vect_print_dump_info (REPORT_DETAILS))
4628 fprintf (vect_dump, "unexpected pattern.");
4629 destroy_loop_vec_info (loop_vinfo, true);
4630 return NULL;
4633 /* Analyze the alignment of the data-refs in the loop.
4634 Fail if a data reference is found that cannot be vectorized. */
4636 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4637 if (!ok)
4639 if (vect_print_dump_info (REPORT_DETAILS))
4640 fprintf (vect_dump, "bad data alignment.");
4641 destroy_loop_vec_info (loop_vinfo, true);
4642 return NULL;
4645 ok = vect_determine_vectorization_factor (loop_vinfo);
4646 if (!ok)
4648 if (vect_print_dump_info (REPORT_DETAILS))
4649 fprintf (vect_dump, "can't determine vectorization factor.");
4650 destroy_loop_vec_info (loop_vinfo, true);
4651 return NULL;
4654 /* Analyze data dependences between the data-refs in the loop.
4655 FORNOW: fail at the first data dependence that we encounter. */
4657 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4658 if (!ok)
4660 if (vect_print_dump_info (REPORT_DETAILS))
4661 fprintf (vect_dump, "bad data dependence.");
4662 destroy_loop_vec_info (loop_vinfo, true);
4663 return NULL;
4666 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4667 complex, etc.). FORNOW: Only handle consecutive access patterns.
4669 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4670 if (!ok)
4672 if (vect_print_dump_info (REPORT_DETAILS))
4673 fprintf (vect_dump, "bad data access.");
4674 destroy_loop_vec_info (loop_vinfo, true);
4675 return NULL;
4678 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4679 It is important to call pruning after vect_analyze_data_ref_accesses,
4680 since we use grouping information gathered by interleaving analysis. */
4681 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4682 if (!ok)
4684 if (vect_print_dump_info (REPORT_DETAILS))
4685 fprintf (vect_dump, "too long list of versioning for alias "
4686 "run-time tests.");
4687 destroy_loop_vec_info (loop_vinfo, true);
4688 return NULL;
4691 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4692 ok = vect_analyze_slp (loop_vinfo);
4693 if (ok)
4695 /* Decide which possible SLP instances to SLP. */
4696 vect_make_slp_decision (loop_vinfo);
4698 /* Find stmts that need to be both vectorized and SLPed. */
4699 vect_detect_hybrid_slp (loop_vinfo);
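  /* Illustrative sketch (hypothetical group): isomorphic stmts such as

	 a[4*i]   = b[4*i]   + 1;
	 a[4*i+1] = b[4*i+1] + 1;
	 a[4*i+2] = b[4*i+2] + 1;
	 a[4*i+3] = b[4*i+3] + 1;

     may form an SLP instance and be vectorized as straight-line code;
     stmts needed both by an SLP instance and by loop vectorization
     proper are the "hybrid" ones detected above.  */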
4702 /* This pass will decide on using loop versioning and/or loop peeling in
4703 order to enhance the alignment of data references in the loop. */
4705 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4706 if (!ok)
4708 if (vect_print_dump_info (REPORT_DETAILS))
4709 fprintf (vect_dump, "bad data alignment.");
4710 destroy_loop_vec_info (loop_vinfo, true);
4711 return NULL;
4714 /* Scan all the operations in the loop and make sure they are
4715 vectorizable. */
4717 ok = vect_analyze_operations (loop_vinfo);
4718 if (!ok)
4720 if (vect_print_dump_info (REPORT_DETAILS))
4721 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4722 destroy_loop_vec_info (loop_vinfo, true);
4723 return NULL;
4726 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4728 return loop_vinfo;