/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

static bool vect_can_advance_ivs_p (loop_vec_info);

/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always take
   invariants out of the loop, and so in the case of promotion we also have to
   check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
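
/* For illustration: given the gimple stmt 'int_y = (int) short_x' (a
   promotion), the result type is 4 bytes but the rhs type is 2 bytes,
   so the function returns the 'short' type with *LHS_SIZE_UNIT == 4 and
   *RHS_SIZE_UNIT == 2.  With 16-byte vectors, choosing the 2-byte type
   yields a VF of 8 instead of 4, so the promotion source is not missed.  */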

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with vector size (VS) of 16 bytes, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (gimple_has_volatile_ops (stmt))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: stmt has volatile"
                         " operands");

              return false;
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                           &dummy);
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
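
/* For illustration: suppose a loop contains 'c[i] = a[i] + b[i]' on
   4-byte ints (nunits == 4 for a 16-byte vector size) and 's[i] = t[i]'
   on 2-byte shorts (nunits == 8).  The maximum wins in the loop above,
   so this function records a vectorization factor of 8.  */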

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
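
/* For illustration: an SLP instance with unrolling factor 2 had its
   inside-of-loop cost computed for that unrolling.  If the VF settled on
   by the loop analysis is 8, four times as many vector stmts are created,
   so the cost is scaled by vf / unrolling factor == 8 / 2 == 4.  */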

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* Inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e., that are used out of the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross-iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                             LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
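
/* For illustration of the threshold logic above: with
   PARAM_MIN_VECT_LOOP_BOUND == 2 and VF == 4, min_scalar_loop_bound is
   2 * 4 - 1 == 7.  If the cost model returns min_profitable_iters == 10,
   the more conservative value wins (th == 10), and a loop known to
   execute at most 10 iterations is rejected as not profitable.  */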

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  if (!gimple_assign_copy_p (stmt))
    return false;
  operand = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
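
/* For illustration: for 'a[i_7] = x_5' the lhs is an ARRAY_REF and the
   stmt is a copy, so the function returns true for USE == x_5 (a real
   data use) and false for USE == i_7 (used only for indexing).  For
   'y_8 = a[i_7]' the lhs is an SSA_NAME, so every use is an indexing
   use and false is returned.  */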

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dummy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
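
/* For illustration: in the interleaved group of stores a[4i], a[4i+1],
   a[4i+2], a[4i+3], calling this function on the stmt that writes
   a[4i+2], with FIRST_STMT being the stmt that writes a[4i], walks two
   links of the chain and returns 2.  A stmt that belongs to a different
   chain returns -1.  */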

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
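
/* For illustration: if DRB's chain currently holds data-refs with inits
   0 and 8, inserting a DRA with init 4 stops at the first element whose
   init (8) is greater than 4, producing the chain 0 -> 4 -> 8, i.e. the
   list stays sorted by init.  */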

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT(FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                        old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
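
/* For illustration of case 4: merging a chain with inits {4, 12} and a
   chain with inits {0, 8}, the chain whose first init is smaller (0)
   survives, the other chain's nodes are inserted by init, and every node
   ends up with FIRST_DR pointing at the head of the merged chain
   0 -> 4 -> 8 -> 12.  */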

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
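
/* For illustration: two offsets of the form 'i * 4 + 8' compare equal
   node by node (same tree codes, operands recursively equal), whereas
   'i * 4' and 'j * 4' fail on the first operand.  STRIP_NOPS means a
   conversion such as '(sizetype) i' still matches 'i'.  */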

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have the same first location (except init)
     and that they are both either loads or stores (not one load and one
     store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
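
/* For illustration: for the stores a[2*i] and a[2*i+1] with 4-byte
   elements, both data-refs have the same base, offset and step (8), and
   inits 0 and 4.  The difference (4) is at most the step and is
   divisible by the type size, so the refs are chained as an interleaving
   group of size 2.  */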

/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run-time by versioning for alias, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We have already checked above that one of the
             data-refs is a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
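
/* For illustration: in a loop with 'a[i+3] = a[i] + 1' the dependence
   distance is 3.  With VF == 4 the distance is neither a multiple of VF
   nor >= VF, so the loop is rejected.  A distance of 4 with VF == 4
   satisfies dist % vectorization_factor == 0: the refs get the
   same-alignment marking and this dependence does not block
   vectorization.  */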

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
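
/* For illustration: with a 16-byte vectype, a data-ref whose base decl
   is 16-byte aligned and whose DR_INIT is 20 gets
   DR_MISALIGNMENT == 20 % 16 == 4.  If the base decl is under-aligned
   but can legally be realigned, DECL_ALIGN is forced up to
   TYPE_ALIGN (vectype) first, and the same modulo computation applies.  */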

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
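
/* For illustration: peeling npeel == 3 iterations to align DR_PEEL
   advances a 4-byte data-ref by 12 bytes, so a known misalignment of 8
   against a 16-byte vectype becomes (8 + 3 * 4) % 16 == 4.  A data-ref
   recorded as having the same alignment as DR_PEEL simply becomes
   aligned (misalignment 0).  */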

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
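
/* For illustration: for V4SI (nelements == 4, elem_size == 4) with a
   known misalignment of 8 bytes, mis_in_elements is 2 and the prolog
   loop needs 4 - 2 == 2 iterations.  An interleaving group of size 2
   divides that evenly, so peeling is usable; a group of size 4 would
   not ((4 - 2) % 4 != 0), and false is returned.  */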
1731 /* Function vect_enhance_data_refs_alignment
1733 This pass will use loop versioning and loop peeling in order to enhance
1734 the alignment of data references in the loop.
1736 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1737 original loop is to be vectorized; Any other loops that are created by
1738 the transformations performed in this pass - are not supposed to be
1739 vectorized. This restriction will be relaxed.
1741 This pass will require a cost model to guide it whether to apply peeling
1742 or versioning or a combination of the two. For example, the scheme that
1743 intel uses when given a loop with several memory accesses, is as follows:
1744 choose one memory access ('p') which alignment you want to force by doing
1745 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1746 other accesses are not necessarily aligned, or (2) use loop versioning to
1747 generate one loop in which all accesses are aligned, and another loop in
1748 which only 'p' is necessarily aligned.
1750 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1751 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1752 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1754 Devising a cost model is the most critical aspect of this work. It will
1755 guide us on which access to peel for, whether to use loop versioning, how
1756 many versions to create, etc. The cost model will probably consist of
1757 generic considerations as well as target specific considerations (on
1758 powerpc for example, misaligned stores are more painful than misaligned
1759 loads).
1761 Here are the general steps involved in alignment enhancements:
1763 -- original loop, before alignment analysis:
1764 for (i=0; i<N; i++){
1765 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1766 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1769 -- After vect_compute_data_refs_alignment:
1770 for (i=0; i<N; i++){
1771 x = q[i]; # DR_MISALIGNMENT(q) = 3
1772 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1775 -- Possibility 1: we do loop versioning:
1776 if (p is aligned) {
1777 for (i=0; i<N; i++){ # loop 1A
1778 x = q[i]; # DR_MISALIGNMENT(q) = 3
1779 p[i] = y; # DR_MISALIGNMENT(p) = 0
1782 else {
1783 for (i=0; i<N; i++){ # loop 1B
1784 x = q[i]; # DR_MISALIGNMENT(q) = 3
1785 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1789 -- Possibility 2: we do loop peeling:
1790 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1791 x = q[i];
1792 p[i] = y;
1794 for (i = 3; i < N; i++){ # loop 2A
1795 x = q[i]; # DR_MISALIGNMENT(q) = 0
1796 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1799 -- Possibility 3: combination of loop peeling and versioning:
1800 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1801 x = q[i];
1802 p[i] = y;
1804 if (p is aligned) {
1805 for (i = 3; i<N; i++){ # loop 3A
1806 x = q[i]; # DR_MISALIGNMENT(q) = 0
1807 p[i] = y; # DR_MISALIGNMENT(p) = 0
1810 else {
1811 for (i = 3; i<N; i++){ # loop 3B
1812 x = q[i]; # DR_MISALIGNMENT(q) = 0
1813 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1817 These loops are later passed to loop_transform to be vectorized. The
1818 vectorizer will use the alignment information to guide the transformation
1819 (whether to generate regular loads/stores, or with special handling for
1820 misalignment). */
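/* A minimal sketch of what possibility 1 looks like at runtime (an
   illustration only; the 16-byte vector size, the mask value and the
   uintptr_t cast are assumptions for the example, not the code the
   vectorizer emits):

   if ((((uintptr_t) p) & 15) == 0) {
   for (i=0; i<N; i++) # vectorized loop, p known 16-byte aligned
   p[i] = y;
   }
   else {
   for (i=0; i<N; i++) # scalar fallback loop
   p[i] = y;
   } */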
1822 static bool
1823 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1825 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1826 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1827 enum dr_alignment_support supportable_dr_alignment;
1828 struct data_reference *dr0 = NULL;
1829 struct data_reference *dr;
1830 unsigned int i;
1831 bool do_peeling = false;
1832 bool do_versioning = false;
1833 bool stat;
1834 gimple stmt;
1835 stmt_vec_info stmt_info;
1836 int vect_versioning_for_alias_required;
1838 if (vect_print_dump_info (REPORT_DETAILS))
1839 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1841 /* While cost model enhancements are expected in the future, the high level
1842 view of the code at this time is as follows:
1844 A) If there is a misaligned write then see if peeling to align this write
1845 can make all data references satisfy vect_supportable_dr_alignment.
1846 If so, update data structures as needed and return true. Note that
1847 at this time vect_supportable_dr_alignment is known to return false
1848 for a misaligned write.
1850 B) If peeling wasn't possible and there is a data reference with an
1851 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1852 then see if loop versioning checks can be used to make all data
1853 references satisfy vect_supportable_dr_alignment. If so, update
1854 data structures as needed and return true.
1856 C) If neither peeling nor versioning were successful then return false if
1857 any data reference does not satisfy vect_supportable_dr_alignment.
1859 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1861 Note, Possibility 3 above (which is peeling and versioning together) is not
1862 being done at this time. */
1864 /* (1) Peeling to force alignment. */
1866 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1867 Considerations:
1868 + How many accesses will become aligned due to the peeling
1869 - How many accesses will become unaligned due to the peeling,
1870 and the cost of misaligned accesses.
1871 - The cost of peeling (the extra runtime checks, the increase
1872 in code size).
1874 The scheme we use FORNOW: peel to force the alignment of the first
1875 misaligned store in the loop.
1876 Rationale: misaligned stores are not yet supported.
1878 TODO: Use a cost model. */
1880 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1882 stmt = DR_STMT (dr);
1883 stmt_info = vinfo_for_stmt (stmt);
1885 /* For interleaving, only the alignment of the first access
1886 matters. */
1887 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1888 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1889 continue;
1891 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1893 do_peeling = vector_alignment_reachable_p (dr);
1894 if (do_peeling)
1895 dr0 = dr;
1896 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1897 fprintf (vect_dump, "vector alignment may not be reachable");
1898 break;
1902 vect_versioning_for_alias_required =
1903 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1905 /* Temporarily, if versioning for alias is required, we disable peeling
1906 until we support peeling and versioning. Often peeling for alignment
1907 will require peeling for loop-bound, which in turn requires that we
1908 know how to adjust the loop ivs after the loop. */
1909 if (vect_versioning_for_alias_required
1910 || !vect_can_advance_ivs_p (loop_vinfo)
1911 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1912 do_peeling = false;
1914 if (do_peeling)
1916 int mis;
1917 int npeel = 0;
1918 gimple stmt = DR_STMT (dr0);
1919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1920 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1921 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1923 if (known_alignment_for_access_p (dr0))
1925 /* Since it's known at compile time, compute the number of iterations
1926 in the peeled loop (the peeling factor) for use in updating
1927 DR_MISALIGNMENT values. The peeling factor is the vectorization
1928 factor minus the misalignment as an element count. */
1929 mis = DR_MISALIGNMENT (dr0);
1930 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1931 npeel = nelements - mis;
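/* Illustrative numbers (assumed for the example): with V4SI vectors,
   NELEMENTS is 4 and the element size is 4 bytes, so a DR_MISALIGNMENT
   of 8 bytes gives MIS = 2 elements and NPEEL = 4 - 2 = 2 peeled scalar
   iterations, which bring the access to a 16-byte boundary. */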
1933 /* For interleaved data access every iteration accesses all the
1934 members of the group, therefore we divide the number of iterations
1935 by the group size. */
1936 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1937 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1938 npeel /= DR_GROUP_SIZE (stmt_info);
1940 if (vect_print_dump_info (REPORT_DETAILS))
1941 fprintf (vect_dump, "Try peeling by %d", npeel);
1944 /* Ensure that all data refs can be vectorized after the peel. */
1945 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1947 int save_misalignment;
1949 if (dr == dr0)
1950 continue;
1952 stmt = DR_STMT (dr);
1953 stmt_info = vinfo_for_stmt (stmt);
1954 /* For interleaving, only the alignment of the first access
1955 matters. */
1956 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1957 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1958 continue;
1960 save_misalignment = DR_MISALIGNMENT (dr);
1961 vect_update_misalignment_for_peel (dr, dr0, npeel);
1962 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1963 SET_DR_MISALIGNMENT (dr, save_misalignment);
1965 if (!supportable_dr_alignment)
1967 do_peeling = false;
1968 break;
1972 if (do_peeling)
1974 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1975 If the misalignment of DR_i is identical to that of dr0 then set
1976 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1977 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1978 by the peeling factor times the element size of DR_i (modulo the
1979 vectorization factor times the element size). Otherwise, the
1980 misalignment of DR_i must be set to unknown. */
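/* Worked example (hypothetical values): with a 16-byte vector size and
   4-byte elements, NPEEL = 2 and DR_MISALIGNMENT (DR_i) = 12 give an
   updated misalignment of (12 + 2*4) % 16 = 4; an unknown misalignment
   stays unknown. */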
1981 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1982 if (dr != dr0)
1983 vect_update_misalignment_for_peel (dr, dr0, npeel);
1985 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1986 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1987 SET_DR_MISALIGNMENT (dr0, 0);
1988 if (vect_print_dump_info (REPORT_ALIGNMENT))
1989 fprintf (vect_dump, "Alignment of access forced using peeling.");
1991 if (vect_print_dump_info (REPORT_DETAILS))
1992 fprintf (vect_dump, "Peeling for alignment will be applied.");
1994 stat = vect_verify_datarefs_alignment (loop_vinfo);
1995 gcc_assert (stat);
1996 return stat;
2001 /* (2) Versioning to force alignment. */
2003 /* Try versioning if:
2004 1) flag_tree_vect_loop_version is TRUE
2005 2) optimize loop for speed
2006 3) there is at least one unsupported misaligned data ref with an unknown
2007 misalignment, and
2008 4) all misaligned data refs with a known misalignment are supported, and
2009 5) the number of runtime alignment checks is within reason. */
2011 do_versioning =
2012 flag_tree_vect_loop_version
2013 && optimize_loop_nest_for_speed_p (loop)
2014 && (!loop->inner); /* FORNOW */
2016 if (do_versioning)
2018 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2020 stmt = DR_STMT (dr);
2021 stmt_info = vinfo_for_stmt (stmt);
2023 /* For interleaving, only the alignment of the first access
2024 matters. */
2025 if (aligned_access_p (dr)
2026 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2027 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
2028 continue;
2030 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
2032 if (!supportable_dr_alignment)
2034 gimple stmt;
2035 int mask;
2036 tree vectype;
2038 if (known_alignment_for_access_p (dr)
2039 || VEC_length (gimple,
2040 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2041 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2043 do_versioning = false;
2044 break;
2047 stmt = DR_STMT (dr);
2048 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2049 gcc_assert (vectype);
2051 /* The rightmost bits of an aligned address must be zeros.
2052 Construct the mask needed for this test. For example,
2053 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2054 mask must be 15 = 0xf. */
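/* Sketch of the resulting runtime test (an illustration, with assumed
   addresses a0/a1; since only the low bits are inspected, several
   addresses can be OR-ed together and tested with one check):

   if ((((uintptr_t) a0 | (uintptr_t) a1) & mask) == 0)
   # take the version of the loop with the aligned accesses */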
2055 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2057 /* FORNOW: use the same mask to test all potentially unaligned
2058 references in the loop. The vectorizer currently supports
2059 a single vector size, see the reference to
2060 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2061 vectorization factor is computed. */
2062 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2063 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2064 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2065 VEC_safe_push (gimple, heap,
2066 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2067 DR_STMT (dr));
2071 /* Versioning requires at least one misaligned data reference. */
2072 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2073 do_versioning = false;
2074 else if (!do_versioning)
2075 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2078 if (do_versioning)
2080 VEC(gimple,heap) *may_misalign_stmts
2081 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2082 gimple stmt;
2084 /* It can now be assumed that the data references in the statements
2085 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2086 of the loop being vectorized. */
2087 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 dr = STMT_VINFO_DATA_REF (stmt_info);
2091 SET_DR_MISALIGNMENT (dr, 0);
2092 if (vect_print_dump_info (REPORT_ALIGNMENT))
2093 fprintf (vect_dump, "Alignment of access forced using versioning.");
2096 if (vect_print_dump_info (REPORT_DETAILS))
2097 fprintf (vect_dump, "Versioning for alignment will be applied.");
2099 /* Peeling and versioning can't be done together at this time. */
2100 gcc_assert (! (do_peeling && do_versioning));
2102 stat = vect_verify_datarefs_alignment (loop_vinfo);
2103 gcc_assert (stat);
2104 return stat;
2107 /* This point is reached if neither peeling nor versioning is being done. */
2108 gcc_assert (! (do_peeling || do_versioning));
2110 stat = vect_verify_datarefs_alignment (loop_vinfo);
2111 return stat;
2115 /* Function vect_analyze_data_refs_alignment
2117 Analyze the alignment of the data-references in the loop.
2118 Return FALSE if a data reference is found that cannot be vectorized. */
2120 static bool
2121 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2123 if (vect_print_dump_info (REPORT_DETAILS))
2124 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2126 if (!vect_compute_data_refs_alignment (loop_vinfo))
2128 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2129 fprintf (vect_dump,
2130 "not vectorized: can't calculate alignment for data ref.");
2131 return false;
2134 return true;
2138 /* Analyze groups of strided accesses: check that DR belongs to a group of
2139 strided accesses of legal size, step, etc. Detect gaps, single element
2140 interleaving, and other special cases. Set strided access info.
2141 Collect groups of strided stores for further use in SLP analysis. */
2143 static bool
2144 vect_analyze_group_access (struct data_reference *dr)
2146 tree step = DR_STEP (dr);
2147 tree scalar_type = TREE_TYPE (DR_REF (dr));
2148 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2149 gimple stmt = DR_STMT (dr);
2150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2151 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2152 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2153 HOST_WIDE_INT stride;
2154 bool slp_impossible = false;
2156 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2157 interleaving group (including gaps). */
2158 stride = dr_step / type_size;
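/* For instance (illustrative): 4-byte ints and a DR_STEP of 16 bytes
   give STRIDE = 4, i.e. an interleaving group of four elements per
   iteration, gaps included. */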
2160 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2161 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2163 /* Check whether this DR is part of an interleaving group, and is a single
2164 element of the group that is accessed in the loop. */
2166 /* Gaps are supported only for loads. STEP must be a multiple of the type
2167 size. The size of the group must be a power of 2. */
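/* E.g. (an assumed example): a loop that reads only a[4*i] has a
   DR_STEP of 4 * sizeof (a[0]); it is treated as a group of size 4 in
   which this load is the single accessed member and the remaining
   three slots are gaps. */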
2168 if (DR_IS_READ (dr)
2169 && (dr_step % type_size) == 0
2170 && stride > 0
2171 && exact_log2 (stride) != -1)
2173 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2174 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2175 if (vect_print_dump_info (REPORT_DR_DETAILS))
2177 fprintf (vect_dump, "Detected single element interleaving %d ",
2178 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2179 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2180 fprintf (vect_dump, " step ");
2181 print_generic_expr (vect_dump, step, TDF_SLIM);
2183 return true;
2185 if (vect_print_dump_info (REPORT_DETAILS))
2186 fprintf (vect_dump, "not consecutive access");
2187 return false;
2190 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2192 /* First stmt in the interleaving chain. Check the chain. */
2193 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2194 struct data_reference *data_ref = dr;
2195 unsigned int count = 1;
2196 tree next_step;
2197 tree prev_init = DR_INIT (data_ref);
2198 gimple prev = stmt;
2199 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2201 while (next)
2203 /* Skip same data-refs. In case two or more stmts share a data-ref
2204 (supported only for loads), we vectorize only the first stmt, and
2205 the rest get their vectorized loads from the first one. */
2206 if (!tree_int_cst_compare (DR_INIT (data_ref),
2207 DR_INIT (STMT_VINFO_DATA_REF (
2208 vinfo_for_stmt (next)))))
2210 if (!DR_IS_READ (data_ref))
2212 if (vect_print_dump_info (REPORT_DETAILS))
2213 fprintf (vect_dump, "Two store stmts share the same dr.");
2214 return false;
2217 /* Check that there are no load-store dependences for these loads,
2218 to prevent a load-store-load sequence to the same location. */
2219 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2220 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2222 if (vect_print_dump_info (REPORT_DETAILS))
2223 fprintf (vect_dump,
2224 "READ_WRITE dependence in interleaving.");
2225 return false;
2228 /* For a load, record the stmt whose vectorized load it will reuse. */
2229 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2231 prev = next;
2232 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2233 continue;
2235 prev = next;
2237 /* Check that all the accesses have the same STEP. */
2238 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2239 if (tree_int_cst_compare (step, next_step))
2241 if (vect_print_dump_info (REPORT_DETAILS))
2242 fprintf (vect_dump, "not consecutive access in interleaving");
2243 return false;
2246 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2247 /* Check that the distance between two accesses is equal to the type
2248 size. Otherwise, we have gaps. */
2249 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2250 - TREE_INT_CST_LOW (prev_init)) / type_size;
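/* Hypothetical numbers: with 4-byte elements, DR_INITs of 0 and 8 for
   consecutive chain members give DIFF = 2, i.e. one skipped element
   between the two accesses. */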
2251 if (diff != 1)
2253 /* FORNOW: SLP of accesses with gaps is not supported. */
2254 slp_impossible = true;
2255 if (!DR_IS_READ (data_ref))
2257 if (vect_print_dump_info (REPORT_DETAILS))
2258 fprintf (vect_dump, "interleaved store with gaps");
2259 return false;
2262 gaps += diff - 1;
2265 /* Store the gap from the previous member of the group. If there is no
2266 gap in the access, DR_GROUP_GAP is always 1. */
2267 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2269 prev_init = DR_INIT (data_ref);
2270 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2271 /* Count the number of data-refs in the chain. */
2272 count++;
2275 /* COUNT is the number of accesses found; we multiply it by the size of
2276 the type to get COUNT_IN_BYTES. */
2277 count_in_bytes = type_size * count;
2279 /* Check that the size of the interleaving (including gaps) is not greater
2280 than STEP. */
2281 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2283 if (vect_print_dump_info (REPORT_DETAILS))
2285 fprintf (vect_dump, "interleaving size is greater than step for ");
2286 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2288 return false;
2291 /* Check that the size of the interleaving is equal to STEP for stores,
2292 i.e., that there are no gaps. */
2293 if (dr_step != count_in_bytes)
2295 if (DR_IS_READ (dr))
2297 slp_impossible = true;
2298 /* There is a gap after the last load in the group. This gap is the
2299 difference between the stride and the number of elements. When
2300 there is no gap, this difference should be 0. */
2301 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
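/* E.g. (illustrative): loads of a[4*i], a[4*i+1] and a[4*i+2] with a
   DR_STEP of 4 * sizeof (a[0]) have STRIDE = 4 and COUNT = 3, so the
   DR_GROUP_GAP of the first stmt becomes 1. */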
2303 else
2305 if (vect_print_dump_info (REPORT_DETAILS))
2306 fprintf (vect_dump, "interleaved store with gaps");
2307 return false;
2311 /* Check that STEP is a multiple of type size. */
2312 if ((dr_step % type_size) != 0)
2314 if (vect_print_dump_info (REPORT_DETAILS))
2316 fprintf (vect_dump, "step is not a multiple of type size: step ");
2317 print_generic_expr (vect_dump, step, TDF_SLIM);
2318 fprintf (vect_dump, " size ");
2319 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2320 TDF_SLIM);
2322 return false;
2325 /* FORNOW: we handle only interleaving that is a power of 2.
2326 We don't fail here if it may still be possible to vectorize the
2327 group using SLP. If not, the size of the group will be checked in
2328 vect_analyze_operations, and the vectorization will fail. */
2329 if (exact_log2 (stride) == -1)
2331 if (vect_print_dump_info (REPORT_DETAILS))
2332 fprintf (vect_dump, "interleaving is not a power of 2");
2334 if (slp_impossible)
2335 return false;
2337 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2338 if (vect_print_dump_info (REPORT_DETAILS))
2339 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2341 /* SLP: create an SLP data structure for every interleaving group of
2342 stores for further analysis in vect_analyze_slp. */
2343 if (!DR_IS_READ (dr) && !slp_impossible)
2344 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2347 return true;
2351 /* Analyze the access pattern of the data-reference DR.
2352 In case of non-consecutive accesses call vect_analyze_group_access() to
2353 analyze groups of strided accesses. */
2355 static bool
2356 vect_analyze_data_ref_access (struct data_reference *dr)
2358 tree step = DR_STEP (dr);
2359 tree scalar_type = TREE_TYPE (DR_REF (dr));
2360 gimple stmt = DR_STMT (dr);
2361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2362 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2363 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2364 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2366 if (!step)
2368 if (vect_print_dump_info (REPORT_DETAILS))
2369 fprintf (vect_dump, "bad data-ref access");
2370 return false;
2373 /* Don't allow invariant accesses. */
2374 if (dr_step == 0)
2375 return false;
2377 if (nested_in_vect_loop_p (loop, stmt))
2379 /* Interleaved accesses are not yet supported within outer-loop
2380 vectorization for references in the inner-loop. */
2381 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2383 /* For the rest of the analysis we use the outer-loop step. */
2384 step = STMT_VINFO_DR_STEP (stmt_info);
2385 dr_step = TREE_INT_CST_LOW (step);
2387 if (dr_step == 0)
2389 if (vect_print_dump_info (REPORT_ALIGNMENT))
2390 fprintf (vect_dump, "zero step in outer loop.");
2391 if (DR_IS_READ (dr))
2392 return true;
2393 else
2394 return false;
2398 /* Consecutive? */
2399 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2401 /* Mark that it is not interleaving. */
2402 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2403 return true;
2406 if (nested_in_vect_loop_p (loop, stmt))
2408 if (vect_print_dump_info (REPORT_ALIGNMENT))
2409 fprintf (vect_dump, "strided access in outer loop.");
2410 return false;
2413 /* Not a consecutive access - check if it is part of an interleaving group. */
2414 return vect_analyze_group_access (dr);
2418 /* Function vect_analyze_data_ref_accesses.
2420 Analyze the access pattern of all the data references in the loop.
2422 FORNOW: the only access pattern that is considered vectorizable is a
2423 simple step 1 (consecutive) access.
2425 FORNOW: handle only arrays and pointer accesses. */
2427 static bool
2428 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2430 unsigned int i;
2431 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2432 struct data_reference *dr;
2434 if (vect_print_dump_info (REPORT_DETAILS))
2435 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2437 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2438 if (!vect_analyze_data_ref_access (dr))
2440 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2441 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2442 return false;
2445 return true;
2448 /* Function vect_prune_runtime_alias_test_list.
2450 Prune a list of ddrs to be tested at run-time by versioning for alias.
2451 Return FALSE if the resulting list of ddrs is longer than allowed by
2452 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
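/* A made-up example: if two ddrs would version for the same pair of
   ranges, say (p[0:N], q[0:N]) twice, one of the two ddrs is removed
   and a single runtime overlap check is emitted for the pair. */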
2454 static bool
2455 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2457 VEC (ddr_p, heap) * ddrs =
2458 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2459 unsigned i, j;
2461 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2464 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2466 bool found;
2467 ddr_p ddr_i;
2469 ddr_i = VEC_index (ddr_p, ddrs, i);
2470 found = false;
2472 for (j = 0; j < i; j++)
2474 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2476 if (vect_vfa_range_equal (ddr_i, ddr_j))
2478 if (vect_print_dump_info (REPORT_DR_DETAILS))
2480 fprintf (vect_dump, "found equal ranges ");
2481 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2482 fprintf (vect_dump, ", ");
2483 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2484 fprintf (vect_dump, " and ");
2485 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2486 fprintf (vect_dump, ", ");
2487 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2489 found = true;
2490 break;
2494 if (found)
2496 VEC_ordered_remove (ddr_p, ddrs, i);
2497 continue;
2499 i++;
2502 if (VEC_length (ddr_p, ddrs) >
2503 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2505 if (vect_print_dump_info (REPORT_DR_DETAILS))
2507 fprintf (vect_dump,
2508 "disable versioning for alias - max number of generated "
2509 "checks exceeded.");
2512 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2514 return false;
2517 return true;
2520 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2522 static void
2523 vect_free_slp_tree (slp_tree node)
2525 if (!node)
2526 return;
2528 if (SLP_TREE_LEFT (node))
2529 vect_free_slp_tree (SLP_TREE_LEFT (node));
2531 if (SLP_TREE_RIGHT (node))
2532 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2534 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
2536 if (SLP_TREE_VEC_STMTS (node))
2537 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
2539 free (node);
2543 /* Free the memory allocated for the SLP instance. */
2545 void
2546 vect_free_slp_instance (slp_instance instance)
2548 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
2549 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
2550 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
2554 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
2555 they are of a legal type and that they match the defs of the first stmt of
2556 the SLP group (stored in FIRST_STMT_...). */
2558 static bool
2559 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2560 gimple stmt, VEC (gimple, heap) **def_stmts0,
2561 VEC (gimple, heap) **def_stmts1,
2562 enum vect_def_type *first_stmt_dt0,
2563 enum vect_def_type *first_stmt_dt1,
2564 tree *first_stmt_def0_type,
2565 tree *first_stmt_def1_type,
2566 tree *first_stmt_const_oprnd,
2567 int ncopies_for_cost,
2568 bool *pattern0, bool *pattern1)
2570 tree oprnd;
2571 unsigned int i, number_of_oprnds;
2572 tree def;
2573 gimple def_stmt;
2574 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2575 stmt_vec_info stmt_info =
2576 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2577 enum gimple_rhs_class rhs_class;
2578 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2580 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
2581 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
2583 for (i = 0; i < number_of_oprnds; i++)
2585 oprnd = gimple_op (stmt, i + 1);
2587 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2588 || (!def_stmt && dt[i] != vect_constant_def))
2590 if (vect_print_dump_info (REPORT_SLP))
2592 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2593 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2596 return false;
2599 /* Check if DEF_STMT is a part of a pattern and get the def stmt from
2600 the pattern. Check that all the stmts of the node are in the
2601 pattern. */
2602 if (def_stmt && gimple_bb (def_stmt)
2603 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2604 && vinfo_for_stmt (def_stmt)
2605 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
2607 if (!*first_stmt_dt0)
2608 *pattern0 = true;
2609 else
2611 if (i == 1 && !*first_stmt_dt1)
2612 *pattern1 = true;
2613 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
2615 if (vect_print_dump_info (REPORT_DETAILS))
2617 fprintf (vect_dump, "Build SLP failed: some of the stmts"
2618 " are in a pattern, and others are not ");
2619 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2622 return false;
2626 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
2627 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
2629 if (*dt == vect_unknown_def_type)
2631 if (vect_print_dump_info (REPORT_DETAILS))
2632 fprintf (vect_dump, "Unsupported pattern.");
2633 return false;
2636 switch (gimple_code (def_stmt))
2638 case GIMPLE_PHI:
2639 def = gimple_phi_result (def_stmt);
2640 break;
2642 case GIMPLE_ASSIGN:
2643 def = gimple_assign_lhs (def_stmt);
2644 break;
2646 default:
2647 if (vect_print_dump_info (REPORT_DETAILS))
2648 fprintf (vect_dump, "unsupported defining stmt: ");
2649 return false;
2653 if (!*first_stmt_dt0)
2655 /* op0 of the first stmt of the group - store its info. */
2656 *first_stmt_dt0 = dt[i];
2657 if (def)
2658 *first_stmt_def0_type = TREE_TYPE (def);
2659 else
2660 *first_stmt_const_oprnd = oprnd;
2662 /* Analyze costs (for the first stmt of the group only). */
2663 if (rhs_class != GIMPLE_SINGLE_RHS)
2664 /* Not a memory operation (we don't call this function for loads). */
2665 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2666 else
2667 /* Store. */
2668 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2671 else
2673 if (!*first_stmt_dt1 && i == 1)
2675 /* op1 of the first stmt of the group - store its info. */
2676 *first_stmt_dt1 = dt[i];
2677 if (def)
2678 *first_stmt_def1_type = TREE_TYPE (def);
2679 else
2681 /* We assume that the stmt contains only one constant
2682 operand. We fail otherwise, to be on the safe side. */
2683 if (*first_stmt_const_oprnd)
2685 if (vect_print_dump_info (REPORT_SLP))
2686 fprintf (vect_dump, "Build SLP failed: two constant "
2687 "oprnds in stmt");
2688 return false;
2690 *first_stmt_const_oprnd = oprnd;
2693 else
2695 /* Not first stmt of the group, check that the def-stmt/s match
2696 the def-stmt/s of the first stmt. */
2697 if ((i == 0
2698 && (*first_stmt_dt0 != dt[i]
2699 || (*first_stmt_def0_type && def
2700 && *first_stmt_def0_type != TREE_TYPE (def))))
2701 || (i == 1
2702 && (*first_stmt_dt1 != dt[i]
2703 || (*first_stmt_def1_type && def
2704 && *first_stmt_def1_type != TREE_TYPE (def))))
2705 || (!def
2706 && TREE_TYPE (*first_stmt_const_oprnd)
2707 != TREE_TYPE (oprnd)))
2709 if (vect_print_dump_info (REPORT_SLP))
2710 fprintf (vect_dump, "Build SLP failed: different types ");
2712 return false;
2717 /* Check the types of the definitions. */
2718 switch (dt[i])
2720 case vect_constant_def:
2721 case vect_invariant_def:
2722 break;
2724 case vect_loop_def:
2725 if (i == 0)
2726 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
2727 else
2728 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
2729 break;
2731 default:
2732 /* FORNOW: Not supported. */
2733 if (vect_print_dump_info (REPORT_SLP))
2735 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2736 print_generic_expr (vect_dump, def, TDF_SLIM);
2739 return false;
2743 return true;
2747 /* Recursively build an SLP tree starting from NODE.
2748 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2749 permutation, or are of unsupported types of operation. Otherwise, return
2750 TRUE. */
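/* For intuition, a hypothetical input (not a trace of this code): for
   the strided-store group

   a[2*i] = b[2*i] + x;
   a[2*i+1] = b[2*i+1] + x;

   the root node holds the two stores, its child holds the two
   additions, that node's child holds the two loads from 'b', and 'x'
   is an invariant operand that creates no child node. */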
2752 static bool
2753 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2754 unsigned int group_size,
2755 int *inside_cost, int *outside_cost,
2756 int ncopies_for_cost, unsigned int *max_nunits,
2757 VEC (int, heap) **load_permutation,
2758 VEC (slp_tree, heap) **loads)
2760 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
2761 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
2762 unsigned int i;
2763 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2764 gimple stmt = VEC_index (gimple, stmts, 0);
2765 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2766 enum tree_code first_stmt_code = 0, rhs_code;
2767 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2768 tree lhs;
2769 bool stop_recursion = false, need_same_oprnds = false;
2770 tree vectype, scalar_type, first_op1 = NULL_TREE;
2771 unsigned int vectorization_factor = 0, ncopies;
2772 optab optab;
2773 int icode;
2774 enum machine_mode optab_op2_mode;
2775 enum machine_mode vec_mode;
2776 tree first_stmt_const_oprnd = NULL_TREE;
2777 struct data_reference *first_dr;
2778 bool pattern0 = false, pattern1 = false;
2779 HOST_WIDE_INT dummy;
2780 bool permutation = false;
2781 unsigned int load_place;
2782 gimple first_load;
2784 /* For every stmt in NODE find its def stmt/s. */
2785 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
2787 if (vect_print_dump_info (REPORT_SLP))
2789 fprintf (vect_dump, "Build SLP for ");
2790 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2793 lhs = gimple_get_lhs (stmt);
2794 if (lhs == NULL_TREE)
2796 if (vect_print_dump_info (REPORT_SLP))
2798 fprintf (vect_dump,
2799 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
2800 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2803 return false;
2806 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
2807 vectype = get_vectype_for_scalar_type (scalar_type);
2808 if (!vectype)
2810 if (vect_print_dump_info (REPORT_SLP))
2812 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2813 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2815 return false;
2818 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2819 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2820 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2821 if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
2822 fprintf (vect_dump, "SLP with multiple types ");
2824 /* In case of multiple types we need to detect the smallest type. */
2825 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
2826 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
2828 if (is_gimple_call (stmt))
2829 rhs_code = CALL_EXPR;
2830 else
2831 rhs_code = gimple_assign_rhs_code (stmt);
2833 /* Check the operation. */
2834 if (i == 0)
2836 first_stmt_code = rhs_code;
2838 /* Shift arguments should be equal in all the packed stmts for a
2839 vector shift with scalar shift operand. */
2840 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
2841 || rhs_code == LROTATE_EXPR
2842 || rhs_code == RROTATE_EXPR)
2844 vec_mode = TYPE_MODE (vectype);
2846 /* First see if we have a vector/vector shift. */
2847 optab = optab_for_tree_code (rhs_code, vectype,
2848 optab_vector);
2850 if (!optab
2851 || (optab->handlers[(int) vec_mode].insn_code
2852 == CODE_FOR_nothing))
2854 /* No vector/vector shift, try for a vector/scalar shift. */
2855 optab = optab_for_tree_code (rhs_code, vectype,
2856 optab_scalar);
2858 if (!optab)
2860 if (vect_print_dump_info (REPORT_SLP))
2861 fprintf (vect_dump, "Build SLP failed: no optab.");
2862 return false;
2864 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2865 if (icode == CODE_FOR_nothing)
2867 if (vect_print_dump_info (REPORT_SLP))
2868 fprintf (vect_dump, "Build SLP failed: "
2869 "op not supported by target.");
2870 return false;
2872 optab_op2_mode = insn_data[icode].operand[2].mode;
2873 if (!VECTOR_MODE_P (optab_op2_mode))
2875 need_same_oprnds = true;
2876 first_op1 = gimple_assign_rhs2 (stmt);
2881 else
2883 if (first_stmt_code != rhs_code
2884 && (first_stmt_code != IMAGPART_EXPR
2885 || rhs_code != REALPART_EXPR)
2886 && (first_stmt_code != REALPART_EXPR
2887 || rhs_code != IMAGPART_EXPR))
2889 if (vect_print_dump_info (REPORT_SLP))
2891 fprintf (vect_dump,
2892 "Build SLP failed: different operation in stmt ");
2893 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2896 return false;
2899 if (need_same_oprnds
2900 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
2902 if (vect_print_dump_info (REPORT_SLP))
2904 fprintf (vect_dump,
2905 "Build SLP failed: different shift arguments in ");
2906 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2909 return false;
2913 /* Strided store or load. */
2914 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2916 if (REFERENCE_CLASS_P (lhs))
2918 /* Store. */
2919 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2920 &def_stmts0, &def_stmts1,
2921 &first_stmt_dt0,
2922 &first_stmt_dt1,
2923 &first_stmt_def0_type,
2924 &first_stmt_def1_type,
2925 &first_stmt_const_oprnd,
2926 ncopies_for_cost,
2927 &pattern0, &pattern1))
2928 return false;
2930 else
2932 /* Load. */
2933 /* FORNOW: Check that there is no gap between the loads. */
2934 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
2935 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2936 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2937 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
2939 if (vect_print_dump_info (REPORT_SLP))
2941 fprintf (vect_dump, "Build SLP failed: strided "
2942 "loads have gaps ");
2943 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2946 return false;
2949 /* Check that the size of the interleaved loads group is not
2950 greater than the SLP group size. */
2951 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
2952 > ncopies * group_size)
2954 if (vect_print_dump_info (REPORT_SLP))
2956 fprintf (vect_dump, "Build SLP failed: the number of "
2957 "interleaved loads is greater than"
2958 " the SLP group size ");
2959 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2962 return false;
2965 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
2967 if (first_load == stmt)
2969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2970 if (vect_supportable_dr_alignment (first_dr)
2971 == dr_unaligned_unsupported)
2973 if (vect_print_dump_info (REPORT_SLP))
2975 fprintf (vect_dump, "Build SLP failed: unsupported "
2976 "unaligned load ");
2977 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2980 return false;
2983 /* Analyze costs (for the first stmt in the group). */
2984 vect_model_load_cost (vinfo_for_stmt (stmt),
2985 ncopies_for_cost, *node);
2988 /* Store the place of this load in the interleaving chain. In
2989 case permutation is needed, we later decide whether a specific
2990 permutation is supported.
2991 load_place = vect_get_place_in_interleaving_chain (stmt,
2992 first_load);
2993 if (load_place != i)
2994 permutation = true;
2996 VEC_safe_push (int, heap, *load_permutation, load_place);
2998 /* We stop building the tree when we reach a group of loads. */
2999 stop_recursion = true;
3000 continue;
3002 } /* Strided access. */
3003 else
3005 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
3007 /* Not a strided load. */
3008 if (vect_print_dump_info (REPORT_SLP))
3010 fprintf (vect_dump, "Build SLP failed: not strided load ");
3011 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3014 /* FORNOW: non-strided loads are not supported. */
3015 return false;
3018 /* Not a memory operation. */
3019 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
3020 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
3022 if (vect_print_dump_info (REPORT_SLP))
3024 fprintf (vect_dump, "Build SLP failed: operation");
3025 fprintf (vect_dump, " unsupported ");
3026 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3029 return false;
3032 /* Find the def-stmts. */
3033 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
3034 &def_stmts0, &def_stmts1,
3035 &first_stmt_dt0, &first_stmt_dt1,
3036 &first_stmt_def0_type,
3037 &first_stmt_def1_type,
3038 &first_stmt_const_oprnd,
3039 ncopies_for_cost,
3040 &pattern0, &pattern1))
3041 return false;
3045 /* Add the costs of the node to the overall instance costs. */
3046 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
3047 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
3049 /* Strided loads were reached - stop the recursion. */
3050 if (stop_recursion)
3052 if (permutation)
3054 VEC_safe_push (slp_tree, heap, *loads, *node);
3055 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
3058 return true;
3061 /* Create SLP_TREE nodes for the definition node/s. */
3062 if (first_stmt_dt0 == vect_loop_def)
3064 slp_tree left_node = XNEW (struct _slp_tree);
3065 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
3066 SLP_TREE_VEC_STMTS (left_node) = NULL;
3067 SLP_TREE_LEFT (left_node) = NULL;
3068 SLP_TREE_RIGHT (left_node) = NULL;
3069 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
3070 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
3071 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
3072 inside_cost, outside_cost, ncopies_for_cost,
3073 max_nunits, load_permutation, loads))
3074 return false;
3076 SLP_TREE_LEFT (*node) = left_node;
3079 if (first_stmt_dt1 == vect_loop_def)
3081 slp_tree right_node = XNEW (struct _slp_tree);
3082 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
3083 SLP_TREE_VEC_STMTS (right_node) = NULL;
3084 SLP_TREE_LEFT (right_node) = NULL;
3085 SLP_TREE_RIGHT (right_node) = NULL;
3086 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
3087 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
3088 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
3089 inside_cost, outside_cost, ncopies_for_cost,
3090 max_nunits, load_permutation, loads))
3091 return false;
3093 SLP_TREE_RIGHT (*node) = right_node;
3096 return true;
3100 static void
3101 vect_print_slp_tree (slp_tree node)
3103 int i;
3104 gimple stmt;
3106 if (!node)
3107 return;
3109 fprintf (vect_dump, "node ");
3110 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3112 fprintf (vect_dump, "\n\tstmt %d ", i);
3113 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3115 fprintf (vect_dump, "\n");
3117 vect_print_slp_tree (SLP_TREE_LEFT (node));
3118 vect_print_slp_tree (SLP_TREE_RIGHT (node));
3122 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
3123 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
3124 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
3125 stmts in NODE are to be marked. */
3127 static void
3128 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
3130 int i;
3131 gimple stmt;
3133 if (!node)
3134 return;
3136 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3137 if (j < 0 || i == j)
3138 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
3140 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
3141 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
3145 /* Check if the permutation required by the SLP INSTANCE is supported.
3146 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
3148 static bool
3149 vect_supported_slp_permutation_p (slp_instance instance)
3151 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
3152 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
3153 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
3154 VEC (slp_tree, heap) *sorted_loads = NULL;
3155 int index;
3156 slp_tree *tmp_loads = NULL;
3157 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
3158 slp_tree load;
3160 /* FORNOW: The only supported load permutation is the one in which all
3161 the loads in a node access the same location, when the data-refs in
3162 the nodes of LOADS constitute an interleaving chain.
3163 Sort the nodes according to the order of accesses in the chain. */
3164 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
3165 for (i = 0, j = 0;
3166 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
3167 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
3168 i += group_size, j++)
3170 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
3171 /* Check that the loads are all in the same interleaving chain. */
3172 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
3174 if (vect_print_dump_info (REPORT_DETAILS))
3176 fprintf (vect_dump, "Build SLP failed: unsupported data "
3177 "permutation ");
3178 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
3181 free (tmp_loads);
3182 return false;
3185 tmp_loads[index] = load;
3188 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
3189 for (i = 0; i < group_size; i++)
3190 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
3192 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
3193 SLP_INSTANCE_LOADS (instance) = sorted_loads;
3194 free (tmp_loads);
3196 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
3197 SLP_INSTANCE_UNROLLING_FACTOR (instance),
3198 instance, true))
3199 return false;
3201 return true;
3205 /* Check if the required load permutation is supported.
3206 LOAD_PERMUTATION contains a list of indices of the loads.
3207 In SLP this permutation is relative to the order of strided stores that are
3208 the base of the SLP instance. */
3210 static bool
3211 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
3212 VEC (int, heap) *load_permutation)
3214 int i = 0, j, prev = -1, next, k;
3215 bool supported;
3217 /* FORNOW: permutations are only supported for loop-aware SLP. */
3218 if (!slp_instn)
3219 return false;
3221 if (vect_print_dump_info (REPORT_SLP))
3223 fprintf (vect_dump, "Load permutation ");
3224 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
3225 fprintf (vect_dump, "%d ", next);
3228 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
3229 GROUP_SIZE * GROUP_SIZE, where each sequence of the same index is of
3230 GROUP_SIZE length as well. */
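/* E.g. (illustrative): for GROUP_SIZE 2 the permutation {0, 0, 1, 1}
   (length 2 * 2) is accepted, while {0, 1, 1, 0} is rejected because
   its first run mixes indices. */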
3231 if (VEC_length (int, load_permutation)
3232 != (unsigned int) (group_size * group_size))
3233 return false;
3235 supported = true;
3236 for (j = 0; j < group_size; j++)
3238 for (i = j * group_size, k = 0;
3239 VEC_iterate (int, load_permutation, i, next) && k < group_size;
3240 i++, k++)
3242 if (i != j * group_size && next != prev)
3244 supported = false;
3245 break;
3248 prev = next;
3252 if (supported && i == group_size * group_size
3253 && vect_supported_slp_permutation_p (slp_instn))
3254 return true;
3256 return false;
3260 /* Find the first load in the loop that belongs to INSTANCE.
3261 When loads are in several SLP nodes, there can be a case in which the first
3262 load does not appear in the first SLP node to be transformed, causing
3263 incorrect order of statements. Since we generate all the loads together,
3264 they must be inserted before the first load of the SLP instance and not
3265 before the first load of the first node of the instance. */
3266 static gimple
3267 vect_find_first_load_in_slp_instance (slp_instance instance)
3269 int i, j;
3270 slp_tree load_node;
3271 gimple first_load = NULL, load;
3273 for (i = 0;
3274 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
3275 i++)
3276 for (j = 0;
3277 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
3278 j++)
3279 first_load = get_earlier_stmt (load, first_load);
3281 return first_load;
3285 /* Analyze an SLP instance starting from a group of strided stores. Call
3286 vect_build_slp_tree to build a tree of packed stmts if possible.
3287 Return FALSE if it's impossible to SLP any stmt in the loop. */
3289 static bool
3290 vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
3292 slp_instance new_instance;
3293 slp_tree node = XNEW (struct _slp_tree);
3294 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3295 unsigned int unrolling_factor = 1, nunits;
3296 tree vectype, scalar_type;
3297 gimple next;
3298 unsigned int vectorization_factor = 0, ncopies;
3299 bool slp_impossible = false;
3300 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3301 unsigned int max_nunits = 0;
3302 VEC (int, heap) *load_permutation;
3303 VEC (slp_tree, heap) *loads;
3305 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
3306 vinfo_for_stmt (stmt))));
3307 vectype = get_vectype_for_scalar_type (scalar_type);
3308 if (!vectype)
3310 if (vect_print_dump_info (REPORT_SLP))
3312 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3313 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3315 return false;
3318 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3319 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3320 ncopies = vectorization_factor / nunits;
3322 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3323 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
3324 next = stmt;
3325 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3326 while (next)
3328 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
3329 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3332 SLP_TREE_VEC_STMTS (node) = NULL;
3333 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3334 SLP_TREE_LEFT (node) = NULL;
3335 SLP_TREE_RIGHT (node) = NULL;
3336 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3337 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3339 /* Calculate the unrolling factor. */
3340 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3342 /* Calculate the number of vector stmts to create based on the unrolling
3343 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3344 GROUP_SIZE / NUNITS otherwise). */
3345 ncopies_for_cost = unrolling_factor * group_size / nunits;
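/* Worked example (numbers assumed): NUNITS = 4 and GROUP_SIZE = 6 give
   UNROLLING_FACTOR = lcm (4, 6) / 6 = 2 and
   NCOPIES_FOR_COST = 2 * 6 / 4 = 3. */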
3347 load_permutation = VEC_alloc (int, heap, group_size * group_size);
3348 loads = VEC_alloc (slp_tree, heap, group_size);
3350 /* Build the tree for the SLP instance. */
3351 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
3352 &outside_cost, ncopies_for_cost, &max_nunits,
3353 &load_permutation, &loads))
3355 /* Create a new SLP instance. */
3356 new_instance = XNEW (struct _slp_instance);
3357 SLP_INSTANCE_TREE (new_instance) = node;
3358 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3359 /* Calculate the unrolling factor based on the smallest type in the
3360 loop. */
3361 if (max_nunits > nunits)
3362 unrolling_factor = least_common_multiple (max_nunits, group_size)
3363 / group_size;
3365 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3366 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3367 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3368 SLP_INSTANCE_LOADS (new_instance) = loads;
3369 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
3370 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
3371 if (VEC_length (slp_tree, loads))
3373 if (!vect_supported_load_permutation_p (new_instance, group_size,
3374 load_permutation))
3376 if (vect_print_dump_info (REPORT_SLP))
3378 fprintf (vect_dump, "Build SLP failed: unsupported load "
3379 "permutation ");
3380 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3383 vect_free_slp_instance (new_instance);
3384 return false;
3387 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
3388 = vect_find_first_load_in_slp_instance (new_instance);
3390 else
3391 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
3393 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3394 new_instance);
3395 if (vect_print_dump_info (REPORT_SLP))
3396 vect_print_slp_tree (node);
3398 return true;
3401 /* Failed to SLP. */
3402 /* Free the allocated memory. */
3403 vect_free_slp_tree (node);
3404 VEC_free (int, heap, load_permutation);
3405 VEC_free (slp_tree, heap, loads);
3407 if (slp_impossible)
3408 return false;
3410 /* SLP failed for this instance, but it is still possible to SLP other stmts
3411 in the loop. */
3412 return true;
3416 /* Check if there are stmts in the loop that can be vectorized using SLP.
3417 Build SLP trees of packed scalar stmts if SLP is possible. */
3419 static bool
3420 vect_analyze_slp (loop_vec_info loop_vinfo)
3422 unsigned int i;
3423 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3424 gimple store;
3426 if (vect_print_dump_info (REPORT_SLP))
3427 fprintf (vect_dump, "=== vect_analyze_slp ===");
3429 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
3430 if (!vect_analyze_slp_instance (loop_vinfo, store))
3432 /* SLP failed. No instance can be SLPed in the loop. */
3433 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3434 fprintf (vect_dump, "SLP failed.");
3436 return false;
3439 return true;
3443 /* For each possible SLP instance decide whether to SLP it and calculate the
3444 overall unrolling factor needed to SLP the loop.
3446 static void
3447 vect_make_slp_decision (loop_vec_info loop_vinfo)
3449 unsigned int i, unrolling_factor = 1;
3450 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3451 slp_instance instance;
3452 int decided_to_slp = 0;
3454 if (vect_print_dump_info (REPORT_SLP))
3455 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3457 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3459 /* FORNOW: SLP if you can. */
3460 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3461 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3463 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3464 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3465 loop-based vectorization. Such stmts will be marked as HYBRID. */
3466 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3467 decided_to_slp++;
3470 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3472 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3473 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3474 decided_to_slp, unrolling_factor);
3478 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3479 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3481 static void
3482 vect_detect_hybrid_slp_stmts (slp_tree node)
3484 int i;
3485 gimple stmt;
3486 imm_use_iterator imm_iter;
3487 gimple use_stmt;
3489 if (!node)
3490 return;
3492 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3493 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3494 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
3495 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
3496 if (vinfo_for_stmt (use_stmt)
3497 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
3498 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
3499 vect_mark_slp_stmts (node, hybrid, i);
3501 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3502 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3506 /* Find stmts that must be both vectorized and SLPed. */
3508 static void
3509 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3511 unsigned int i;
3512 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3513 slp_instance instance;
3515 if (vect_print_dump_info (REPORT_SLP))
3516 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3518 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3519 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3523 /* Function vect_analyze_data_refs.
3525 Find all the data references in the loop.
3527 The general structure of the analysis of data refs in the vectorizer is as
3528 follows:
3529 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3530 find and analyze all data-refs in the loop and their dependences.
3531 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3532 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3533 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3537 static bool
3538 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3541 unsigned int i;
3542 VEC (data_reference_p, heap) *datarefs;
3543 struct data_reference *dr;
3544 tree scalar_type;
3546 if (vect_print_dump_info (REPORT_DETAILS))
3547 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3549 compute_data_dependences_for_loop (loop, true,
3550 &LOOP_VINFO_DATAREFS (loop_vinfo),
3551 &LOOP_VINFO_DDRS (loop_vinfo));
3553 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3554 from stmt_vec_info struct to DR and vectype. */
3555 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3557 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3559 gimple stmt;
3560 stmt_vec_info stmt_info;
3561 basic_block bb;
3562 tree base, offset, init;
3564 if (!dr || !DR_REF (dr))
3566 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3567 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3568 return false;
3571 stmt = DR_STMT (dr);
3572 stmt_info = vinfo_for_stmt (stmt);
3574 /* Check that analysis of the data-ref succeeded. */
3575 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3576 || !DR_STEP (dr))
3578 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3580 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3581 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3583 return false;
3586 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3589 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3590 "constant");
3591 return false;
3594 if (!DR_SYMBOL_TAG (dr))
3596 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3598 fprintf (vect_dump, "not vectorized: no memory tag for ");
3599 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3601 return false;
3604 base = unshare_expr (DR_BASE_ADDRESS (dr));
3605 offset = unshare_expr (DR_OFFSET (dr));
3606 init = unshare_expr (DR_INIT (dr));
3608 /* Update DR field in stmt_vec_info struct. */
3609 bb = gimple_bb (stmt);
3611 /* If the dataref is in an inner-loop of the loop that is considered for
3612 vectorization, we also want to analyze the access relative to
3613 the outer-loop (DR contains information only relative to the
3614 inner-most enclosing loop). We do that by building a reference to the
3615 first location accessed by the inner-loop, and analyze it relative to
3616 the outer-loop. */
3617 if (nested_in_vect_loop_p (loop, stmt))
3619 tree outer_step, outer_base, outer_init;
3620 HOST_WIDE_INT pbitsize, pbitpos;
3621 tree poffset;
3622 enum machine_mode pmode;
3623 int punsignedp, pvolatilep;
3624 affine_iv base_iv, offset_iv;
3625 tree dinit;
3627 /* Build a reference to the first location accessed by the
3628 inner-loop: *(BASE+INIT). (The first location is actually
3629 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3630 tree inner_base = build_fold_indirect_ref
3631 (fold_build2 (POINTER_PLUS_EXPR,
3632 TREE_TYPE (base), base,
3633 fold_convert (sizetype, init)));
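/* For example (hypothetical): for an inner-loop reference a[i][j],
   with 'i' the outer IV and 'j' the inner IV, INNER_BASE is a[i][0];
   analyzing its evolution in the outer loop below yields an outer
   step equal to the row size of 'a'. */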
3635 if (vect_print_dump_info (REPORT_DETAILS))
3637 fprintf (vect_dump, "analyze in outer-loop: ");
3638 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3641 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3642 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3643 gcc_assert (outer_base != NULL_TREE);
3645 if (pbitpos % BITS_PER_UNIT != 0)
3647 if (vect_print_dump_info (REPORT_DETAILS))
3648 fprintf (vect_dump, "failed: bit offset alignment.\n");
3649 return false;
3652 outer_base = build_fold_addr_expr (outer_base);
3653 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3654 &base_iv, false))
3656 if (vect_print_dump_info (REPORT_DETAILS))
3657 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3658 return false;
3661 if (offset)
3663 if (poffset)
3664 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3665 else
3666 poffset = offset;
3669 if (!poffset)
3671 offset_iv.base = ssize_int (0);
3672 offset_iv.step = ssize_int (0);
3674 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3675 &offset_iv, false))
3677 if (vect_print_dump_info (REPORT_DETAILS))
3678 fprintf (vect_dump, "evolution of offset is not affine.\n");
3679 return false;
3682 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3683 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3684 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3685 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3686 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3688 outer_step = size_binop (PLUS_EXPR,
3689 fold_convert (ssizetype, base_iv.step),
3690 fold_convert (ssizetype, offset_iv.step));
3692 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3693 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3694 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3695 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3696 STMT_VINFO_DR_OFFSET (stmt_info) =
3697 fold_convert (ssizetype, offset_iv.base);
3698 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3699 size_int (highest_pow2_factor (offset_iv.base));
3701 if (vect_print_dump_info (REPORT_DETAILS))
3703 fprintf (vect_dump, "\touter base_address: ");
3704 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3705 fprintf (vect_dump, "\n\touter offset from base address: ");
3706 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3707 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3708 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3709 fprintf (vect_dump, "\n\touter step: ");
3710 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3711 fprintf (vect_dump, "\n\touter aligned to: ");
3712 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3716 if (STMT_VINFO_DATA_REF (stmt_info))
3718 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3720 fprintf (vect_dump,
3721 "not vectorized: more than one data ref in stmt: ");
3722 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3724 return false;
3726 STMT_VINFO_DATA_REF (stmt_info) = dr;
3728 /* Set vectype for STMT. */
3729 scalar_type = TREE_TYPE (DR_REF (dr));
3730 STMT_VINFO_VECTYPE (stmt_info) =
3731 get_vectype_for_scalar_type (scalar_type);
3732 if (!STMT_VINFO_VECTYPE (stmt_info))
3734 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3736 fprintf (vect_dump,
3737 "not vectorized: no vectype for stmt: ");
3738 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3739 fprintf (vect_dump, " scalar_type: ");
3740 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3742 return false;
3746 return true;
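
/* Note on the vectype computed above (an illustration; the actual mapping
   is target-dependent): on a target with 16-byte vectors, a data-ref of
   scalar type 'float' gets the vectype V4SF (4 elements per vector), and
   one of type 'double' gets V2DF.  When no suitable vector mode exists,
   get_vectype_for_scalar_type returns NULL_TREE and the loop is given up
   on, as coded above.  */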
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
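
/* For instance (illustrative sketch only): in

     for (i = 0; i < n; i++)
       {
         a[i] = x;       <- relevant: alters memory (has a vdef)
         s = s + b[i];   <- live if 's' is read after the loop
       }

   the store to 'a[i]' is marked relevant, and the summation is marked
   live when its result is used by a loop-exit phi.  */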
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_loop;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_loop;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_loop;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
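
/* Example for case 1 (illustrative sketch only): in the stmt 'x = a[i]'
   the use of 'i' only serves to compute the address of 'a[i]'.  The
   def-stmt of 'i' (the IV increment) is therefore not marked relevant
   through this use, since the vectorizer generates the addresses of the
   vectorized accesses separately.  */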
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                        vect_used_by_reduction : vect_unused_in_loop;
          break;
        case vect_used_in_outer_by_reduction:
          relevant = vect_used_by_reduction;
          break;
        case vect_used_in_outer:
          relevant = vect_used_in_loop;
          break;
        case vect_used_by_reduction:
        case vect_used_in_loop:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb:
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                        vect_used_in_outer_by_reduction : vect_unused_in_loop;
          break;

        case vect_used_in_outer_by_reduction:
        case vect_used_in_outer:
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_loop:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be kept.

         Reduction phis are expected to be used by a reduction stmt, or by
         a stmt in an outer loop; other reduction stmts are expected to be
         in the loop, and possibly used by a stmt in an outer loop.
         Here are the expected values of "relevant" for reduction phis/stmts:

         relevance:                          phi     stmt
         vect_unused_in_loop                         ok
         vect_used_in_outer_by_reduction     ok      ok
         vect_used_in_outer                  ok      ok
         vect_used_by_reduction              ok
         vect_used_in_loop                                 */

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          enum vect_relevant tmp_relevant = relevant;
          switch (tmp_relevant)
            {
            case vect_unused_in_loop:
              gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
              relevant = vect_used_by_reduction;
              break;

            case vect_used_in_outer_by_reduction:
            case vect_used_in_outer:
              gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
                          || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
                              && (gimple_assign_rhs_code (stmt)
                                  != DOT_PROD_EXPR)));
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */
            case vect_used_in_loop:
            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");
              VEC_free (gimple, heap, worklist);
              return false;
            }
          live_p = false;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


/* Function vect_can_advance_ivs_p.

   In case the number of iterations of LOOP is unknown at compile time, an
   epilog loop will be generated, and the loop induction variables (IVs)
   will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
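
/* What is accepted (illustrative sketch only): a simple IV such as

     i_1 = PHI <0, i_2>
     i_2 = i_1 + 1        -->  access function {0, +, 1}_loop

   has the non-chrec evolution part '1', so it can be advanced to
   'init + niters * step'.  An IV whose evolution part is itself a chrec
   (a polynomial of degree >= 2) is rejected below.  */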
static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution function is a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}


/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.  If an expression
   that represents the number of iterations can be constructed, place it
   in NUMBER_OF_ITERATIONS.  Return the loop exit condition.  */
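
/* For example (illustrative sketch only): for the loop

     i = 0;
     do { ...; i = i + 1; } while (i < n);

   the exit condition 'i < n' is evaluated 'n' times (assuming n >= 1),
   so NUMBER_OF_ITERATIONS is set to the expression 'n' and the
   GIMPLE_COND for 'i < n' is returned.  */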
static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
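
/* Shape of an accepted inner-most loop (illustrative sketch only): a
   source loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   is expected to reach the vectorizer in do-while form, with all the
   executable stmts in the header and an empty latch:

     header: a[i] = 0; i = i + 1; if (i < n) goto header; else goto exit;
     latch:  (empty)

   and with an 'if (n > 0)' guard before it when the body may execute
   zero times.  */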
loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW.  May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge backedge, entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      backedge = EDGE_PRED (innerloop->header, 1);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        {
          backedge = EDGE_PRED (innerloop->header, 0);
          entryedge = EDGE_PRED (innerloop->header, 1);
        }

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
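
/* Caller's view (a paraphrased sketch; the real driver is
   vectorize_loops in tree-vectorizer.c):

     loop_vinfo = vect_analyze_loop (loop);
     if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
       vect_transform_loop (loop_vinfo);

   A NULL result, or a loop_vinfo without the vectorizable flag set,
   just means the loop stays in its scalar form.  */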
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references, whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "too long list of versioning for alias "
                 "run-time tests.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;