/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"
static bool vect_can_advance_ivs_p (loop_vec_info);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
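
/* An illustrative sketch, assuming a 16-byte vector size: in a loop that
   mixes element sizes, e.g.

        short s[N]; int x[N];
        for (i=0; i<N; i++)
          x[i] += (int) s[i];

   visiting the shorts yields nunits = 8 while the ints yield nunits = 4;
   the function below keeps the maximum nunits it sees, so VF = 8 (the
   smallest datatype determines the VF).  */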

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the smallest
                 datatype in this function, because that determines the VF.
                 If the smallest datatype in the loop is present only as the
                 rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
              scalar_type = gimple_expr_type (stmt);

              if (is_gimple_assign (stmt)
                  && (gimple_assign_cast_p (stmt)
                      || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
                      || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
                {
                  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type))
                      < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
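
/* A worked example with illustrative numbers: if an SLP instance was costed
   with unrolling factor 2 and the loop's final VF is 8, every node of the
   instance will emit 8/2 = 4 times as many vector stmts as assumed when the
   inside cost was first computed, so the cost below is scaled by
   vf / SLP_INSTANCE_UNROLLING_FACTOR = 4.  */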

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* Inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. - that are used out of the
             loop) need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                             LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
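
/* An illustrative sketch: for the store 'a[i] = x', the use of 'i' only
   serves to index the array, so this predicate is false for 'i'; the use
   of 'x' is the stored value itself, so the predicate is true for 'x'.  */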

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  if (!gimple_assign_copy_p (stmt))
    return false;
  operand = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dumy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples of scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
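
/* An illustrative sketch with made-up inits: if DRB's chain currently holds
   drs with DR_INIT 0 and 8, inserting a DRA with DR_INIT 4 links it between
   them, keeping the chain sorted by init: 0 -> 4 -> 8.  */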

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
         FIRST_DR = DRB
         NEXT(FIRST_DR) = previous FIRST_DR
      else
         insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
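
/* A sketch of case 4 with illustrative inits: merging a chain with inits
   {4, 12} into a chain with inits {0, 8} walks both sorted lists and
   produces a single chain 0 -> 4 -> 8 -> 12, whose FIRST_DR is the head
   with the smallest init (here the head of the {0, 8} chain).  */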

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                             old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
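
/* A worked example with illustrative numbers: for the accesses a[2*i] and
   a[2*i+1] with 4-byte elements, both drs have step 8 and type size 4, with
   inits 0 and 4.  Since init_b - init_a = 4 does not exceed the step and
   divides evenly by the type size, the two accesses are recorded below as
   one interleaving group.  */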

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias can check a
   dependence at run-time, return FALSE.  */
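
/* A sketch of the distance test below, with illustrative numbers: given
   VF = 4, a dependence distance of 3 (e.g. between a[i] and a[i+3]) forces
   the loop to be rejected; a distance of 5 is harmless because the accesses
   fall in different vector iterations; a distance of 4 (a multiple of VF)
   additionally tells us the two refs share the same alignment.  */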

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: "
                   "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
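
/* Misalignment arithmetic sketch, with illustrative numbers: for a 16-byte
   vectype, a dataref whose base is (or is forced to be) 16-byte aligned and
   whose DR_INIT is 20 ends up with DR_MISALIGNMENT = 20 mod 16 = 4 bytes.  */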

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                    TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
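
/* Update sketch, with illustrative numbers: with 4-byte elements and a
   16-byte vectype, peeling npeel = 3 iterations moves a dr with
   DR_MISALIGNMENT = 8 to (8 + 3*4) mod 16 = 4 bytes; when either
   misalignment is unknown at compile time, it is simply reset to -1.  */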

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
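
/* Peeling sketch, with illustrative numbers: with 4 elements per vector and
   a known misalignment of 1 element, peeling 4 - 1 = 3 scalar iterations
   aligns the access; but if the dr belongs to an interleaving group of
   size 2, then (4 - 1) % 2 != 0, so alignment cannot be reached by
   peeling.  */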

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump,
                     "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
1769 static bool
1770 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1772 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1773 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774 enum dr_alignment_support supportable_dr_alignment;
1775 struct data_reference *dr0 = NULL;
1776 struct data_reference *dr;
1777 unsigned int i;
1778 bool do_peeling = false;
1779 bool do_versioning = false;
1780 bool stat;
1781 gimple stmt;
1782 stmt_vec_info stmt_info;
1783 int vect_versioning_for_alias_required;
1785 if (vect_print_dump_info (REPORT_DETAILS))
1786 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1788 /* While cost model enhancements are expected in the future, the high level
1789 view of the code at this time is as follows:
1791 A) If there is a misaligned write then see if peeling to align this write
1792 can make all data references satisfy vect_supportable_dr_alignment.
1793 If so, update data structures as needed and return true. Note that
1794 at this time vect_supportable_dr_alignment is known to return false
1795 for a misaligned write.
1797 B) If peeling wasn't possible and there is a data reference with an
1798 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1799 then see if loop versioning checks can be used to make all data
1800 references satisfy vect_supportable_dr_alignment. If so, update
1801 data structures as needed and return true.
1803 C) If neither peeling nor versioning were successful then return false if
1804 any data reference does not satisfy vect_supportable_dr_alignment.
1806 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1808 Note that Possibility 3 above (which is peeling and versioning together) is not
1809 being done at this time. */
1811 /* (1) Peeling to force alignment. */
1813 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1814 Considerations:
1815 + How many accesses will become aligned due to the peeling
1816 - How many accesses will become unaligned due to the peeling,
1817 and the cost of misaligned accesses.
1818 - The cost of peeling (the extra runtime checks, the increase
1819 in code size).
1821 The scheme we use FORNOW: peel to force the alignment of the first
1822 misaligned store in the loop.
1823 Rationale: misaligned stores are not yet supported.
1825 TODO: Use a cost model. */
1827 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1829 stmt = DR_STMT (dr);
1830 stmt_info = vinfo_for_stmt (stmt);
1832 /* For interleaving, only the alignment of the first access
1833 matters. */
1834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1835 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1836 continue;
1838 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1840 do_peeling = vector_alignment_reachable_p (dr);
1841 if (do_peeling)
1842 dr0 = dr;
1843 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1844 fprintf (vect_dump, "vector alignment may not be reachable");
1845 break;
1849 vect_versioning_for_alias_required =
1850 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1852 /* Temporarily, if versioning for alias is required, we disable peeling
1853 until we support peeling and versioning. Often peeling for alignment
1854 will require peeling for loop-bound, which in turn requires that we
1855 know how to adjust the loop ivs after the loop. */
1856 if (vect_versioning_for_alias_required
1857 || !vect_can_advance_ivs_p (loop_vinfo)
1858 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1859 do_peeling = false;
1861 if (do_peeling)
1863 int mis;
1864 int npeel = 0;
1865 gimple stmt = DR_STMT (dr0);
1866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1867 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1868 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1870 if (known_alignment_for_access_p (dr0))
1872 /* Since it's known at compile time, compute the number of iterations
1873 in the peeled loop (the peeling factor) for use in updating
1874 DR_MISALIGNMENT values. The peeling factor is the vectorization
1875 factor minus the misalignment as an element count. */
1876 mis = DR_MISALIGNMENT (dr0);
1877 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1878 npeel = nelements - mis;
1880 /* For interleaved data access every iteration accesses all the
1881 members of the group, therefore we divide the number of iterations
1882 by the group size. */
1883 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1884 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1885 npeel /= DR_GROUP_SIZE (stmt_info);
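/* For example: for a V4SI access (4 elements of 4 bytes) with
   DR_MISALIGNMENT (dr0) == 8, mis = 8/4 = 2 elements, so npeel = 4 - 2 = 2;
   peeling 2 iterations advances dr0 by 8 bytes, making it 16-byte aligned.
   If dr0 is the first member of an interleaving group of size 2, each
   iteration advances by 2 elements, so npeel = 2/2 = 1.  */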
1887 if (vect_print_dump_info (REPORT_DETAILS))
1888 fprintf (vect_dump, "Try peeling by %d", npeel);
1891 /* Ensure that all data refs can be vectorized after the peel. */
1892 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1894 int save_misalignment;
1896 if (dr == dr0)
1897 continue;
1899 stmt = DR_STMT (dr);
1900 stmt_info = vinfo_for_stmt (stmt);
1901 /* For interleaving, only the alignment of the first access
1902 matters. */
1903 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1904 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1905 continue;
1907 save_misalignment = DR_MISALIGNMENT (dr);
1908 vect_update_misalignment_for_peel (dr, dr0, npeel);
1909 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1910 SET_DR_MISALIGNMENT (dr, save_misalignment);
1912 if (!supportable_dr_alignment)
1914 do_peeling = false;
1915 break;
1919 if (do_peeling)
1921 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1922 If the misalignment of DR_i is identical to that of dr0 then set
1923 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1924 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1925 by the peeling factor times the element size of DR_i (MOD the
1926 vectorization factor times the size). Otherwise, the
1927 misalignment of DR_i must be set to unknown. */
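/* For example: with VF = 4 and 4-byte elements (vector size 16), peeling
   npeel = 2 iterations advances every data-ref by 8 bytes.  A DR_i with
   known misalignment 8 (identical to dr0's) becomes aligned (set to 0),
   one with known misalignment 4 becomes (4 + 2*4) % 16 = 12, and a DR_i
   with unknown misalignment stays unknown.  */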
1928 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1929 if (dr != dr0)
1930 vect_update_misalignment_for_peel (dr, dr0, npeel);
1932 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1933 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1934 SET_DR_MISALIGNMENT (dr0, 0);
1935 if (vect_print_dump_info (REPORT_ALIGNMENT))
1936 fprintf (vect_dump, "Alignment of access forced using peeling.");
1938 if (vect_print_dump_info (REPORT_DETAILS))
1939 fprintf (vect_dump, "Peeling for alignment will be applied.");
1941 stat = vect_verify_datarefs_alignment (loop_vinfo);
1942 gcc_assert (stat);
1943 return stat;
1948 /* (2) Versioning to force alignment. */
1950 /* Try versioning if:
1951 1) flag_tree_vect_loop_version is TRUE
1952 2) optimize_size is FALSE
1953 3) there is at least one unsupported misaligned data ref with an unknown
1954 misalignment, and
1955 4) all misaligned data refs with a known misalignment are supported, and
1956 5) the number of runtime alignment checks is within reason. */
1958 do_versioning =
1959 flag_tree_vect_loop_version
1960 && (!optimize_size)
1961 && (!loop->inner); /* FORNOW */
1963 if (do_versioning)
1965 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1967 stmt = DR_STMT (dr);
1968 stmt_info = vinfo_for_stmt (stmt);
1970 /* For interleaving, only the alignment of the first access
1971 matters. */
1972 if (aligned_access_p (dr)
1973 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1974 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1975 continue;
1977 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1979 if (!supportable_dr_alignment)
1981 gimple stmt;
1982 int mask;
1983 tree vectype;
1985 if (known_alignment_for_access_p (dr)
1986 || VEC_length (gimple,
1987 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1988 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1990 do_versioning = false;
1991 break;
1994 stmt = DR_STMT (dr);
1995 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1996 gcc_assert (vectype);
1998 /* The rightmost bits of an aligned address must be zeros.
1999 Construct the mask needed for this test. For example,
2000 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2001 mask must be 15 = 0xf. */
2002 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
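/* For example: with mask = 15, the runtime test emitted during the
   transformation effectively ORs together the addresses of all the
   potentially misaligned data-refs and branches to the aligned loop
   version only if (addr_1 | ... | addr_n) & 15 == 0.  */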
2004 /* FORNOW: use the same mask to test all potentially unaligned
2005 references in the loop. The vectorizer currently supports
2006 a single vector size, see the reference to
2007 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2008 vectorization factor is computed. */
2009 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2010 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2011 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2012 VEC_safe_push (gimple, heap,
2013 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2014 DR_STMT (dr));
2018 /* Versioning requires at least one misaligned data reference. */
2019 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2020 do_versioning = false;
2021 else if (!do_versioning)
2022 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2025 if (do_versioning)
2027 VEC(gimple,heap) *may_misalign_stmts
2028 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2029 gimple stmt;
2031 /* It can now be assumed that the data references in the statements
2032 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2033 of the loop being vectorized. */
2034 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
2036 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2037 dr = STMT_VINFO_DATA_REF (stmt_info);
2038 SET_DR_MISALIGNMENT (dr, 0);
2039 if (vect_print_dump_info (REPORT_ALIGNMENT))
2040 fprintf (vect_dump, "Alignment of access forced using versioning.");
2043 if (vect_print_dump_info (REPORT_DETAILS))
2044 fprintf (vect_dump, "Versioning for alignment will be applied.");
2046 /* Peeling and versioning can't be done together at this time. */
2047 gcc_assert (! (do_peeling && do_versioning));
2049 stat = vect_verify_datarefs_alignment (loop_vinfo);
2050 gcc_assert (stat);
2051 return stat;
2054 /* This point is reached if neither peeling nor versioning is being done. */
2055 gcc_assert (! (do_peeling || do_versioning));
2057 stat = vect_verify_datarefs_alignment (loop_vinfo);
2058 return stat;
2062 /* Function vect_analyze_data_refs_alignment
2064 Analyze the alignment of the data-references in the loop.
2065 Return FALSE if a data reference is found that cannot be vectorized. */
2067 static bool
2068 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2073 if (!vect_compute_data_refs_alignment (loop_vinfo))
2075 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2076 fprintf (vect_dump,
2077 "not vectorized: can't calculate alignment for data ref.");
2078 return false;
2081 return true;
2085 /* Analyze groups of strided accesses: check that DR belongs to a group of
2086 strided accesses of legal size, step, etc. Detect gaps, single element
2087 interleaving, and other special cases. Set strided access info.
2088 Collect groups of strided stores for further use in SLP analysis. */
2090 static bool
2091 vect_analyze_group_access (struct data_reference *dr)
2093 tree step = DR_STEP (dr);
2094 tree scalar_type = TREE_TYPE (DR_REF (dr));
2095 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2096 gimple stmt = DR_STMT (dr);
2097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2099 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2100 HOST_WIDE_INT stride;
2101 bool slp_impossible = false;
2103 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2104 interleaving group (including gaps). */
2105 stride = dr_step / type_size;
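/* For example: DR_STEP == 16 with 4-byte elements gives STRIDE == 4,
   i.e. each iteration advances over a group of 4 elements, whether or
   not all 4 are actually accessed.  A load like 'x = a[4*i]' accesses
   only the first member and is handled below as single element
   interleaving with DR_GROUP_SIZE == 4.  */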
2107 /* A non-consecutive access is possible only if it is part of interleaving. */
2108 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2110 /* Check whether this DR is part of interleaving, and is a single
2111 element of the group that is accessed in the loop. */
2113 /* Gaps are supported only for loads. STEP must be a multiple of the type
2114 size. The size of the group must be a power of 2. */
2115 if (DR_IS_READ (dr)
2116 && (dr_step % type_size) == 0
2117 && stride > 0
2118 && exact_log2 (stride) != -1)
2120 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2121 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2122 if (vect_print_dump_info (REPORT_DR_DETAILS))
2124 fprintf (vect_dump, "Detected single element interleaving %d ",
2125 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2126 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2127 fprintf (vect_dump, " step ");
2128 print_generic_expr (vect_dump, step, TDF_SLIM);
2130 return true;
2132 if (vect_print_dump_info (REPORT_DETAILS))
2133 fprintf (vect_dump, "not consecutive access");
2134 return false;
2137 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2139 /* First stmt in the interleaving chain. Check the chain. */
2140 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2141 struct data_reference *data_ref = dr;
2142 unsigned int count = 1;
2143 tree next_step;
2144 tree prev_init = DR_INIT (data_ref);
2145 gimple prev = stmt;
2146 HOST_WIDE_INT diff, count_in_bytes;
2148 while (next)
2150 /* Skip same data-refs. If two or more stmts share a data-ref
2151 (supported only for loads), we vectorize only the first stmt, and
2152 the rest get their vectorized loads from the first one. */
2153 if (!tree_int_cst_compare (DR_INIT (data_ref),
2154 DR_INIT (STMT_VINFO_DATA_REF (
2155 vinfo_for_stmt (next)))))
2157 if (!DR_IS_READ (data_ref))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "Two store stmts share the same dr.");
2161 return false;
2164 /* Check that there are no load-store dependencies for these loads,
2165 to prevent a load-store-load sequence to the same location. */
2166 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2167 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2169 if (vect_print_dump_info (REPORT_DETAILS))
2170 fprintf (vect_dump,
2171 "READ_WRITE dependence in interleaving.");
2172 return false;
2175 /* A load reuses the vectorized load of the first stmt with the same data-ref. */
2176 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2178 prev = next;
2179 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2180 continue;
2182 prev = next;
2184 /* Check that all the accesses have the same STEP. */
2185 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2186 if (tree_int_cst_compare (step, next_step))
2188 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "not consecutive access in interleaving");
2190 return false;
2193 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2194 /* Check that the distance between two accesses is equal to the type
2195 size. Otherwise, we have gaps. */
2196 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2197 - TREE_INT_CST_LOW (prev_init)) / type_size;
2198 if (diff != 1)
2200 /* FORNOW: SLP of accesses with gaps is not supported. */
2201 slp_impossible = true;
2202 if (!DR_IS_READ (data_ref))
2204 if (vect_print_dump_info (REPORT_DETAILS))
2205 fprintf (vect_dump, "interleaved store with gaps");
2206 return false;
2210 /* Store the gap from the previous member of the group. If there is no
2211 gap in the access, DR_GROUP_GAP is always 1. */
2212 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
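/* For example: for 4-byte elements and group members a[4*i] (init 0)
   and a[4*i+2] (init 8), diff = (8 - 0)/4 = 2, so DR_GROUP_GAP of the
   second member is 2 and element a[4*i+1] is a gap in the group.  */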
2214 prev_init = DR_INIT (data_ref);
2215 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2216 /* Count the number of data-refs in the chain. */
2217 count++;
2220 /* COUNT is the number of accesses found, we multiply it by the size of
2221 the type to get COUNT_IN_BYTES. */
2222 count_in_bytes = type_size * count;
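/* Continuing the example above: two 4-byte loads with DR_STEP == 16 give
   count_in_bytes = 8, which does not exceed the step, so the group is
   accepted; since 16 != 8, the trailing gap computed below becomes
   stride - count = 4 - 2 = 2 for loads (stores would be rejected).  */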
2224 /* Check that the size of the interleaving is not greater than STEP. */
2225 if (dr_step < count_in_bytes)
2227 if (vect_print_dump_info (REPORT_DETAILS))
2229 fprintf (vect_dump, "interleaving size is greater than step for ");
2230 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2232 return false;
2235 /* Check that the size of the interleaving is equal to STEP for stores,
2236 i.e., that there are no gaps. */
2237 if (dr_step != count_in_bytes)
2239 if (DR_IS_READ (dr))
2241 slp_impossible = true;
2242 /* There is a gap after the last load in the group. This gap is a
2243 difference between the stride and the number of elements. When
2244 there is no gap, this difference should be 0. */
2245 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2247 else
2249 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "interleaved store with gaps");
2251 return false;
2255 /* Check that STEP is a multiple of type size. */
2256 if ((dr_step % type_size) != 0)
2258 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "step is not a multiple of type size: step ");
2261 print_generic_expr (vect_dump, step, TDF_SLIM);
2262 fprintf (vect_dump, " size ");
2263 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2264 TDF_SLIM);
2266 return false;
2269 /* FORNOW: we handle only interleaving that is a power of 2.
2270 We don't fail here if it may still be possible to vectorize the
2271 group using SLP. If not, the size of the group will be checked in
2272 vect_analyze_operations, and the vectorization will fail. */
2273 if (exact_log2 (stride) == -1)
2275 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "interleaving is not a power of 2");
2278 if (slp_impossible)
2279 return false;
2281 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2282 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2285 /* SLP: create an SLP data structure for every interleaving group of
2286 stores for further analysis in vect_analyze_slp. */
2287 if (!DR_IS_READ (dr) && !slp_impossible)
2288 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2291 return true;
2295 /* Analyze the access pattern of the data-reference DR.
2296 In the case of non-consecutive accesses, call vect_analyze_group_access() to
2297 analyze groups of strided accesses. */
2299 static bool
2300 vect_analyze_data_ref_access (struct data_reference *dr)
2302 tree step = DR_STEP (dr);
2303 tree scalar_type = TREE_TYPE (DR_REF (dr));
2304 gimple stmt = DR_STMT (dr);
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2308 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2310 if (!step)
2312 if (vect_print_dump_info (REPORT_DETAILS))
2313 fprintf (vect_dump, "bad data-ref access");
2314 return false;
2317 /* Don't allow invariant accesses. */
2318 if (dr_step == 0)
2319 return false;
2321 if (nested_in_vect_loop_p (loop, stmt))
2323 /* Interleaved accesses are not yet supported within outer-loop
2324 vectorization for references in the inner-loop. */
2325 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2327 /* For the rest of the analysis we use the outer-loop step. */
2328 step = STMT_VINFO_DR_STEP (stmt_info);
2329 dr_step = TREE_INT_CST_LOW (step);
2331 if (dr_step == 0)
2333 if (vect_print_dump_info (REPORT_ALIGNMENT))
2334 fprintf (vect_dump, "zero step in outer loop.");
2335 if (DR_IS_READ (dr))
2336 return true;
2337 else
2338 return false;
2342 /* Consecutive? */
2343 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2345 /* Mark that it is not interleaving. */
2346 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2347 return true;
2350 if (nested_in_vect_loop_p (loop, stmt))
2352 if (vect_print_dump_info (REPORT_ALIGNMENT))
2353 fprintf (vect_dump, "strided access in outer loop.");
2354 return false;
2357 /* Not consecutive access - check if it's a part of interleaving group. */
2358 return vect_analyze_group_access (dr);
2362 /* Function vect_analyze_data_ref_accesses.
2364 Analyze the access pattern of all the data references in the loop.
2366 FORNOW: the only access pattern that is considered vectorizable is a
2367 simple step 1 (consecutive) access.
2369 FORNOW: handle only arrays and pointer accesses. */
2371 static bool
2372 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2374 unsigned int i;
2375 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2376 struct data_reference *dr;
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2381 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2382 if (!vect_analyze_data_ref_access (dr))
2384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2385 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2386 return false;
2389 return true;
2392 /* Function vect_prune_runtime_alias_test_list.
2394 Prune a list of ddrs to be tested at run-time by versioning for alias.
2395 Return FALSE if the resulting list of ddrs is longer than allowed by
2396 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
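/* For example: if the loop contains two loads from q and one store to p,
   versioning may create two ddrs that both test the (p, q) pair over the
   same address range; vect_vfa_range_equal detects this, the duplicate
   ddr is removed, and a single runtime overlap test remains.  */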
2398 static bool
2399 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2401 VEC (ddr_p, heap) * ddrs =
2402 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2403 unsigned i, j;
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2408 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2410 bool found;
2411 ddr_p ddr_i;
2413 ddr_i = VEC_index (ddr_p, ddrs, i);
2414 found = false;
2416 for (j = 0; j < i; j++)
2418 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2420 if (vect_vfa_range_equal (ddr_i, ddr_j))
2422 if (vect_print_dump_info (REPORT_DR_DETAILS))
2424 fprintf (vect_dump, "found equal ranges ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2426 fprintf (vect_dump, ", ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, " and ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2430 fprintf (vect_dump, ", ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2433 found = true;
2434 break;
2438 if (found)
2440 VEC_ordered_remove (ddr_p, ddrs, i);
2441 continue;
2443 i++;
2446 if (VEC_length (ddr_p, ddrs) >
2447 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2449 if (vect_print_dump_info (REPORT_DR_DETAILS))
2451 fprintf (vect_dump,
2452 "disable versioning for alias - max number of generated "
2453 "checks exceeded.");
2456 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2458 return false;
2461 return true;
2464 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2466 void
2467 vect_free_slp_tree (slp_tree node)
2469 if (!node)
2470 return;
2472 if (SLP_TREE_LEFT (node))
2473 vect_free_slp_tree (SLP_TREE_LEFT (node));
2475 if (SLP_TREE_RIGHT (node))
2476 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2478 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
2480 if (SLP_TREE_VEC_STMTS (node))
2481 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
2483 free (node);
2487 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
2488 they are of a legal type and that they match the defs of the first stmt of
2489 the SLP group (stored in FIRST_STMT_...). */
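/* For example: for a node containing { t1 = b[2*i] + c[2*i];
   t2 = b[2*i+1] + 5; }, operand 1 of the first stmt is defined by a load
   (vect_loop_def) while in the second stmt it is a constant
   (vect_constant_def), so the defs do not match and building the SLP
   node fails with "different types".  */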
2491 static bool
2492 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2493 gimple stmt, VEC (gimple, heap) **def_stmts0,
2494 VEC (gimple, heap) **def_stmts1,
2495 enum vect_def_type *first_stmt_dt0,
2496 enum vect_def_type *first_stmt_dt1,
2497 tree *first_stmt_def0_type,
2498 tree *first_stmt_def1_type,
2499 tree *first_stmt_const_oprnd,
2500 int ncopies_for_cost,
2501 bool *pattern0, bool *pattern1)
2503 tree oprnd;
2504 unsigned int i, number_of_oprnds;
2505 tree def;
2506 gimple def_stmt;
2507 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2508 stmt_vec_info stmt_info =
2509 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2510 enum gimple_rhs_class rhs_class;
2512 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
2513 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
2515 for (i = 0; i < number_of_oprnds; i++)
2517 oprnd = gimple_op (stmt, i + 1);
2519 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2520 || (!def_stmt && dt[i] != vect_constant_def))
2522 if (vect_print_dump_info (REPORT_SLP))
2524 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2525 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2528 return false;
2531 /* Check if DEF_STMT is a part of a pattern and get the def stmt from
2532 the pattern. Check that all the stmts of the node are in the
2533 pattern. */
2534 if (def_stmt && vinfo_for_stmt (def_stmt)
2535 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
2537 if (!*first_stmt_dt0)
2538 *pattern0 = true;
2539 else
2541 if (i == 1 && !*first_stmt_dt1)
2542 *pattern1 = true;
2543 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
2545 if (vect_print_dump_info (REPORT_DETAILS))
2547 fprintf (vect_dump, "Build SLP failed: some of the stmts"
2548 " are in a pattern, and others are not ");
2549 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2552 return false;
2556 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
2557 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
2559 if (*dt == vect_unknown_def_type)
2561 if (vect_print_dump_info (REPORT_DETAILS))
2562 fprintf (vect_dump, "Unsupported pattern.");
2563 return false;
2566 switch (gimple_code (def_stmt))
2568 case GIMPLE_PHI:
2569 def = gimple_phi_result (def_stmt);
2570 break;
2572 case GIMPLE_ASSIGN:
2573 def = gimple_assign_lhs (def_stmt);
2574 break;
2576 default:
2577 if (vect_print_dump_info (REPORT_DETAILS))
2578 fprintf (vect_dump, "unsupported defining stmt: ");
2579 return false;
2583 if (!*first_stmt_dt0)
2585 /* op0 of the first stmt of the group - store its info. */
2586 *first_stmt_dt0 = dt[i];
2587 if (def)
2588 *first_stmt_def0_type = TREE_TYPE (def);
2589 else
2590 *first_stmt_const_oprnd = oprnd;
2592 /* Analyze costs (for the first stmt of the group only). */
2593 if (rhs_class != GIMPLE_SINGLE_RHS)
2594 /* Not a memory operation (we don't call this function for loads). */
2595 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2596 else
2597 /* Store. */
2598 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2601 else
2603 if (!*first_stmt_dt1 && i == 1)
2605 /* op1 of the first stmt of the group - store its info. */
2606 *first_stmt_dt1 = dt[i];
2607 if (def)
2608 *first_stmt_def1_type = TREE_TYPE (def);
2609 else
2611 /* We assume that the stmt contains only one constant
2612 operand. We fail otherwise, to be on the safe side. */
2613 if (*first_stmt_const_oprnd)
2615 if (vect_print_dump_info (REPORT_SLP))
2616 fprintf (vect_dump, "Build SLP failed: two constant "
2617 "oprnds in stmt");
2618 return false;
2620 *first_stmt_const_oprnd = oprnd;
2623 else
2625 /* Not first stmt of the group, check that the def-stmt/s match
2626 the def-stmt/s of the first stmt. */
2627 if ((i == 0
2628 && (*first_stmt_dt0 != dt[i]
2629 || (*first_stmt_def0_type && def
2630 && *first_stmt_def0_type != TREE_TYPE (def))))
2631 || (i == 1
2632 && (*first_stmt_dt1 != dt[i]
2633 || (*first_stmt_def1_type && def
2634 && *first_stmt_def1_type != TREE_TYPE (def))))
2635 || (!def
2636 && TREE_TYPE (*first_stmt_const_oprnd)
2637 != TREE_TYPE (oprnd)))
2639 if (vect_print_dump_info (REPORT_SLP))
2640 fprintf (vect_dump, "Build SLP failed: different types ");
2642 return false;
2647 /* Check the types of the definitions. */
2648 switch (dt[i])
2650 case vect_constant_def:
2651 case vect_invariant_def:
2652 break;
2654 case vect_loop_def:
2655 if (i == 0)
2656 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
2657 else
2658 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
2659 break;
2661 default:
2662 /* FORNOW: Not supported. */
2663 if (vect_print_dump_info (REPORT_SLP))
2665 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2666 print_generic_expr (vect_dump, def, TDF_SLIM);
2669 return false;
2673 return true;
2677 /* Recursively build an SLP tree starting from NODE.
2678 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2679 permutation, or are of unsupported operation types. Otherwise, return
2680 TRUE. */
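/* For example: for the strided-store group
     a[2*i]   = t0;  t0 = b[2*i]   + c[2*i];
     a[2*i+1] = t1;  t1 = b[2*i+1] + c[2*i+1];
   the root node holds the two stores, its left child holds the two
   additions, and that child's left and right children hold the loads
   from b and from c respectively, where the recursion stops.  */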
2682 static bool
2683 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2684 unsigned int group_size,
2685 int *inside_cost, int *outside_cost,
2686 int ncopies_for_cost, unsigned int *max_nunits)
2688 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
2689 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
2690 unsigned int i;
2691 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2692 gimple stmt = VEC_index (gimple, stmts, 0);
2693 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2694 enum tree_code first_stmt_code = 0, rhs_code;
2695 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2696 tree lhs;
2697 gimple prev_stmt = NULL;
2698 bool stop_recursion = false, need_same_oprnds = false;
2699 tree vectype, scalar_type, first_op1 = NULL_TREE;
2700 unsigned int vectorization_factor = 0, ncopies;
2701 optab optab;
2702 int icode;
2703 enum machine_mode optab_op2_mode;
2704 enum machine_mode vec_mode;
2705 tree first_stmt_const_oprnd = NULL_TREE;
2706 struct data_reference *first_dr;
2707 bool pattern0 = false, pattern1 = false;
2709 /* For every stmt in NODE find its def stmt/s. */
2710 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
2712 if (vect_print_dump_info (REPORT_SLP))
2714 fprintf (vect_dump, "Build SLP for ");
2715 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2718 lhs = gimple_get_lhs (stmt);
2719 if (lhs == NULL_TREE)
2721 if (vect_print_dump_info (REPORT_SLP))
2723 fprintf (vect_dump,
2724 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
2725 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2728 return false;
2731 scalar_type = TREE_TYPE (lhs);
2732 vectype = get_vectype_for_scalar_type (scalar_type);
2733 if (!vectype)
2735 if (vect_print_dump_info (REPORT_SLP))
2737 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2738 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2740 return false;
2743 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2744 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2745 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2746 if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
2747 fprintf (vect_dump, "SLP with multiple types ");
2749 /* In case of multiple types we need to detect the smallest type. */
2750 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
2751 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
2753 if (is_gimple_call (stmt))
2754 rhs_code = CALL_EXPR;
2755 else
2756 rhs_code = gimple_assign_rhs_code (stmt);
2758 /* Check the operation. */
2759 if (i == 0)
2761 first_stmt_code = rhs_code;
2763 /* Shift arguments should be equal in all the packed stmts for a
2764 vector shift with scalar shift operand. */
2765 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
2766 || rhs_code == LROTATE_EXPR
2767 || rhs_code == RROTATE_EXPR)
2769 vec_mode = TYPE_MODE (vectype);
2771 /* First see if we have a vector/vector shift. */
2772 optab = optab_for_tree_code (rhs_code, vectype,
2773 optab_vector);
2775 if (!optab
2776 || (optab->handlers[(int) vec_mode].insn_code
2777 == CODE_FOR_nothing))
2779 /* No vector/vector shift, try for a vector/scalar shift. */
2780 optab = optab_for_tree_code (rhs_code, vectype,
2781 optab_scalar);
2783 if (!optab)
2785 if (vect_print_dump_info (REPORT_SLP))
2786 fprintf (vect_dump, "Build SLP failed: no optab.");
2787 return false;
2789 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2790 if (icode == CODE_FOR_nothing)
2792 if (vect_print_dump_info (REPORT_SLP))
2793 fprintf (vect_dump,
2794 "Build SLP failed: op not supported by target.");
2795 return false;
2797 optab_op2_mode = insn_data[icode].operand[2].mode;
2798 if (!VECTOR_MODE_P (optab_op2_mode))
2800 need_same_oprnds = true;
2801 first_op1 = gimple_assign_rhs2 (stmt);
2806 else
2808 if (first_stmt_code != rhs_code
2809 && (first_stmt_code != IMAGPART_EXPR
2810 || rhs_code != REALPART_EXPR)
2811 && (first_stmt_code != REALPART_EXPR
2812 || rhs_code != IMAGPART_EXPR))
2814 if (vect_print_dump_info (REPORT_SLP))
2816 fprintf (vect_dump,
2817 "Build SLP failed: different operation in stmt ");
2818 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2821 return false;
2824 if (need_same_oprnds
2825 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
2827 if (vect_print_dump_info (REPORT_SLP))
2829 fprintf (vect_dump,
2830 "Build SLP failed: different shift arguments in ");
2831 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2834 return false;
2838 /* Strided store or load. */
2839 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2841 if (REFERENCE_CLASS_P (lhs))
2843 /* Store. */
2844 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2845 &def_stmts0, &def_stmts1,
2846 &first_stmt_dt0,
2847 &first_stmt_dt1,
2848 &first_stmt_def0_type,
2849 &first_stmt_def1_type,
2850 &first_stmt_const_oprnd,
2851 ncopies_for_cost,
2852 &pattern0, &pattern1))
2853 return false;
2855 else
2857 /* Load. */
2858 if (i == 0)
2860 /* In case of multiple types we need to detect the smallest
2861 type. */
2862 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
2863 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
2865 /* First stmt of the SLP group should be the first load of
2866 the interleaving chain if data permutation is not allowed.
2867 Check that there is no gap between the loads. */
2868 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2869 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2871 /* FORNOW: data permutations and gaps in loads are not
2872 supported. */
2873 if (vect_print_dump_info (REPORT_SLP))
2875 fprintf (vect_dump, "Build SLP failed: strided "
2876 " loads need permutation or have gaps ");
2877 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2880 return false;
2883 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2884 if (vect_supportable_dr_alignment (first_dr)
2885 == dr_unaligned_unsupported)
2887 if (vect_print_dump_info (REPORT_SLP))
2889 fprintf (vect_dump, "Build SLP failed: unsupported "
2890 " unaligned load ");
2891 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2894 return false;
2897 /* Analyze costs (for the first stmt in the group). */
2898 vect_model_load_cost (vinfo_for_stmt (stmt),
2899 ncopies_for_cost, *node);
2901 else
2903 /* Check that we have consecutive loads from interleaving
2904 chain and that there is no gap between the loads. */
2905 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
2906 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
2908 /* FORNOW: data permutations and gaps in loads are not
2909 supported. */
2910 if (vect_print_dump_info (REPORT_SLP))
2912 fprintf (vect_dump, "Build SLP failed: strided "
2913 " loads need permutation or have gaps ");
2914 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2916 return false;
2920 prev_stmt = stmt;
2922 /* We stop building the tree when we reach a group of loads. */
2923 stop_recursion = true;
2924 continue;
2926 } /* Strided access. */
2927 else
2929 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
2931 /* Not a strided load. */
2932 if (vect_print_dump_info (REPORT_SLP))
2934 fprintf (vect_dump, "Build SLP failed: not strided load ");
2935 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2938 /* FORNOW: non-strided loads are not supported. */
2939 return false;
2942 /* Not a memory operation. */
2943 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
2944 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
2946 if (vect_print_dump_info (REPORT_SLP))
2948 fprintf (vect_dump, "Build SLP failed: operation");
2949 fprintf (vect_dump, " unsupported ");
2950 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2953 return false;
2956 /* Find the def-stmts. */
2957 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2958 &def_stmts0, &def_stmts1,
2959 &first_stmt_dt0, &first_stmt_dt1,
2960 &first_stmt_def0_type,
2961 &first_stmt_def1_type,
2962 &first_stmt_const_oprnd,
2963 ncopies_for_cost,
2964 &pattern0, &pattern1))
2965 return false;
2969 /* Add the costs of the node to the overall instance costs. */
2970 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2971 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2973 /* Strided loads were reached - stop the recursion. */
2974 if (stop_recursion)
2975 return true;
2977 /* Create SLP_TREE nodes for the definition node/s. */
2978 if (first_stmt_dt0 == vect_loop_def)
2980 slp_tree left_node = XNEW (struct _slp_tree);
2981 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2982 SLP_TREE_VEC_STMTS (left_node) = NULL;
2983 SLP_TREE_LEFT (left_node) = NULL;
2984 SLP_TREE_RIGHT (left_node) = NULL;
2985 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2986 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2987 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2988 inside_cost, outside_cost,
2989 ncopies_for_cost, max_nunits))
2990 return false;
2992 SLP_TREE_LEFT (*node) = left_node;
2995 if (first_stmt_dt1 == vect_loop_def)
2997 slp_tree right_node = XNEW (struct _slp_tree);
2998 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2999 SLP_TREE_VEC_STMTS (right_node) = NULL;
3000 SLP_TREE_LEFT (right_node) = NULL;
3001 SLP_TREE_RIGHT (right_node) = NULL;
3002 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
3003 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
3004 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
3005 inside_cost, outside_cost,
3006 ncopies_for_cost, max_nunits))
3007 return false;
3009 SLP_TREE_RIGHT (*node) = right_node;
3012 return true;
3016 static void
3017 vect_print_slp_tree (slp_tree node)
3019 int i;
3020 gimple stmt;
3022 if (!node)
3023 return;
3025 fprintf (vect_dump, "node ");
3026 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3028 fprintf (vect_dump, "\n\tstmt %d ", i);
3029 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3031 fprintf (vect_dump, "\n");
3033 vect_print_slp_tree (SLP_TREE_LEFT (node));
3034 vect_print_slp_tree (SLP_TREE_RIGHT (node));
3038 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
3039 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
3040 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
3041 stmts in NODE are to be marked. */
3043 static void
3044 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
3046 int i;
3047 gimple stmt;
3049 if (!node)
3050 return;
3052 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3053 if (j < 0 || i == j)
3054 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
3056 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
3057 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
3061 /* Analyze an SLP instance starting from a group of strided stores. Call
3062 vect_build_slp_tree to build a tree of packed stmts if possible.
3063 Return FALSE if it's impossible to SLP any stmt in the loop. */
3065 static bool
3066 vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
3068 slp_instance new_instance;
3069 slp_tree node = XNEW (struct _slp_tree);
3070 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3071 unsigned int unrolling_factor = 1, nunits;
3072 tree vectype, scalar_type;
3073 gimple next;
3074 unsigned int vectorization_factor = 0, ncopies;
3075 bool slp_impossible = false;
3076 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3077 unsigned int max_nunits = 0;
3079 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
3080 vectype = get_vectype_for_scalar_type (scalar_type);
3081 if (!vectype)
3083 if (vect_print_dump_info (REPORT_SLP))
3085 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3086 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3088 return false;
3091 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3092 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3093 ncopies = vectorization_factor / nunits;
3095 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3096 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
3097 next = stmt;
3098 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3099 while (next)
3101 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
3102 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3105 SLP_TREE_VEC_STMTS (node) = NULL;
3106 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3107 SLP_TREE_LEFT (node) = NULL;
3108 SLP_TREE_RIGHT (node) = NULL;
3109 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3110 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3112 /* Calculate the unrolling factor. */
3113 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3115 /* Calculate the number of vector stmts to create based on the unrolling
3116 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3117 GROUP_SIZE / NUNITS otherwise). */
3118 ncopies_for_cost = unrolling_factor * group_size / nunits;
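/* For example: storing 4-byte elements with V4SI (nunits == 4), a group
   of size 2 gives unrolling_factor = lcm (4, 2) / 2 = 2 (two iterations
   fill one vector) and ncopies_for_cost = 2 * 2 / 4 = 1, while a group
   of size 8 gives unrolling_factor = 1 and ncopies_for_cost = 2.  */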
3120 /* Build the tree for the SLP instance. */
3121 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
3122 &outside_cost, ncopies_for_cost, &max_nunits))
3124 /* Create a new SLP instance. */
3125 new_instance = XNEW (struct _slp_instance);
3126 SLP_INSTANCE_TREE (new_instance) = node;
3127 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3128 /* Calculate the unrolling factor based on the smallest type. */
3129 if (max_nunits > nunits)
3130 unrolling_factor = least_common_multiple (max_nunits, group_size)
3131 / group_size;
3133 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3134 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3135 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3136 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3137 new_instance);
3138 if (vect_print_dump_info (REPORT_SLP))
3139 vect_print_slp_tree (node);
3141 return true;
3144 /* Failed to SLP. */
3145 /* Free the allocated memory. */
3146 vect_free_slp_tree (node);
3148 if (slp_impossible)
3149 return false;
3151 /* SLP failed for this instance, but it is still possible to SLP other stmts
3152 in the loop. */
3153 return true;
3157 /* Check if there are stmts in the loop that can be vectorized using SLP.
3158 Build SLP trees of packed scalar stmts if SLP is possible. */
3160 static bool
3161 vect_analyze_slp (loop_vec_info loop_vinfo)
3163 unsigned int i;
3164 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3165 gimple store;
3167 if (vect_print_dump_info (REPORT_SLP))
3168 fprintf (vect_dump, "=== vect_analyze_slp ===");
3170 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
3171 if (!vect_analyze_slp_instance (loop_vinfo, store))
3173 /* SLP failed. No instance can be SLPed in the loop. */
3174 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3175 fprintf (vect_dump, "SLP failed.");
3177 return false;
3180 return true;
3184 /* For each possible SLP instance decide whether to SLP it and calculate overall
3185 unrolling factor needed to SLP the loop. */
3187 static void
3188 vect_make_slp_decision (loop_vec_info loop_vinfo)
3190 unsigned int i, unrolling_factor = 1;
3191 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3192 slp_instance instance;
3193 int decided_to_slp = 0;
3195 if (vect_print_dump_info (REPORT_SLP))
3196 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3198 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3200 /* FORNOW: SLP if you can. */
3201 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3202 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3204 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3205 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3206 loop-based vectorization. Such stmts will be marked as HYBRID. */
3207 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3208 decided_to_slp++;
3211 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3213 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3214 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3215 decided_to_slp, unrolling_factor);
3219 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3220 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
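/* For example: if a stmt marked PURE_SLP defines an SSA name that is
   also used by a relevant stmt outside any SLP instance (one that will
   be loop-vectorized), the defining stmt must be vectorized both ways
   and is therefore re-marked HYBRID.  */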
3222 static void
3223 vect_detect_hybrid_slp_stmts (slp_tree node)
3225 int i;
3226 gimple stmt;
3227 imm_use_iterator imm_iter;
3228 gimple use_stmt;
3230 if (!node)
3231 return;
3233 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3234 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3235 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
3236 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
3237 if (vinfo_for_stmt (use_stmt)
3238 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
3239 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
3240 vect_mark_slp_stmts (node, hybrid, i);
3242 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3243 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3247 /* Find stmts that must be both vectorized and SLPed. */
3249 static void
3250 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3252 unsigned int i;
3253 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3254 slp_instance instance;
3256 if (vect_print_dump_info (REPORT_SLP))
3257 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3259 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3260 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3264 /* Function vect_analyze_data_refs.
3266 Find all the data references in the loop.
3268 The general structure of the analysis of data refs in the vectorizer is as
3269 follows:
3270 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3271 find and analyze all data-refs in the loop and their dependences.
3272 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3273 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3274 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3278 static bool
3279 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3282 unsigned int i;
3283 VEC (data_reference_p, heap) *datarefs;
3284 struct data_reference *dr;
3285 tree scalar_type;
3287 if (vect_print_dump_info (REPORT_DETAILS))
3288 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3290 compute_data_dependences_for_loop (loop, true,
3291 &LOOP_VINFO_DATAREFS (loop_vinfo),
3292 &LOOP_VINFO_DDRS (loop_vinfo));
3294 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3295 from stmt_vec_info struct to DR and vectype. */
3296 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3298 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3300 gimple stmt;
3301 stmt_vec_info stmt_info;
3302 basic_block bb;
3303 tree base, offset, init;
3305 if (!dr || !DR_REF (dr))
3307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3308 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3309 return false;
3312 stmt = DR_STMT (dr);
3313 stmt_info = vinfo_for_stmt (stmt);
3315 /* Check that analysis of the data-ref succeeded. */
3316 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3317 || !DR_STEP (dr))
3319 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3321 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3322 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3324 return false;
3327 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3329 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3330 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3331 "constant");
3332 return false;
3335 if (!DR_SYMBOL_TAG (dr))
3337 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3339 fprintf (vect_dump, "not vectorized: no memory tag for ");
3340 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3342 return false;
3345 base = unshare_expr (DR_BASE_ADDRESS (dr));
3346 offset = unshare_expr (DR_OFFSET (dr));
3347 init = unshare_expr (DR_INIT (dr));
3349 /* Update DR field in stmt_vec_info struct. */
3350 bb = gimple_bb (stmt);
3352 /* If the dataref is in an inner-loop of the loop that is considered
3353 for vectorization, we also want to analyze the access relative to
3354 the outer-loop (DR contains information only relative to the
3355 inner-most enclosing loop). We do that by building a reference to the
3356 first location accessed by the inner-loop, and analyze it relative to
3357 the outer-loop. */
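/* For example: for a[i][j] accessed in an inner j-loop nested in an
   outer i-loop, DR describes the access relative to the inner loop
   (step == element size).  The first location accessed by the inner
   loop is &a[i][0]; relative to the outer loop it advances by one row
   per i-iteration, so STMT_VINFO_DR_STEP becomes the row size.  */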
3358 if (nested_in_vect_loop_p (loop, stmt))
3360 tree outer_step, outer_base, outer_init;
3361 HOST_WIDE_INT pbitsize, pbitpos;
3362 tree poffset;
3363 enum machine_mode pmode;
3364 int punsignedp, pvolatilep;
3365 affine_iv base_iv, offset_iv;
3366 tree dinit;
3368 /* Build a reference to the first location accessed by the
3369 inner-loop: *(BASE+INIT). (The first location is actually
3370 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3371 tree inner_base = build_fold_indirect_ref
3372 (fold_build2 (POINTER_PLUS_EXPR,
3373 TREE_TYPE (base), base,
3374 fold_convert (sizetype, init)));
3376 if (vect_print_dump_info (REPORT_DETAILS))
3378 fprintf (dump_file, "analyze in outer-loop: ");
3379 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3382 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3383 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3384 gcc_assert (outer_base != NULL_TREE);
3386 if (pbitpos % BITS_PER_UNIT != 0)
3388 if (vect_print_dump_info (REPORT_DETAILS))
3389 fprintf (dump_file, "failed: bit offset alignment.\n");
3390 return false;
3393 outer_base = build_fold_addr_expr (outer_base);
3394 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3396 if (vect_print_dump_info (REPORT_DETAILS))
3397 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3398 return false;
3401 if (offset)
3403 if (poffset)
3404 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3405 else
3406 poffset = offset;
3409 if (!poffset)
3411 offset_iv.base = ssize_int (0);
3412 offset_iv.step = ssize_int (0);
3414 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3416 if (vect_print_dump_info (REPORT_DETAILS))
3417 fprintf (dump_file, "evolution of offset is not affine.\n");
3418 return false;
3421 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3422 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3423 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3424 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3425 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3427 outer_step = size_binop (PLUS_EXPR,
3428 fold_convert (ssizetype, base_iv.step),
3429 fold_convert (ssizetype, offset_iv.step));
3431 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3432 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3433 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3434 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3435 STMT_VINFO_DR_OFFSET (stmt_info) =
3436 fold_convert (ssizetype, offset_iv.base);
3437 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3438 size_int (highest_pow2_factor (offset_iv.base));
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, "\touter base_address: ");
3443 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3444 fprintf (dump_file, "\n\touter offset from base address: ");
3445 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3446 fprintf (dump_file, "\n\touter constant offset from base address: ");
3447 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3448 fprintf (dump_file, "\n\touter step: ");
3449 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3450 fprintf (dump_file, "\n\touter aligned to: ");
3451 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3455 if (STMT_VINFO_DATA_REF (stmt_info))
3457 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3459 fprintf (vect_dump,
3460 "not vectorized: more than one data ref in stmt: ");
3461 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3463 return false;
3465 STMT_VINFO_DATA_REF (stmt_info) = dr;
3467 /* Set vectype for STMT. */
3468 scalar_type = TREE_TYPE (DR_REF (dr));
3469 STMT_VINFO_VECTYPE (stmt_info) =
3470 get_vectype_for_scalar_type (scalar_type);
3471 if (!STMT_VINFO_VECTYPE (stmt_info))
3473 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3475 fprintf (vect_dump,
3476 "not vectorized: no vectype for stmt: ");
3477 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3478 fprintf (vect_dump, " scalar_type: ");
3479 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3481 return false;
3485 return true;
3489 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3491 /* Function vect_mark_relevant.
3493 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3495 static void
3496 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
3497 enum vect_relevant relevant, bool live_p)
3499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3500 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3501 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3503 if (vect_print_dump_info (REPORT_DETAILS))
3504 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3506 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3508 gimple pattern_stmt;
3510 /* This is the last stmt in a sequence that was detected as a
3511 pattern that can potentially be vectorized. Don't mark the stmt
3512 as relevant/live because it's not going to be vectorized.
3513 Instead mark the pattern-stmt that replaces it. */
3515 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3517 if (vect_print_dump_info (REPORT_DETAILS))
3518 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3519 stmt_info = vinfo_for_stmt (pattern_stmt);
3520 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3521 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3522 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3523 stmt = pattern_stmt;
3526 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3527 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3528 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3530 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3531 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3533 if (vect_print_dump_info (REPORT_DETAILS))
3534 fprintf (vect_dump, "already marked relevant/live.");
3535 return;
3538 VEC_safe_push (gimple, heap, *worklist, stmt);
3542 /* Function vect_stmt_relevant_p.
3544 Return true if STMT in loop that is represented by LOOP_VINFO is
3545 "relevant for vectorization".
3547 A stmt is considered "relevant for vectorization" if:
3548 - it has uses outside the loop.
3549 - it has vdefs (it alters memory).
3550 - control stmts in the loop (except for the exit condition).
3552 CHECKME: what other side effects would the vectorizer allow? */
3554 static bool
3555 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
3556 enum vect_relevant *relevant, bool *live_p)
3558 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3559 ssa_op_iter op_iter;
3560 imm_use_iterator imm_iter;
3561 use_operand_p use_p;
3562 def_operand_p def_p;
3564 *relevant = vect_unused_in_loop;
3565 *live_p = false;
3567 /* cond stmt other than loop exit cond. */
3568 if (is_ctrl_stmt (stmt)
3569 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3570 *relevant = vect_used_in_loop;
3572 /* changing memory. */
3573 if (gimple_code (stmt) != GIMPLE_PHI)
3574 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3576 if (vect_print_dump_info (REPORT_DETAILS))
3577 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3578 *relevant = vect_used_in_loop;
3581 /* uses outside the loop. */
3582 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3584 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3586 basic_block bb = gimple_bb (USE_STMT (use_p));
3587 if (!flow_bb_inside_loop_p (loop, bb))
3589 if (vect_print_dump_info (REPORT_DETAILS))
3590 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3592 /* We expect all such uses to be in the loop exit phis
3593 (because of loop-closed SSA form). */
3594 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
3595 gcc_assert (bb == single_exit (loop)->dest);
3597 *live_p = true;
3602 return (*live_p || *relevant);
3607 Function process_use.
3609 Inputs:
3610 - a USE in STMT in a loop represented by LOOP_VINFO
3611 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3612 that defined USE. This is done by calling mark_relevant and passing it
3613 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3615 Outputs:
3616 Generally, LIVE_P and RELEVANT are used to define the liveness and
3617 relevance info of the DEF_STMT of this USE:
3618 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3619 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3620 Exceptions:
3621 - case 1: If USE is used only for address computations (e.g. array indexing),
3622 which does not need to be directly vectorized, then the liveness/relevance
3623 of the respective DEF_STMT is left unchanged.
3624 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3625 skip DEF_STMT because it has already been processed.
3626 - case 3: If DEF_STMT and STMT are in different loop nests, then "relevant"
3627 will be modified accordingly.
3629 Return true if everything is as expected. Return false otherwise. */
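/* For illustration, a sketch: given

     x = a[i];

   the use of i inside this stmt serves only the address computation of
   a[i], so case 1 applies and the stmt defining i keeps whatever
   liveness/relevance it already had; only uses that must themselves be
   vectorized propagate relevance back to their defining stmts.  */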
3631 static bool
3632 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3633 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
3635 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3636 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3637 stmt_vec_info dstmt_vinfo;
3638 basic_block bb, def_bb;
3639 tree def;
3640 gimple def_stmt;
3641 enum vect_def_type dt;
3643 /* case 1: we are only interested in uses that need to be vectorized.  Uses
3644 that only feed address computations are not considered relevant.  */
3645 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3646 return true;
3648 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3650 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3651 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3652 return false;
3655 if (!def_stmt || gimple_nop_p (def_stmt))
3656 return true;
3658 def_bb = gimple_bb (def_stmt);
3659 if (!flow_bb_inside_loop_p (loop, def_bb))
3661 if (vect_print_dump_info (REPORT_DETAILS))
3662 fprintf (vect_dump, "def_stmt is out of loop.");
3663 return true;
3666 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3667 DEF_STMT must have already been processed, because this should be the
3668 only way that STMT, which is a reduction-phi, was put in the worklist,
3669 as there should be no other uses for DEF_STMT in the loop. So we just
3670 check that everything is as expected, and we are done. */
3671 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3672 bb = gimple_bb (stmt);
3673 if (gimple_code (stmt) == GIMPLE_PHI
3674 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3675 && gimple_code (def_stmt) != GIMPLE_PHI
3676 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3677 && bb->loop_father == def_bb->loop_father)
3679 if (vect_print_dump_info (REPORT_DETAILS))
3680 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3681 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3682 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3683 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3684 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3685 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3686 return true;
3689 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3690 outer-loop-header-bb:
3691 d = def_stmt
3692 inner-loop:
3693 stmt # use (d)
3694 outer-loop-tail-bb:
3695 ... */
3696 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3698 if (vect_print_dump_info (REPORT_DETAILS))
3699 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3700 switch (relevant)
3702 case vect_unused_in_loop:
3703 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3704 vect_used_by_reduction : vect_unused_in_loop;
3705 break;
3706 case vect_used_in_outer_by_reduction:
3707 relevant = vect_used_by_reduction;
3708 break;
3709 case vect_used_in_outer:
3710 relevant = vect_used_in_loop;
3711 break;
3712 case vect_used_by_reduction:
3713 case vect_used_in_loop:
3714 break;
3716 default:
3717 gcc_unreachable ();
3721 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3722 outer-loop-header-bb:
3724 inner-loop:
3725 d = def_stmt
3726 outer-loop-tail-bb:
3727 stmt # use (d) */
3728 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3730 if (vect_print_dump_info (REPORT_DETAILS))
3731 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3732 switch (relevant)
3734 case vect_unused_in_loop:
3735 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3736 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3737 break;
3739 case vect_used_in_outer_by_reduction:
3740 case vect_used_in_outer:
3741 break;
3743 case vect_used_by_reduction:
3744 relevant = vect_used_in_outer_by_reduction;
3745 break;
3747 case vect_used_in_loop:
3748 relevant = vect_used_in_outer;
3749 break;
3751 default:
3752 gcc_unreachable ();
3756 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3757 return true;
3761 /* Function vect_mark_stmts_to_be_vectorized.
3763 Not all stmts in the loop need to be vectorized. For example:
3765 for i...
3766 for j...
3767 1. T0 = i + j
3768 2. T1 = a[T0]
3770 3. j = j + 1
3772 Stmts 1 and 3 do not need to be vectorized, because loop control and
3773 addressing of vectorized data-refs are handled differently.
3775 This pass detects such stmts. */
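/* A sketch of the scheme used below (illustrative): the worklist is
   seeded with the stmts that are relevant by themselves according to
   vect_stmt_relevant_p; stmts are then popped one at a time, and
   process_use propagates their liveness/relevance to the stmts that
   define their uses, pushing each newly marked def-stmt back onto the
   worklist.  Because vect_mark_relevant pushes a stmt only when its
   relevance or liveness actually changed, and both can only increase,
   this fixpoint iteration terminates.  */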
3777 static bool
3778 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3780 VEC(gimple,heap) *worklist;
3781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3782 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3783 unsigned int nbbs = loop->num_nodes;
3784 gimple_stmt_iterator si;
3785 gimple stmt;
3786 unsigned int i;
3787 stmt_vec_info stmt_vinfo;
3788 basic_block bb;
3789 gimple phi;
3790 bool live_p;
3791 enum vect_relevant relevant;
3793 if (vect_print_dump_info (REPORT_DETAILS))
3794 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3796 worklist = VEC_alloc (gimple, heap, 64);
3798 /* 1. Init worklist. */
3799 for (i = 0; i < nbbs; i++)
3801 bb = bbs[i];
3802 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
3804 phi = gsi_stmt (si);
3805 if (vect_print_dump_info (REPORT_DETAILS))
3807 fprintf (vect_dump, "init: phi relevant? ");
3808 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3811 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3812 vect_mark_relevant (&worklist, phi, relevant, live_p);
3814 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3816 stmt = gsi_stmt (si);
3817 if (vect_print_dump_info (REPORT_DETAILS))
3819 fprintf (vect_dump, "init: stmt relevant? ");
3820 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3823 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3824 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3828 /* 2. Process_worklist */
3829 while (VEC_length (gimple, worklist) > 0)
3831 use_operand_p use_p;
3832 ssa_op_iter iter;
3834 stmt = VEC_pop (gimple, worklist);
3835 if (vect_print_dump_info (REPORT_DETAILS))
3837 fprintf (vect_dump, "worklist: examine stmt: ");
3838 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3841 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3842 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3843 liveness and relevance properties of STMT. */
3844 stmt_vinfo = vinfo_for_stmt (stmt);
3845 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3846 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3848 /* Generally, the liveness and relevance properties of STMT are
3849 propagated as is to the DEF_STMTs of its USEs:
3850 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3851 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3853 One exception is when STMT has been identified as defining a reduction
3854 variable; in this case we set the liveness/relevance as follows:
3855 live_p = false
3856 relevant = vect_used_by_reduction
3857 This is because we distinguish between two kinds of relevant stmts -
3858 those that are used by a reduction computation, and those that are
3859 (also) used by a regular computation. This allows us later on to
3860 identify stmts that are used solely by a reduction, and therefore the
3861 order of the results that they produce does not have to be kept.
3863 Reduction phis are expected to be used by a reduction stmt, or by
3864 a stmt in an outer loop; other reduction stmts are expected to be
3865 in the loop, and possibly used by a stmt in an outer loop.
3866 Here are the expected values of "relevant" for reduction phis/stmts:
3868 relevance:                          phi     stmt
3869 vect_unused_in_loop                         ok
3870 vect_used_in_outer_by_reduction     ok      ok
3871 vect_used_in_outer                  ok      ok
3872 vect_used_by_reduction              ok
3873 vect_used_in_loop                                */
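/* For example (illustrative): in s += a[i] * b[i], the multiply feeds
   only the reduction computation of s; it is therefore marked
   vect_used_by_reduction, and the order of the partial results it
   produces need not be preserved.  */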
3875 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3877 enum vect_relevant tmp_relevant = relevant;
3878 switch (tmp_relevant)
3880 case vect_unused_in_loop:
3881 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
3882 relevant = vect_used_by_reduction;
3883 break;
3885 case vect_used_in_outer_by_reduction:
3886 case vect_used_in_outer:
3887 gcc_assert (!is_gimple_assign (stmt)
3888 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR && gimple_assign_rhs_code (stmt) != DOT_PROD_EXPR));
3889 break;
3891 case vect_used_by_reduction:
3892 if (gimple_code (stmt) == GIMPLE_PHI)
3893 break;
3894 /* fall through */
3895 case vect_used_in_loop:
3896 default:
3897 if (vect_print_dump_info (REPORT_DETAILS))
3898 fprintf (vect_dump, "unsupported use of reduction.");
3899 VEC_free (gimple, heap, worklist);
3900 return false;
3902 live_p = false;
3905 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3907 tree op = USE_FROM_PTR (use_p);
3908 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3910 VEC_free (gimple, heap, worklist);
3911 return false;
3914 } /* while worklist */
3916 VEC_free (gimple, heap, worklist);
3917 return true;
3921 /* Function vect_can_advance_ivs_p
3923 In case the number of iterations that LOOP iterates is unknown at compile
3924 time, an epilog loop will be generated, and the loop induction variables
3925 (IVs) will be "advanced" to the value they are supposed to take just before
3926 the epilog loop. Here we check that the access function of the loop IVs
3927 and the expression that represents the loop bound are simple enough.
3928 These restrictions will be relaxed in the future. */
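/* An illustrative sketch (assuming the usual peeling scheme): for an IV
   with access function {init, +, step}_loop and unknown trip count n,
   the vectorized loop executes n' = n - (n % VF) iterations, so before
   entering the epilog the IV can be advanced directly to

       iv = init + n' * step;

   This is only straightforward when the evolution is a simple affine
   chrec, which is what the checks below enforce.  */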
3930 static bool
3931 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3933 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3934 basic_block bb = loop->header;
3935 gimple phi;
3936 gimple_stmt_iterator gsi;
3938 /* Analyze phi functions of the loop header. */
3940 if (vect_print_dump_info (REPORT_DETAILS))
3941 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3943 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3945 tree access_fn = NULL;
3946 tree evolution_part;
3948 phi = gsi_stmt (gsi);
3949 if (vect_print_dump_info (REPORT_DETAILS))
3951 fprintf (vect_dump, "Analyze phi: ");
3952 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3955 /* Skip virtual phis.  The data dependences that are associated with
3956 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3958 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3960 if (vect_print_dump_info (REPORT_DETAILS))
3961 fprintf (vect_dump, "virtual phi. skip.");
3962 continue;
3965 /* Skip reduction phis. */
3967 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3969 if (vect_print_dump_info (REPORT_DETAILS))
3970 fprintf (vect_dump, "reduc phi. skip.");
3971 continue;
3974 /* Analyze the evolution function. */
3976 access_fn = instantiate_parameters
3977 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3979 if (!access_fn)
3981 if (vect_print_dump_info (REPORT_DETAILS))
3982 fprintf (vect_dump, "No access function.");
3983 return false;
3986 if (vect_print_dump_info (REPORT_DETAILS))
3988 fprintf (vect_dump, "Access function of PHI: ");
3989 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3992 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3994 if (evolution_part == NULL_TREE)
3996 if (vect_print_dump_info (REPORT_DETAILS))
3997 fprintf (vect_dump, "No evolution.");
3998 return false;
4001 /* FORNOW: We do not transform initial conditions of IVs
4002 whose evolution functions are polynomials of degree >= 2.  */
4004 if (tree_is_chrec (evolution_part))
4005 return false;
4008 return true;
4012 /* Function vect_get_loop_niters.
4014 Determine how many iterations the loop executes.
4015 If an expression that represents the number of iterations
4016 can be constructed, place it in NUMBER_OF_ITERATIONS.
4017 Return the loop exit condition. */
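/* For example (illustrative): for a countable loop such as
   "for (i = 0; i < n; i++)" in the expected do-while shape, the exit
   condition executes n times, so NUMBER_OF_ITERATIONS is set to the
   (possibly symbolic) expression n.  */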
4019 static gimple
4020 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
4022 tree niters;
4024 if (vect_print_dump_info (REPORT_DETAILS))
4025 fprintf (vect_dump, "=== get_loop_niters ===");
4027 niters = number_of_exit_cond_executions (loop);
4029 if (niters != NULL_TREE
4030 && niters != chrec_dont_know)
4032 *number_of_iterations = niters;
4034 if (vect_print_dump_info (REPORT_DETAILS))
4036 fprintf (vect_dump, "==> get_loop_niters:");
4037 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
4041 return get_loop_exit_condition (loop);
4045 /* Function vect_analyze_loop_1.
4047 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4048 for it. The different analyses will record information in the
4049 loop_vec_info struct. This is a subset of the analyses applied in
4050 vect_analyze_loop, to be applied on an inner-loop nested in the loop
4051 that is now considered for (outer-loop) vectorization. */
4053 static loop_vec_info
4054 vect_analyze_loop_1 (struct loop *loop)
4056 loop_vec_info loop_vinfo;
4058 if (vect_print_dump_info (REPORT_DETAILS))
4059 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
4061 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
4063 loop_vinfo = vect_analyze_loop_form (loop);
4064 if (!loop_vinfo)
4066 if (vect_print_dump_info (REPORT_DETAILS))
4067 fprintf (vect_dump, "bad inner-loop form.");
4068 return NULL;
4071 return loop_vinfo;
4075 /* Function vect_analyze_loop_form.
4077 Verify that certain CFG restrictions hold, including:
4078 - the loop has a pre-header
4079 - the loop has a single entry and exit
4080 - the loop exit condition is simple enough, and the number of iterations
4081 can be analyzed (a countable loop). */
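/* For example (illustrative): "for (i = 0; i < n; i++)" is countable,
   with a computable iteration count of n, whereas a pointer-chasing
   loop such as "while (p) p = p->next;" has no analyzable iteration
   count and is rejected here.  */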
4083 loop_vec_info
4084 vect_analyze_loop_form (struct loop *loop)
4086 loop_vec_info loop_vinfo;
4087 gimple loop_cond;
4088 tree number_of_iterations = NULL;
4089 loop_vec_info inner_loop_vinfo = NULL;
4091 if (vect_print_dump_info (REPORT_DETAILS))
4092 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4094 /* Different restrictions apply when we are considering an inner-most loop
4095 vs. an outer (nested) loop.
4096 (FORNOW.  May want to relax some of these restrictions in the future.)  */
4098 if (!loop->inner)
4100 /* Inner-most loop. We currently require that the number of BBs is
4101 exactly 2 (the header and latch). Vectorizable inner-most loops
4102 look like this:
4104 (pre-header)
4106 header <--------+
4107 | | |
4108 | +--> latch --+
4110 (exit-bb) */
4112 if (loop->num_nodes != 2)
4114 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4115 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4116 return NULL;
4119 if (empty_block_p (loop->header))
4121 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4122 fprintf (vect_dump, "not vectorized: empty loop.");
4123 return NULL;
4126 else
4128 struct loop *innerloop = loop->inner;
4129 edge backedge, entryedge;
4131 /* Nested loop. We currently require that the loop is doubly-nested,
4132 contains a single inner loop, and the number of BBs is exactly 5.
4133 Vectorizable outer-loops look like this:
4135 (pre-header)
4137 header <---+
4139 inner-loop |
4141 tail ------+
4143 (exit-bb)
4145 The inner-loop has the properties expected of inner-most loops
4146 as described above. */
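/* (In this shape the five BBs are presumably the outer-loop header,
   the inner-loop header and latch, the tail BB on the inner-loop exit
   path, and the outer-loop latch; the exit-bb lies outside the
   loop.)  */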
4148 if ((loop->inner)->inner || (loop->inner)->next)
4150 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4151 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4152 return NULL;
4155 /* Analyze the inner-loop. */
4156 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4157 if (!inner_loop_vinfo)
4159 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4160 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4161 return NULL;
4164 if (!expr_invariant_in_loop_p (loop,
4165 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4167 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4168 fprintf (vect_dump,
4169 "not vectorized: inner-loop count not invariant.");
4170 destroy_loop_vec_info (inner_loop_vinfo, true);
4171 return NULL;
4174 if (loop->num_nodes != 5)
4176 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4177 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4178 destroy_loop_vec_info (inner_loop_vinfo, true);
4179 return NULL;
4182 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4183 backedge = EDGE_PRED (innerloop->header, 1);
4184 entryedge = EDGE_PRED (innerloop->header, 0);
4185 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4187 backedge = EDGE_PRED (innerloop->header, 0);
4188 entryedge = EDGE_PRED (innerloop->header, 1);
4191 if (entryedge->src != loop->header
4192 || !single_exit (innerloop)
4193 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4195 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4196 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4197 destroy_loop_vec_info (inner_loop_vinfo, true);
4198 return NULL;
4201 if (vect_print_dump_info (REPORT_DETAILS))
4202 fprintf (vect_dump, "Considering outer-loop vectorization.");
4205 if (!single_exit (loop)
4206 || EDGE_COUNT (loop->header->preds) != 2)
4208 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4210 if (!single_exit (loop))
4211 fprintf (vect_dump, "not vectorized: multiple exits.");
4212 else if (EDGE_COUNT (loop->header->preds) != 2)
4213 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4215 if (inner_loop_vinfo)
4216 destroy_loop_vec_info (inner_loop_vinfo, true);
4217 return NULL;
4220 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4221 that the loop is represented as a do-while (with a proper if-guard
4222 before the loop if needed), where the loop header contains all the
4223 executable statements, and the latch is empty. */
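/* Illustratively, a source-level loop

     while (i < n) { body; i++; }

   is expected here in guarded do-while shape:

     if (i < n)
       do { body; i++; } while (i < n);

   so that the header holds all the executable stmts and the latch
   carries only the back edge.  */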
4224 if (!empty_block_p (loop->latch)
4225 || phi_nodes (loop->latch))
4227 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4228 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4229 if (inner_loop_vinfo)
4230 destroy_loop_vec_info (inner_loop_vinfo, true);
4231 return NULL;
4234 /* Make sure there exists a single-predecessor exit bb: */
4235 if (!single_pred_p (single_exit (loop)->dest))
4237 edge e = single_exit (loop);
4238 if (!(e->flags & EDGE_ABNORMAL))
4240 split_loop_exit_edge (e);
4241 if (vect_print_dump_info (REPORT_DETAILS))
4242 fprintf (vect_dump, "split exit edge.");
4244 else
4246 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4247 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4248 if (inner_loop_vinfo)
4249 destroy_loop_vec_info (inner_loop_vinfo, true);
4250 return NULL;
4254 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4255 if (!loop_cond)
4257 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4258 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4259 if (inner_loop_vinfo)
4260 destroy_loop_vec_info (inner_loop_vinfo, true);
4261 return NULL;
4264 if (!number_of_iterations)
4266 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4267 fprintf (vect_dump,
4268 "not vectorized: number of iterations cannot be computed.");
4269 if (inner_loop_vinfo)
4270 destroy_loop_vec_info (inner_loop_vinfo, true);
4271 return NULL;
4274 if (chrec_contains_undetermined (number_of_iterations))
4276 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4277 fprintf (vect_dump, "Infinite number of iterations.");
4278 if (inner_loop_vinfo)
4279 destroy_loop_vec_info (inner_loop_vinfo, true);
4280 return NULL;
4283 if (!NITERS_KNOWN_P (number_of_iterations))
4285 if (vect_print_dump_info (REPORT_DETAILS))
4287 fprintf (vect_dump, "Symbolic number of iterations is ");
4288 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4291 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4293 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4294 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4295 if (inner_loop_vinfo)
4296 destroy_loop_vec_info (inner_loop_vinfo, false);
4297 return NULL;
4300 loop_vinfo = new_loop_vec_info (loop);
4301 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4302 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4304 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4306 /* CHECKME: May want to keep it around in the future.  */
4307 if (inner_loop_vinfo)
4308 destroy_loop_vec_info (inner_loop_vinfo, false);
4310 gcc_assert (!loop->aux);
4311 loop->aux = loop_vinfo;
4312 return loop_vinfo;
4316 /* Function vect_analyze_loop.
4318 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4319 for it. The different analyses will record information in the
4320 loop_vec_info struct. */
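/* A sketch of the pipeline applied below: loop form -> data references
   -> scalar cycles -> pattern recognition -> relevance marking ->
   alignment analysis -> vectorization factor -> data dependences ->
   access patterns -> run-time alias pruning -> SLP detection ->
   alignment enhancement -> operation legality.  A failed analysis
   (except SLP detection, which is optional) destroys the loop_vec_info
   and gives up on the loop.  */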
4321 loop_vec_info
4322 vect_analyze_loop (struct loop *loop)
4324 bool ok;
4325 loop_vec_info loop_vinfo;
4327 if (vect_print_dump_info (REPORT_DETAILS))
4328 fprintf (vect_dump, "===== analyze_loop_nest =====");
4330 if (loop_outer (loop)
4331 && loop_vec_info_for_loop (loop_outer (loop))
4332 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4334 if (vect_print_dump_info (REPORT_DETAILS))
4335 fprintf (vect_dump, "outer-loop already vectorized.");
4336 return NULL;
4339 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
4341 loop_vinfo = vect_analyze_loop_form (loop);
4342 if (!loop_vinfo)
4344 if (vect_print_dump_info (REPORT_DETAILS))
4345 fprintf (vect_dump, "bad loop form.");
4346 return NULL;
4349 /* Find all data references in the loop (which correspond to vdefs/vuses)
4350 and analyze their evolution in the loop.
4352 FORNOW: Handle only simple array references whose
4353 alignment can be forced, and aligned pointer-references. */
4355 ok = vect_analyze_data_refs (loop_vinfo);
4356 if (!ok)
4358 if (vect_print_dump_info (REPORT_DETAILS))
4359 fprintf (vect_dump, "bad data references.");
4360 destroy_loop_vec_info (loop_vinfo, true);
4361 return NULL;
4364 /* Classify all cross-iteration scalar data-flow cycles.
4365 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4367 vect_analyze_scalar_cycles (loop_vinfo);
4369 vect_pattern_recog (loop_vinfo);
4371 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4373 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4374 if (!ok)
4376 if (vect_print_dump_info (REPORT_DETAILS))
4377 fprintf (vect_dump, "unexpected pattern.");
4378 destroy_loop_vec_info (loop_vinfo, true);
4379 return NULL;
4382 /* Analyze the alignment of the data-refs in the loop.
4383 Fail if a data reference is found that cannot be vectorized. */
4385 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4386 if (!ok)
4388 if (vect_print_dump_info (REPORT_DETAILS))
4389 fprintf (vect_dump, "bad data alignment.");
4390 destroy_loop_vec_info (loop_vinfo, true);
4391 return NULL;
4394 ok = vect_determine_vectorization_factor (loop_vinfo);
4395 if (!ok)
4397 if (vect_print_dump_info (REPORT_DETAILS))
4398 fprintf (vect_dump, "can't determine vectorization factor.");
4399 destroy_loop_vec_info (loop_vinfo, true);
4400 return NULL;
4403 /* Analyze data dependences between the data-refs in the loop.
4404 FORNOW: fail at the first data dependence that we encounter. */
4406 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4407 if (!ok)
4409 if (vect_print_dump_info (REPORT_DETAILS))
4410 fprintf (vect_dump, "bad data dependence.");
4411 destroy_loop_vec_info (loop_vinfo, true);
4412 return NULL;
4415 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4416 complex, etc.).  FORNOW: Only handle consecutive access patterns. */
4418 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4419 if (!ok)
4421 if (vect_print_dump_info (REPORT_DETAILS))
4422 fprintf (vect_dump, "bad data access.");
4423 destroy_loop_vec_info (loop_vinfo, true);
4424 return NULL;
4427 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4428 It is important to call pruning after vect_analyze_data_ref_accesses,
4429 since we use grouping information gathered by interleaving analysis. */
4430 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4431 if (!ok)
4433 if (vect_print_dump_info (REPORT_DETAILS))
4434 fprintf (vect_dump, "too long list of versioning for alias "
4435 "run-time tests.");
4436 destroy_loop_vec_info (loop_vinfo, true);
4437 return NULL;
4440 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4441 ok = vect_analyze_slp (loop_vinfo);
4442 if (ok)
4444 /* Decide which of the possible SLP instances to vectorize using SLP. */
4445 vect_make_slp_decision (loop_vinfo);
4447 /* Find stmts that need to be both vectorized and SLPed. */
4448 vect_detect_hybrid_slp (loop_vinfo);
4451 /* This pass will decide on using loop versioning and/or loop peeling in
4452 order to enhance the alignment of data references in the loop. */
4454 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4455 if (!ok)
4457 if (vect_print_dump_info (REPORT_DETAILS))
4458 fprintf (vect_dump, "bad data alignment.");
4459 destroy_loop_vec_info (loop_vinfo, true);
4460 return NULL;
4463 /* Scan all the operations in the loop and make sure they are
4464 vectorizable. */
4466 ok = vect_analyze_operations (loop_vinfo);
4467 if (!ok)
4469 if (vect_print_dump_info (REPORT_DETAILS))
4470 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4471 destroy_loop_vec_info (loop_vinfo, true);
4472 return NULL;
4475 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4477 return loop_vinfo;