Daily bump.
[official-gcc.git] / gcc / tree-vect-analyze.c
blob86ca6b65ca4711ed41db97db3bb7480e62c8039e
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4byte elements,
73 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
79 in the loop.
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
82 original loop:
83 for (i=0; i<N; i++){
84 a[i] = b[i] + c[i];
87 vectorized loop:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
93 static bool
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
101 tree scalar_type;
102 tree phi;
103 tree vectype;
104 unsigned int nunits;
105 stmt_vec_info stmt_info;
106 int i;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
138 if (!vectype)
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump,
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
146 return false;
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
185 continue;
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
195 return false;
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
206 return false;
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case when a vectype had been already set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
218 else
220 tree operation;
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
226 result (lhs).
227 For stmts whose result-type is different than the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
239 check the rhs. */
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR)
247 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
249 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
250 scalar_type = rhs_type;
253 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "get vectype for scalar type: ");
256 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
259 vectype = get_vectype_for_scalar_type (scalar_type);
260 if (!vectype)
262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
264 fprintf (vect_dump,
265 "not vectorized: unsupported data-type ");
266 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
268 return false;
270 STMT_VINFO_VECTYPE (stmt_info) = vectype;
273 if (vect_print_dump_info (REPORT_DETAILS))
275 fprintf (vect_dump, "vectype: ");
276 print_generic_expr (vect_dump, vectype, TDF_SLIM);
279 nunits = TYPE_VECTOR_SUBPARTS (vectype);
280 if (vect_print_dump_info (REPORT_DETAILS))
281 fprintf (vect_dump, "nunits = %d", nunits);
283 if (!vectorization_factor
284 || (nunits > vectorization_factor))
285 vectorization_factor = nunits;
290 /* TODO: Analyze cost. Decide if worth while to vectorize. */
291 if (vect_print_dump_info (REPORT_DETAILS))
292 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
293 if (vectorization_factor <= 1)
295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
296 fprintf (vect_dump, "not vectorized: unsupported data-type");
297 return false;
299 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
301 return true;
305 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
306 the number of created vector stmts depends on the unrolling factor). However,
307 the actual number of vector stmts for every SLP node depends on VF which is
308 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
309 In this function we assume that the inside costs calculated in
310 vect_model_xxx_cost are linear in ncopies. */
312 static void
313 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
315 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
316 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
317 slp_instance instance;
319 if (vect_print_dump_info (REPORT_SLP))
320 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
322 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
323 /* We assume that costs are linear in ncopies. */
324 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
325 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
329 /* Function vect_analyze_operations.
331 Scan the loop stmts and make sure they are all vectorizable. */
333 static bool
334 vect_analyze_operations (loop_vec_info loop_vinfo)
336 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
337 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
338 int nbbs = loop->num_nodes;
339 block_stmt_iterator si;
340 unsigned int vectorization_factor = 0;
341 int i;
342 bool ok;
343 tree phi;
344 stmt_vec_info stmt_info;
345 bool need_to_vectorize = false;
346 int min_profitable_iters;
347 int min_scalar_loop_bound;
348 unsigned int th;
349 bool only_slp_in_loop = true;
351 if (vect_print_dump_info (REPORT_DETAILS))
352 fprintf (vect_dump, "=== vect_analyze_operations ===");
354 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
355 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
357 for (i = 0; i < nbbs; i++)
359 basic_block bb = bbs[i];
361 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
363 ok = true;
365 stmt_info = vinfo_for_stmt (phi);
366 if (vect_print_dump_info (REPORT_DETAILS))
368 fprintf (vect_dump, "examining phi: ");
369 print_generic_expr (vect_dump, phi, TDF_SLIM);
372 if (! is_loop_header_bb_p (bb))
374 /* inner-loop loop-closed exit phi in outer-loop vectorization
375 (i.e. a phi in the tail of the outer-loop).
376 FORNOW: we currently don't support the case that these phis
377 are not used in the outerloop, cause this case requires
378 to actually do something here. */
379 if (!STMT_VINFO_RELEVANT_P (stmt_info)
380 || STMT_VINFO_LIVE_P (stmt_info))
382 if (vect_print_dump_info (REPORT_DETAILS))
383 fprintf (vect_dump,
384 "Unsupported loop-closed phi in outer-loop.");
385 return false;
387 continue;
390 gcc_assert (stmt_info);
392 if (STMT_VINFO_LIVE_P (stmt_info))
394 /* FORNOW: not yet supported. */
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
396 fprintf (vect_dump, "not vectorized: value used after loop.");
397 return false;
400 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
401 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
403 /* A scalar-dependence cycle that we don't support. */
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
405 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
406 return false;
409 if (STMT_VINFO_RELEVANT_P (stmt_info))
411 need_to_vectorize = true;
412 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
413 ok = vectorizable_induction (phi, NULL, NULL);
416 if (!ok)
418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
420 fprintf (vect_dump,
421 "not vectorized: relevant phi not supported: ");
422 print_generic_expr (vect_dump, phi, TDF_SLIM);
424 return false;
428 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
430 tree stmt = bsi_stmt (si);
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
432 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
434 if (vect_print_dump_info (REPORT_DETAILS))
436 fprintf (vect_dump, "==> examining statement: ");
437 print_generic_expr (vect_dump, stmt, TDF_SLIM);
440 gcc_assert (stmt_info);
442 /* skip stmts which do not need to be vectorized.
443 this is expected to include:
444 - the COND_EXPR which is the loop exit condition
445 - any LABEL_EXPRs in the loop
446 - computations that are used only for array indexing or loop
447 control */
449 if (!STMT_VINFO_RELEVANT_P (stmt_info)
450 && !STMT_VINFO_LIVE_P (stmt_info))
452 if (vect_print_dump_info (REPORT_DETAILS))
453 fprintf (vect_dump, "irrelevant.");
454 continue;
457 switch (STMT_VINFO_DEF_TYPE (stmt_info))
459 case vect_loop_def:
460 break;
462 case vect_reduction_def:
463 gcc_assert (relevance == vect_used_in_outer
464 || relevance == vect_used_in_outer_by_reduction
465 || relevance == vect_unused_in_loop);
466 break;
468 case vect_induction_def:
469 case vect_constant_def:
470 case vect_invariant_def:
471 case vect_unknown_def_type:
472 default:
473 gcc_unreachable ();
476 if (STMT_VINFO_RELEVANT_P (stmt_info))
478 gcc_assert (GIMPLE_STMT_P (stmt)
479 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
480 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
481 need_to_vectorize = true;
484 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
485 || vectorizable_type_demotion (stmt, NULL, NULL)
486 || vectorizable_conversion (stmt, NULL, NULL, NULL)
487 || vectorizable_operation (stmt, NULL, NULL, NULL)
488 || vectorizable_assignment (stmt, NULL, NULL, NULL)
489 || vectorizable_load (stmt, NULL, NULL, NULL)
490 || vectorizable_call (stmt, NULL, NULL)
491 || vectorizable_store (stmt, NULL, NULL, NULL)
492 || vectorizable_condition (stmt, NULL, NULL)
493 || vectorizable_reduction (stmt, NULL, NULL));
495 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
496 need extra handling, except for vectorizable reductions. */
497 if (STMT_VINFO_LIVE_P (stmt_info)
498 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
499 ok |= vectorizable_live_operation (stmt, NULL, NULL);
501 if (!ok)
503 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
505 fprintf (vect_dump, "not vectorized: stmt not supported: ");
506 print_generic_expr (vect_dump, stmt, TDF_SLIM);
508 return false;
511 if (!PURE_SLP_STMT (stmt_info))
513 /* STMT needs loop-based vectorization. */
514 only_slp_in_loop = false;
516 /* Groups of strided accesses whose size is not a power of 2 are
517 not vectorizable yet using loop-vectorization. Therefore, if
518 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
519 both SLPed and loop-based vectorzed), the loop cannot be
520 vectorized. */
521 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
522 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
523 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
525 if (vect_print_dump_info (REPORT_DETAILS))
527 fprintf (vect_dump, "not vectorized: the size of group "
528 "of strided accesses is not a power of 2");
529 print_generic_expr (vect_dump, stmt, TDF_SLIM);
531 return false;
534 } /* stmts in bb */
535 } /* bbs */
537 /* All operations in the loop are either irrelevant (deal with loop
538 control, or dead), or only used outside the loop and can be moved
539 out of the loop (e.g. invariants, inductions). The loop can be
540 optimized away by scalar optimizations. We're better off not
541 touching this loop. */
542 if (!need_to_vectorize)
544 if (vect_print_dump_info (REPORT_DETAILS))
545 fprintf (vect_dump,
546 "All the computation can be taken out of the loop.");
547 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
548 fprintf (vect_dump,
549 "not vectorized: redundant loop. no profit to vectorize.");
550 return false;
553 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
554 vectorization factor of the loop is the unrolling factor required by the
555 SLP instances. If that unrolling factor is 1, we say, that we perform
556 pure SLP on loop - cross iteration parallelism is not exploited. */
557 if (only_slp_in_loop)
558 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
559 else
560 vectorization_factor = least_common_multiple (vectorization_factor,
561 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
563 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && vect_print_dump_info (REPORT_DETAILS))
567 fprintf (vect_dump,
568 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
569 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
571 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
572 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
575 fprintf (vect_dump, "not vectorized: iteration count too small.");
576 if (vect_print_dump_info (REPORT_DETAILS))
577 fprintf (vect_dump,"not vectorized: iteration count smaller than "
578 "vectorization factor.");
579 return false;
582 /* Analyze cost. Decide if worth while to vectorize. */
584 /* Once VF is set, SLP costs should be updated since the number of created
585 vector stmts depends on VF. */
586 vect_update_slp_costs_according_to_vf (loop_vinfo);
588 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
589 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
590 if (min_profitable_iters < 0)
592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
593 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
594 if (vect_print_dump_info (REPORT_DETAILS))
595 fprintf (vect_dump, "not vectorized: vector version will never be "
596 "profitable.");
597 return false;
600 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
601 * vectorization_factor) - 1);
603 /* Use the cost model only if it is more conservative than user specified
604 threshold. */
606 th = (unsigned) min_scalar_loop_bound;
607 if (min_profitable_iters
608 && (!min_scalar_loop_bound
609 || min_profitable_iters > min_scalar_loop_bound))
610 th = (unsigned) min_profitable_iters;
612 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
613 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
615 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
616 fprintf (vect_dump, "not vectorized: vectorization not "
617 "profitable.");
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "not vectorized: iteration count smaller than "
620 "user specified loop bound parameter or minimum "
621 "profitable iterations (whichever is more conservative).");
622 return false;
625 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
626 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
627 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "epilog loop required.");
631 if (!vect_can_advance_ivs_p (loop_vinfo))
633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
634 fprintf (vect_dump,
635 "not vectorized: can't create epilog loop 1.");
636 return false;
638 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
640 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
641 fprintf (vect_dump,
642 "not vectorized: can't create epilog loop 2.");
643 return false;
647 return true;
651 /* Function exist_non_indexing_operands_for_use_p
653 USE is one of the uses attached to STMT. Check if USE is
654 used in STMT for anything other than indexing an array. */
656 static bool
657 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
659 tree operand;
660 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
662 /* USE corresponds to some operand in STMT. If there is no data
663 reference in STMT, then any operand that corresponds to USE
664 is not indexing an array. */
665 if (!STMT_VINFO_DATA_REF (stmt_info))
666 return true;
668 /* STMT has a data_ref. FORNOW this means that its of one of
669 the following forms:
670 -1- ARRAY_REF = var
671 -2- var = ARRAY_REF
672 (This should have been verified in analyze_data_refs).
674 'var' in the second case corresponds to a def, not a use,
675 so USE cannot correspond to any operands that are not used
676 for array indexing.
678 Therefore, all we need to check is if STMT falls into the
679 first case, and whether var corresponds to USE. */
681 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
682 return false;
684 operand = GIMPLE_STMT_OPERAND (stmt, 1);
686 if (TREE_CODE (operand) != SSA_NAME)
687 return false;
689 if (operand == use)
690 return true;
692 return false;
696 /* Function vect_analyze_scalar_cycles_1.
698 Examine the cross iteration def-use cycles of scalar variables
699 in LOOP. LOOP_VINFO represents the loop that is noe being
700 considered for vectorization (can be LOOP, or an outer-loop
701 enclosing LOOP). */
703 static void
704 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
706 tree phi;
707 basic_block bb = loop->header;
708 tree dumy;
709 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
711 if (vect_print_dump_info (REPORT_DETAILS))
712 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
714 /* First - identify all inductions. */
715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
717 tree access_fn = NULL;
718 tree def = PHI_RESULT (phi);
719 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
721 if (vect_print_dump_info (REPORT_DETAILS))
723 fprintf (vect_dump, "Analyze phi: ");
724 print_generic_expr (vect_dump, phi, TDF_SLIM);
727 /* Skip virtual phi's. The data dependences that are associated with
728 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
729 if (!is_gimple_reg (SSA_NAME_VAR (def)))
730 continue;
732 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
734 /* Analyze the evolution function. */
735 access_fn = analyze_scalar_evolution (loop, def);
736 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
738 fprintf (vect_dump, "Access function of PHI: ");
739 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
742 if (!access_fn
743 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
745 VEC_safe_push (tree, heap, worklist, phi);
746 continue;
749 if (vect_print_dump_info (REPORT_DETAILS))
750 fprintf (vect_dump, "Detected induction.");
751 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
755 /* Second - identify all reductions. */
756 while (VEC_length (tree, worklist) > 0)
758 tree phi = VEC_pop (tree, worklist);
759 tree def = PHI_RESULT (phi);
760 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
761 tree reduc_stmt;
763 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "Analyze phi: ");
766 print_generic_expr (vect_dump, phi, TDF_SLIM);
769 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
770 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
772 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
773 if (reduc_stmt)
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "Detected reduction.");
777 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
778 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
779 vect_reduction_def;
781 else
782 if (vect_print_dump_info (REPORT_DETAILS))
783 fprintf (vect_dump, "Unknown def-use cycle pattern.");
786 VEC_free (tree, heap, worklist);
787 return;
791 /* Function vect_analyze_scalar_cycles.
793 Examine the cross iteration def-use cycles of scalar variables, by
794 analyzing the loop-header PHIs of scalar variables; Classify each
795 cycle as one of the following: invariant, induction, reduction, unknown.
796 We do that for the loop represented by LOOP_VINFO, and also to its
797 inner-loop, if exists.
798 Examples for scalar cycles:
800 Example1: reduction:
802 loop1:
803 for (i=0; i<N; i++)
804 sum += a[i];
806 Example2: induction:
808 loop2:
809 for (i=0; i<N; i++)
810 a[i] = i; */
812 static void
813 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
819 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
820 Reductions in such inner-loop therefore have different properties than
821 the reductions in the nest that gets vectorized:
822 1. When vectorized, they are executed in the same order as in the original
823 scalar loop, so we can't change the order of computation when
824 vectorizing them.
825 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
826 current checks are too strict. */
828 if (loop->inner)
829 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
833 /* Function vect_insert_into_interleaving_chain.
835 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
837 static void
838 vect_insert_into_interleaving_chain (struct data_reference *dra,
839 struct data_reference *drb)
841 tree prev, next, next_init;
842 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
843 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
845 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
846 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
847 while (next)
849 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
850 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
852 /* Insert here. */
853 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
854 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
855 return;
857 prev = next;
858 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
861 /* We got to the end of the list. Insert here. */
862 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
863 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
867 /* Function vect_update_interleaving_chain.
869 For two data-refs DRA and DRB that are a part of a chain interleaved data
870 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
872 There are four possible cases:
873 1. New stmts - both DRA and DRB are not a part of any chain:
874 FIRST_DR = DRB
875 NEXT_DR (DRB) = DRA
876 2. DRB is a part of a chain and DRA is not:
877 no need to update FIRST_DR
878 no need to insert DRB
879 insert DRA according to init
880 3. DRA is a part of a chain and DRB is not:
881 if (init of FIRST_DR > init of DRB)
882 FIRST_DR = DRB
883 NEXT(FIRST_DR) = previous FIRST_DR
884 else
885 insert DRB according to its init
886 4. both DRA and DRB are in some interleaving chains:
887 choose the chain with the smallest init of FIRST_DR
888 insert the nodes of the second chain into the first one. */
890 static void
891 vect_update_interleaving_chain (struct data_reference *drb,
892 struct data_reference *dra)
894 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
895 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
896 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
897 tree node, prev, next, node_init, first_stmt;
899 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
900 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
902 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
903 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
904 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
905 return;
908 /* 2. DRB is a part of a chain and DRA is not. */
909 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
911 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
912 /* Insert DRA into the chain of DRB. */
913 vect_insert_into_interleaving_chain (dra, drb);
914 return;
917 /* 3. DRA is a part of a chain and DRB is not. */
918 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
920 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
921 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
922 old_first_stmt)));
923 tree tmp;
925 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
927 /* DRB's init is smaller than the init of the stmt previously marked
928 as the first stmt of the interleaving chain of DRA. Therefore, we
929 update FIRST_STMT and put DRB in the head of the list. */
930 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
931 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
933 /* Update all the stmts in the list to point to the new FIRST_STMT. */
934 tmp = old_first_stmt;
935 while (tmp)
937 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
938 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
941 else
943 /* Insert DRB in the list of DRA. */
944 vect_insert_into_interleaving_chain (drb, dra);
945 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
947 return;
950 /* 4. both DRA and DRB are in some interleaving chains. */
951 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
952 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
953 if (first_a == first_b)
954 return;
955 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
956 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
958 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
960 /* Insert the nodes of DRA chain into the DRB chain.
961 After inserting a node, continue from this node of the DRB chain (don't
962 start from the beginning. */
963 node = DR_GROUP_FIRST_DR (stmtinfo_a);
964 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
965 first_stmt = first_b;
967 else
969 /* Insert the nodes of DRB chain into the DRA chain.
970 After inserting a node, continue from this node of the DRA chain (don't
971 start from the beginning. */
972 node = DR_GROUP_FIRST_DR (stmtinfo_b);
973 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
974 first_stmt = first_a;
977 while (node)
979 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
980 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
981 while (next)
983 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
984 if (tree_int_cst_compare (next_init, node_init) > 0)
986 /* Insert here. */
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
988 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
989 prev = node;
990 break;
992 prev = next;
993 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
995 if (!next)
997 /* We got to the end of the list. Insert here. */
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
999 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
1000 prev = node;
1002 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1003 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1008 /* Function vect_equal_offsets.
1010 Check if OFFSET1 and OFFSET2 are identical expressions. */
1012 static bool
1013 vect_equal_offsets (tree offset1, tree offset2)
1015 bool res0, res1;
1017 STRIP_NOPS (offset1);
1018 STRIP_NOPS (offset2);
1020 if (offset1 == offset2)
1021 return true;
1023 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1024 || !BINARY_CLASS_P (offset1)
1025 || !BINARY_CLASS_P (offset2))
1026 return false;
1028 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1029 TREE_OPERAND (offset2, 0));
1030 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1031 TREE_OPERAND (offset2, 1));
1033 return (res0 && res1);
1037 /* Function vect_check_interleaving.
1039 Check if DRA and DRB are a part of interleaving. In case they are, insert
1040 DRA and DRB in an interleaving chain. */
1042 static void
1043 vect_check_interleaving (struct data_reference *dra,
1044 struct data_reference *drb)
1046 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1048 /* Check that the data-refs have same first location (except init) and they
1049 are both either store or load (not load and store). */
1050 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1051 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1052 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1053 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1054 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1055 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1056 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1057 || DR_IS_READ (dra) != DR_IS_READ (drb))
1058 return;
1060 /* Check:
1061 1. data-refs are of the same type
1062 2. their steps are equal
1063 3. the step is greater than the difference between data-refs' inits */
1064 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1065 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1067 if (type_size_a != type_size_b
1068 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1069 return;
1071 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1072 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1073 step = TREE_INT_CST_LOW (DR_STEP (dra));
1075 if (init_a > init_b)
1077 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1078 and DRB is accessed before DRA. */
1079 diff_mod_size = (init_a - init_b) % type_size_a;
1081 if ((init_a - init_b) > step)
1082 return;
1084 if (diff_mod_size == 0)
1086 vect_update_interleaving_chain (drb, dra);
1087 if (vect_print_dump_info (REPORT_DR_DETAILS))
1089 fprintf (vect_dump, "Detected interleaving ");
1090 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1091 fprintf (vect_dump, " and ");
1092 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1094 return;
1097 else
1099 /* If init_b == init_a + the size of the type * k, we have an
1100 interleaving, and DRA is accessed before DRB. */
1101 diff_mod_size = (init_b - init_a) % type_size_a;
1103 if ((init_b - init_a) > step)
1104 return;
1106 if (diff_mod_size == 0)
1108 vect_update_interleaving_chain (dra, drb);
1109 if (vect_print_dump_info (REPORT_DR_DETAILS))
1111 fprintf (vect_dump, "Detected interleaving ");
1112 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1113 fprintf (vect_dump, " and ");
1114 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1116 return;
1121 /* Check if data references pointed by DR_I and DR_J are same or
1122 belong to same interleaving group. Return FALSE if drs are
1123 different, otherwise return TRUE. */
1125 static bool
1126 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1128 tree stmt_i = DR_STMT (dr_i);
1129 tree stmt_j = DR_STMT (dr_j);
1131 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1132 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1133 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1134 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1135 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1136 return true;
1137 else
1138 return false;
1141 /* If address ranges represented by DDR_I and DDR_J are equal,
1142 return TRUE, otherwise return FALSE. */
1144 static bool
1145 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1147 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1148 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1149 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1150 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1151 return true;
1152 else
1153 return false;
1156 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1157 tested at run-time. Return TRUE if DDR was successfully inserted.
1158 Return false if versioning is not supported. */
1160 static bool
1161 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1163 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1165 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1166 return false;
1168 if (vect_print_dump_info (REPORT_DR_DETAILS))
1170 fprintf (vect_dump, "mark for run-time aliasing test between ");
1171 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1172 fprintf (vect_dump, " and ");
1173 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1176 if (optimize_size)
1178 if (vect_print_dump_info (REPORT_DR_DETAILS))
1179 fprintf (vect_dump, "versioning not supported when optimizing for size.");
1180 return false;
1183 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1184 if (loop->inner)
1186 if (vect_print_dump_info (REPORT_DR_DETAILS))
1187 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1188 return false;
1191 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1192 return true;
1195 /* Function vect_analyze_data_ref_dependence.
1197 Return TRUE if there (might) exist a dependence between a memory-reference
1198 DRA and a memory-reference DRB. When versioning for alias may check a
1199 dependence at run-time, return FALSE. */
1201 static bool
1202 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1203 loop_vec_info loop_vinfo)
1205 unsigned int i;
1206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1208 struct data_reference *dra = DDR_A (ddr);
1209 struct data_reference *drb = DDR_B (ddr);
1210 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1211 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1212 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1213 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1214 lambda_vector dist_v;
1215 unsigned int loop_depth;
1217 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1219 /* Independent data accesses. */
1220 vect_check_interleaving (dra, drb);
1221 return false;
1224 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1225 return false;
1227 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1229 if (vect_print_dump_info (REPORT_DR_DETAILS))
1231 fprintf (vect_dump,
1232 "versioning for alias required: can't determine dependence between ");
1233 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1234 fprintf (vect_dump, " and ");
1235 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1237 /* Add to list of ddrs that need to be tested at run-time. */
1238 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1241 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1243 if (vect_print_dump_info (REPORT_DR_DETAILS))
1245 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1246 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1247 fprintf (vect_dump, " and ");
1248 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1250 /* Add to list of ddrs that need to be tested at run-time. */
1251 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1254 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1255 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1257 int dist = dist_v[loop_depth];
1259 if (vect_print_dump_info (REPORT_DR_DETAILS))
1260 fprintf (vect_dump, "dependence distance = %d.", dist);
1262 /* Same loop iteration. */
1263 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1265 /* Two references with distance zero have the same alignment. */
1266 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1267 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1268 if (vect_print_dump_info (REPORT_ALIGNMENT))
1269 fprintf (vect_dump, "accesses have the same alignment.");
1270 if (vect_print_dump_info (REPORT_DR_DETAILS))
1272 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1273 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1274 fprintf (vect_dump, " and ");
1275 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1278 /* For interleaving, mark that there is a read-write dependency if
1279 necessary. We check before that one of the data-refs is store. */
1280 if (DR_IS_READ (dra))
1281 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1282 else
1284 if (DR_IS_READ (drb))
1285 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1288 continue;
1291 if (abs (dist) >= vectorization_factor
1292 || (dist > 0 && DDR_REVERSED_P (ddr)))
1294 /* Dependence distance does not create dependence, as far as
1295 vectorization is concerned, in this case. If DDR_REVERSED_P the
1296 order of the data-refs in DDR was reversed (to make distance
1297 vector positive), and the actual distance is negative. */
1298 if (vect_print_dump_info (REPORT_DR_DETAILS))
1299 fprintf (vect_dump, "dependence distance >= VF or negative.");
1300 continue;
1303 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1305 fprintf (vect_dump,
1306 "not vectorized, possible dependence "
1307 "between data-refs ");
1308 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1309 fprintf (vect_dump, " and ");
1310 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1313 return true;
1316 return false;
1319 /* Function vect_analyze_data_ref_dependences.
1321 Examine all the data references in the loop, and make sure there do not
1322 exist any data dependences between them. */
1324 static bool
1325 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1327 unsigned int i;
1328 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1329 struct data_dependence_relation *ddr;
1331 if (vect_print_dump_info (REPORT_DETAILS))
1332 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1334 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1335 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1336 return false;
1338 return true;
1342 /* Function vect_compute_data_ref_alignment
1344 Compute the misalignment of the data reference DR.
1346 Output:
1347 1. If during the misalignment computation it is found that the data reference
1348 cannot be vectorized then false is returned.
1349 2. DR_MISALIGNMENT (DR) is defined.
1351 FOR NOW: No analysis is actually performed. Misalignment is calculated
1352 only for trivial cases. TODO. */
1354 static bool
1355 vect_compute_data_ref_alignment (struct data_reference *dr)
1357 tree stmt = DR_STMT (dr);
1358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1360 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1361 tree ref = DR_REF (dr);
1362 tree vectype;
1363 tree base, base_addr;
1364 bool base_aligned;
1365 tree misalign;
1366 tree aligned_to, alignment;
1368 if (vect_print_dump_info (REPORT_DETAILS))
1369 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1371 /* Initialize misalignment to unknown. */
1372 SET_DR_MISALIGNMENT (dr, -1);
1374 misalign = DR_INIT (dr);
1375 aligned_to = DR_ALIGNED_TO (dr);
1376 base_addr = DR_BASE_ADDRESS (dr);
1378 /* In case the dataref is in an inner-loop of the loop that is being
1379 vectorized (LOOP), we use the base and misalignment information
1380 relative to the outer-loop (LOOP). This is ok only if the misalignment
1381 stays the same throughout the execution of the inner-loop, which is why
1382 we have to check that the stride of the dataref in the inner-loop evenly
1383 divides by the vector size. */
1384 if (nested_in_vect_loop_p (loop, stmt))
1386 tree step = DR_STEP (dr);
1387 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1389 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1391 if (vect_print_dump_info (REPORT_ALIGNMENT))
1392 fprintf (vect_dump, "inner step divides the vector-size.");
1393 misalign = STMT_VINFO_DR_INIT (stmt_info);
1394 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1395 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1397 else
1399 if (vect_print_dump_info (REPORT_ALIGNMENT))
1400 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1401 misalign = NULL_TREE;
1405 base = build_fold_indirect_ref (base_addr);
1406 vectype = STMT_VINFO_VECTYPE (stmt_info);
1407 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1409 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1410 || !misalign)
1412 if (vect_print_dump_info (REPORT_ALIGNMENT))
1414 fprintf (vect_dump, "Unknown alignment for access: ");
1415 print_generic_expr (vect_dump, base, TDF_SLIM);
1417 return true;
1420 if ((DECL_P (base)
1421 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1422 alignment) >= 0)
1423 || (TREE_CODE (base_addr) == SSA_NAME
1424 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1425 TREE_TYPE (base_addr)))),
1426 alignment) >= 0))
1427 base_aligned = true;
1428 else
1429 base_aligned = false;
1431 if (!base_aligned)
1433 /* Do not change the alignment of global variables if
1434 flag_section_anchors is enabled. */
1435 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1436 || (TREE_STATIC (base) && flag_section_anchors))
1438 if (vect_print_dump_info (REPORT_DETAILS))
1440 fprintf (vect_dump, "can't force alignment of ref: ");
1441 print_generic_expr (vect_dump, ref, TDF_SLIM);
1443 return true;
1446 /* Force the alignment of the decl.
1447 NOTE: This is the only change to the code we make during
1448 the analysis phase, before deciding to vectorize the loop. */
1449 if (vect_print_dump_info (REPORT_DETAILS))
1450 fprintf (vect_dump, "force alignment");
1451 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1452 DECL_USER_ALIGN (base) = 1;
1455 /* At this point we assume that the base is aligned. */
1456 gcc_assert (base_aligned
1457 || (TREE_CODE (base) == VAR_DECL
1458 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1460 /* Modulo alignment. */
1461 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1463 if (!host_integerp (misalign, 1))
1465 /* Negative or overflowed misalignment value. */
1466 if (vect_print_dump_info (REPORT_DETAILS))
1467 fprintf (vect_dump, "unexpected misalign value");
1468 return false;
1471 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1473 if (vect_print_dump_info (REPORT_DETAILS))
1475 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1476 print_generic_expr (vect_dump, ref, TDF_SLIM);
1479 return true;
1483 /* Function vect_compute_data_refs_alignment
1485 Compute the misalignment of data references in the loop.
1486 Return FALSE if a data reference is found that cannot be vectorized. */
1488 static bool
1489 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1491 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1492 struct data_reference *dr;
1493 unsigned int i;
1495 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1496 if (!vect_compute_data_ref_alignment (dr))
1497 return false;
1499 return true;
1503 /* Function vect_update_misalignment_for_peel
1505 DR - the data reference whose misalignment is to be adjusted.
1506 DR_PEEL - the data reference whose misalignment is being made
1507 zero in the vector loop by the peel.
1508 NPEEL - the number of iterations in the peel loop if the misalignment
1509 of DR_PEEL is known at compile time. */
1511 static void
1512 vect_update_misalignment_for_peel (struct data_reference *dr,
1513 struct data_reference *dr_peel, int npeel)
1515 unsigned int i;
1516 VEC(dr_p,heap) *same_align_drs;
1517 struct data_reference *current_dr;
1518 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1519 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1520 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1521 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1523 /* For interleaved data accesses the step in the loop must be multiplied by
1524 the size of the interleaving group. */
1525 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1526 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1527 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1528 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1530 /* It can be assumed that the data refs with the same alignment as dr_peel
1531 are aligned in the vector loop. */
1532 same_align_drs
1533 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1534 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1536 if (current_dr != dr)
1537 continue;
1538 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1539 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1540 SET_DR_MISALIGNMENT (dr, 0);
1541 return;
1544 if (known_alignment_for_access_p (dr)
1545 && known_alignment_for_access_p (dr_peel))
1547 int misal = DR_MISALIGNMENT (dr);
1548 misal += npeel * dr_size;
1549 misal %= UNITS_PER_SIMD_WORD;
1550 SET_DR_MISALIGNMENT (dr, misal);
1551 return;
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "Setting misalignment to -1.");
1556 SET_DR_MISALIGNMENT (dr, -1);
1560 /* Function vect_verify_datarefs_alignment
1562 Return TRUE if all data references in the loop can be
1563 handled with respect to alignment. */
1565 static bool
1566 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1568 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1569 struct data_reference *dr;
1570 enum dr_alignment_support supportable_dr_alignment;
1571 unsigned int i;
1573 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1575 tree stmt = DR_STMT (dr);
1576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1578 /* For interleaving, only the alignment of the first access matters. */
1579 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1580 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1581 continue;
1583 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1584 if (!supportable_dr_alignment)
1586 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1588 if (DR_IS_READ (dr))
1589 fprintf (vect_dump,
1590 "not vectorized: unsupported unaligned load.");
1591 else
1592 fprintf (vect_dump,
1593 "not vectorized: unsupported unaligned store.");
1595 return false;
1597 if (supportable_dr_alignment != dr_aligned
1598 && vect_print_dump_info (REPORT_ALIGNMENT))
1599 fprintf (vect_dump, "Vectorizing an unaligned access.");
1601 return true;
1605 /* Function vector_alignment_reachable_p
1607 Return true if vector alignment for DR is reachable by peeling
1608 a few loop iterations. Return false otherwise. */
1610 static bool
1611 vector_alignment_reachable_p (struct data_reference *dr)
1613 tree stmt = DR_STMT (dr);
1614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1617 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1619 /* For interleaved access we peel only if number of iterations in
1620 the prolog loop ({VF - misalignment}), is a multiple of the
1621 number of the interleaved accesses. */
1622 int elem_size, mis_in_elements;
1623 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1625 /* FORNOW: handle only known alignment. */
1626 if (!known_alignment_for_access_p (dr))
1627 return false;
1629 elem_size = UNITS_PER_SIMD_WORD / nelements;
1630 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1632 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1633 return false;
1636 /* If misalignment is known at the compile time then allow peeling
1637 only if natural alignment is reachable through peeling. */
1638 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1640 HOST_WIDE_INT elmsize =
1641 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1642 if (vect_print_dump_info (REPORT_DETAILS))
1644 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1645 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1647 if (DR_MISALIGNMENT (dr) % elmsize)
1649 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1651 return false;
1655 if (!known_alignment_for_access_p (dr))
1657 tree type = (TREE_TYPE (DR_REF (dr)));
1658 tree ba = DR_BASE_OBJECT (dr);
1659 bool is_packed = false;
1661 if (ba)
1662 is_packed = contains_packed_reference (ba);
1664 if (vect_print_dump_info (REPORT_DETAILS))
1665 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1666 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1667 return true;
1668 else
1669 return false;
1672 return true;
1675 /* Function vect_enhance_data_refs_alignment
1677 This pass will use loop versioning and loop peeling in order to enhance
1678 the alignment of data references in the loop.
1680 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1681 original loop is to be vectorized; Any other loops that are created by
1682 the transformations performed in this pass - are not supposed to be
1683 vectorized. This restriction will be relaxed.
1685 This pass will require a cost model to guide it whether to apply peeling
1686 or versioning or a combination of the two. For example, the scheme that
1687 intel uses when given a loop with several memory accesses, is as follows:
1688 choose one memory access ('p') which alignment you want to force by doing
1689 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1690 other accesses are not necessarily aligned, or (2) use loop versioning to
1691 generate one loop in which all accesses are aligned, and another loop in
1692 which only 'p' is necessarily aligned.
1694 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1695 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1696 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1698 Devising a cost model is the most critical aspect of this work. It will
1699 guide us on which access to peel for, whether to use loop versioning, how
1700 many versions to create, etc. The cost model will probably consist of
1701 generic considerations as well as target specific considerations (on
1702 powerpc for example, misaligned stores are more painful than misaligned
1703 loads).
1705 Here are the general steps involved in alignment enhancements:
1707 -- original loop, before alignment analysis:
1708 for (i=0; i<N; i++){
1709 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1710 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1713 -- After vect_compute_data_refs_alignment:
1714 for (i=0; i<N; i++){
1715 x = q[i]; # DR_MISALIGNMENT(q) = 3
1716 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1719 -- Possibility 1: we do loop versioning:
1720 if (p is aligned) {
1721 for (i=0; i<N; i++){ # loop 1A
1722 x = q[i]; # DR_MISALIGNMENT(q) = 3
1723 p[i] = y; # DR_MISALIGNMENT(p) = 0
1726 else {
1727 for (i=0; i<N; i++){ # loop 1B
1728 x = q[i]; # DR_MISALIGNMENT(q) = 3
1729 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1733 -- Possibility 2: we do loop peeling:
1734 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1735 x = q[i];
1736 p[i] = y;
1738 for (i = 3; i < N; i++){ # loop 2A
1739 x = q[i]; # DR_MISALIGNMENT(q) = 0
1740 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1743 -- Possibility 3: combination of loop peeling and versioning:
1744 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1745 x = q[i];
1746 p[i] = y;
1748 if (p is aligned) {
1749 for (i = 3; i<N; i++){ # loop 3A
1750 x = q[i]; # DR_MISALIGNMENT(q) = 0
1751 p[i] = y; # DR_MISALIGNMENT(p) = 0
1754 else {
1755 for (i = 3; i<N; i++){ # loop 3B
1756 x = q[i]; # DR_MISALIGNMENT(q) = 0
1757 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1761 These loops are later passed to loop_transform to be vectorized. The
1762 vectorizer will use the alignment information to guide the transformation
1763 (whether to generate regular loads/stores, or with special handling for
1764 misalignment). */
1766 static bool
1767 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1769 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1771 enum dr_alignment_support supportable_dr_alignment;
1772 struct data_reference *dr0 = NULL;
1773 struct data_reference *dr;
1774 unsigned int i;
1775 bool do_peeling = false;
1776 bool do_versioning = false;
1777 bool stat;
1778 tree stmt;
1779 stmt_vec_info stmt_info;
1780 int vect_versioning_for_alias_required;
1782 if (vect_print_dump_info (REPORT_DETAILS))
1783 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1785 /* While cost model enhancements are expected in the future, the high level
1786 view of the code at this time is as follows:
1788 A) If there is a misaligned write then see if peeling to align this write
1789 can make all data references satisfy vect_supportable_dr_alignment.
1790 If so, update data structures as needed and return true. Note that
1791 at this time vect_supportable_dr_alignment is known to return false
1792 for a misaligned write.
1794 B) If peeling wasn't possible and there is a data reference with an
1795 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1796 then see if loop versioning checks can be used to make all data
1797 references satisfy vect_supportable_dr_alignment. If so, update
1798 data structures as needed and return true.
1800 C) If neither peeling nor versioning were successful then return false if
1801 any data reference does not satisfy vect_supportable_dr_alignment.
1803 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1805 Note, Possibility 3 above (which is peeling and versioning together) is not
1806 being done at this time. */
1808 /* (1) Peeling to force alignment. */
1810 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1811 Considerations:
1812 + How many accesses will become aligned due to the peeling
1813 - How many accesses will become unaligned due to the peeling,
1814 and the cost of misaligned accesses.
1815 - The cost of peeling (the extra runtime checks, the increase
1816 in code size).
1818 The scheme we use FORNOW: peel to force the alignment of the first
1819 misaligned store in the loop.
1820 Rationale: misaligned stores are not yet supported.
1822 TODO: Use a cost model. */
1824 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1826 stmt = DR_STMT (dr);
1827 stmt_info = vinfo_for_stmt (stmt);
1829 /* For interleaving, only the alignment of the first access
1830 matters. */
1831 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1832 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1833 continue;
1835 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1837 do_peeling = vector_alignment_reachable_p (dr);
1838 if (do_peeling)
1839 dr0 = dr;
1840 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1841 fprintf (vect_dump, "vector alignment may not be reachable");
1842 break;
1846 vect_versioning_for_alias_required =
1847 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1849 /* Temporarily, if versioning for alias is required, we disable peeling
1850 until we support peeling and versioning. Often peeling for alignment
1851 will require peeling for loop-bound, which in turn requires that we
1852 know how to adjust the loop ivs after the loop. */
1853 if (vect_versioning_for_alias_required
1854 || !vect_can_advance_ivs_p (loop_vinfo)
1855 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1856 do_peeling = false;
1858 if (do_peeling)
1860 int mis;
1861 int npeel = 0;
1862 tree stmt = DR_STMT (dr0);
1863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1864 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1865 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1867 if (known_alignment_for_access_p (dr0))
1869 /* Since it's known at compile time, compute the number of iterations
1870 in the peeled loop (the peeling factor) for use in updating
1871 DR_MISALIGNMENT values. The peeling factor is the vectorization
1872 factor minus the misalignment as an element count. */
1873 mis = DR_MISALIGNMENT (dr0);
1874 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1875 npeel = nelements - mis;
1877 /* For interleaved data access every iteration accesses all the
1878 members of the group, therefore we divide the number of iterations
1879 by the group size. */
1880 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1881 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1882 npeel /= DR_GROUP_SIZE (stmt_info);
1884 if (vect_print_dump_info (REPORT_DETAILS))
1885 fprintf (vect_dump, "Try peeling by %d", npeel);
1888 /* Ensure that all data refs can be vectorized after the peel. */
1889 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1891 int save_misalignment;
1893 if (dr == dr0)
1894 continue;
1896 stmt = DR_STMT (dr);
1897 stmt_info = vinfo_for_stmt (stmt);
1898 /* For interleaving, only the alignment of the first access
1899 matters. */
1900 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1901 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1902 continue;
1904 save_misalignment = DR_MISALIGNMENT (dr);
1905 vect_update_misalignment_for_peel (dr, dr0, npeel);
1906 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1907 SET_DR_MISALIGNMENT (dr, save_misalignment);
1909 if (!supportable_dr_alignment)
1911 do_peeling = false;
1912 break;
1916 if (do_peeling)
1918 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1919 If the misalignment of DR_i is identical to that of dr0 then set
1920 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1921 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1922 by the peeling factor times the element size of DR_i (MOD the
1923 vectorization factor times the size). Otherwise, the
1924 misalignment of DR_i must be set to unknown. */
1925 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1926 if (dr != dr0)
1927 vect_update_misalignment_for_peel (dr, dr0, npeel);
1929 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1930 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1931 SET_DR_MISALIGNMENT (dr0, 0);
1932 if (vect_print_dump_info (REPORT_ALIGNMENT))
1933 fprintf (vect_dump, "Alignment of access forced using peeling.");
1935 if (vect_print_dump_info (REPORT_DETAILS))
1936 fprintf (vect_dump, "Peeling for alignment will be applied.");
1938 stat = vect_verify_datarefs_alignment (loop_vinfo);
1939 gcc_assert (stat);
1940 return stat;
1945 /* (2) Versioning to force alignment. */
1947 /* Try versioning if:
1948 1) flag_tree_vect_loop_version is TRUE
1949 2) optimize_size is FALSE
1950 3) there is at least one unsupported misaligned data ref with an unknown
1951 misalignment, and
1952 4) all misaligned data refs with a known misalignment are supported, and
1953 5) the number of runtime alignment checks is within reason. */
1955 do_versioning =
1956 flag_tree_vect_loop_version
1957 && (!optimize_size)
1958 && (!loop->inner); /* FORNOW */
1960 if (do_versioning)
1962 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1964 stmt = DR_STMT (dr);
1965 stmt_info = vinfo_for_stmt (stmt);
1967 /* For interleaving, only the alignment of the first access
1968 matters. */
1969 if (aligned_access_p (dr)
1970 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1971 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1972 continue;
1974 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1976 if (!supportable_dr_alignment)
1978 tree stmt;
1979 int mask;
1980 tree vectype;
1982 if (known_alignment_for_access_p (dr)
1983 || VEC_length (tree,
1984 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1985 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1987 do_versioning = false;
1988 break;
1991 stmt = DR_STMT (dr);
1992 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1993 gcc_assert (vectype);
1995 /* The rightmost bits of an aligned address must be zeros.
1996 Construct the mask needed for this test. For example,
1997 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1998 mask must be 15 = 0xf. */
1999 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2001 /* FORNOW: use the same mask to test all potentially unaligned
2002 references in the loop. The vectorizer currently supports
2003 a single vector size, see the reference to
2004 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2005 vectorization factor is computed. */
2006 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2007 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2008 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2009 VEC_safe_push (tree, heap,
2010 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2011 DR_STMT (dr));
2015 /* Versioning requires at least one misaligned data reference. */
2016 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2017 do_versioning = false;
2018 else if (!do_versioning)
2019 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2022 if (do_versioning)
2024 VEC(tree,heap) *may_misalign_stmts
2025 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2026 tree stmt;
2028 /* It can now be assumed that the data references in the statements
2029 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2030 of the loop being vectorized. */
2031 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2034 dr = STMT_VINFO_DATA_REF (stmt_info);
2035 SET_DR_MISALIGNMENT (dr, 0);
2036 if (vect_print_dump_info (REPORT_ALIGNMENT))
2037 fprintf (vect_dump, "Alignment of access forced using versioning.");
2040 if (vect_print_dump_info (REPORT_DETAILS))
2041 fprintf (vect_dump, "Versioning for alignment will be applied.");
2043 /* Peeling and versioning can't be done together at this time. */
2044 gcc_assert (! (do_peeling && do_versioning));
2046 stat = vect_verify_datarefs_alignment (loop_vinfo);
2047 gcc_assert (stat);
2048 return stat;
2051 /* This point is reached if neither peeling nor versioning is being done. */
2052 gcc_assert (! (do_peeling || do_versioning));
2054 stat = vect_verify_datarefs_alignment (loop_vinfo);
2055 return stat;
2059 /* Function vect_analyze_data_refs_alignment
2061 Analyze the alignment of the data-references in the loop.
2062 Return FALSE if a data reference is found that cannot be vectorized. */
2064 static bool
2065 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2067 if (vect_print_dump_info (REPORT_DETAILS))
2068 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2070 if (!vect_compute_data_refs_alignment (loop_vinfo))
2072 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2073 fprintf (vect_dump,
2074 "not vectorized: can't calculate alignment for data ref.");
2075 return false;
2078 return true;
2082 /* Analyze groups of strided accesses: check that DR belongs to a group of
2083 strided accesses of legal size, step, etc. Detect gaps, single element
2084 interleaving, and other special cases. Set strided access info.
2085 Collect groups of strided stores for further use in SLP analysis. */
2087 static bool
2088 vect_analyze_group_access (struct data_reference *dr)
2090 tree step = DR_STEP (dr);
2091 tree scalar_type = TREE_TYPE (DR_REF (dr));
2092 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2093 tree stmt = DR_STMT (dr);
2094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2096 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2097 HOST_WIDE_INT stride;
2098 bool slp_impossible = false;
2100 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2101 interleaving group (including gaps). */
2102 stride = dr_step / type_size;
2104 /* Not consecutive access is possible only if it is a part of interleaving. */
2105 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2107 /* Check if it this DR is a part of interleaving, and is a single
2108 element of the group that is accessed in the loop. */
2110 /* Gaps are supported only for loads. STEP must be a multiple of the type
2111 size. The size of the group must be a power of 2. */
2112 if (DR_IS_READ (dr)
2113 && (dr_step % type_size) == 0
2114 && stride > 0
2115 && exact_log2 (stride) != -1)
2117 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2118 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2119 if (vect_print_dump_info (REPORT_DR_DETAILS))
2121 fprintf (vect_dump, "Detected single element interleaving %d ",
2122 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2124 fprintf (vect_dump, " step ");
2125 print_generic_expr (vect_dump, step, TDF_SLIM);
2127 return true;
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "not consecutive access");
2131 return false;
2134 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2136 /* First stmt in the interleaving chain. Check the chain. */
2137 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2138 struct data_reference *data_ref = dr;
2139 unsigned int count = 1;
2140 tree next_step;
2141 tree prev_init = DR_INIT (data_ref);
2142 tree prev = stmt;
2143 HOST_WIDE_INT diff, count_in_bytes;
2145 while (next)
2147 /* Skip same data-refs. In case that two or more stmts share data-ref
2148 (supported only for loads), we vectorize only the first stmt, and
2149 the rest get their vectorized loads from the first one. */
2150 if (!tree_int_cst_compare (DR_INIT (data_ref),
2151 DR_INIT (STMT_VINFO_DATA_REF (
2152 vinfo_for_stmt (next)))))
2154 if (!DR_IS_READ (data_ref))
2156 if (vect_print_dump_info (REPORT_DETAILS))
2157 fprintf (vect_dump, "Two store stmts share the same dr.");
2158 return false;
2161 /* Check that there is no load-store dependencies for this loads
2162 to prevent a case of load-store-load to the same location. */
2163 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2164 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump,
2168 "READ_WRITE dependence in interleaving.");
2169 return false;
2172 /* For load use the same data-ref load. */
2173 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2175 prev = next;
2176 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2177 continue;
2179 prev = next;
2181 /* Check that all the accesses have the same STEP. */
2182 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2183 if (tree_int_cst_compare (step, next_step))
2185 if (vect_print_dump_info (REPORT_DETAILS))
2186 fprintf (vect_dump, "not consecutive access in interleaving");
2187 return false;
2190 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2191 /* Check that the distance between two accesses is equal to the type
2192 size. Otherwise, we have gaps. */
2193 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2194 - TREE_INT_CST_LOW (prev_init)) / type_size;
2195 if (diff != 1)
2197 /* FORNOW: SLP of accesses with gaps is not supported. */
2198 slp_impossible = true;
2199 if (!DR_IS_READ (data_ref))
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "interleaved store with gaps");
2203 return false;
2207 /* Store the gap from the previous member of the group. If there is no
2208 gap in the access, DR_GROUP_GAP is always 1. */
2209 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2211 prev_init = DR_INIT (data_ref);
2212 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2213 /* Count the number of data-refs in the chain. */
2214 count++;
2217 /* COUNT is the number of accesses found, we multiply it by the size of
2218 the type to get COUNT_IN_BYTES. */
2219 count_in_bytes = type_size * count;
2221 /* Check that the size of the interleaving is not greater than STEP. */
2222 if (dr_step < count_in_bytes)
2224 if (vect_print_dump_info (REPORT_DETAILS))
2226 fprintf (vect_dump, "interleaving size is greater than step for ");
2227 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2229 return false;
2232 /* Check that the size of the interleaving is equal to STEP for stores,
2233 i.e., that there are no gaps. */
2234 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2236 if (vect_print_dump_info (REPORT_DETAILS))
2237 fprintf (vect_dump, "interleaved store with gaps");
2238 return false;
2241 /* Check that STEP is a multiple of type size. */
2242 if ((dr_step % type_size) != 0)
2244 if (vect_print_dump_info (REPORT_DETAILS))
2246 fprintf (vect_dump, "step is not a multiple of type size: step ");
2247 print_generic_expr (vect_dump, step, TDF_SLIM);
2248 fprintf (vect_dump, " size ");
2249 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2250 TDF_SLIM);
2252 return false;
2255 /* FORNOW: we handle only interleaving that is a power of 2.
2256 We don't fail here if it may be still possible to vectorize the
2257 group using SLP. If not, the size of the group will be checked in
2258 vect_analyze_operations, and the vectorization will fail. */
2259 if (exact_log2 (stride) == -1)
2261 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "interleaving is not a power of 2");
2264 if (slp_impossible)
2265 return false;
2267 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2268 if (vect_print_dump_info (REPORT_DETAILS))
2269 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2271 /* SLP: create an SLP data structure for every interleaving group of
2272 stores for further analysis in vect_analyse_slp. */
2273 if (!DR_IS_READ (dr) && !slp_impossible)
2274 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2277 return true;
2281 /* Analyze the access pattern of the data-reference DR.
2282 In case of non-consecutive accesse call vect_analyze_group_access() to
2283 analyze groups of strided accesses. */
2285 static bool
2286 vect_analyze_data_ref_access (struct data_reference *dr)
2288 tree step = DR_STEP (dr);
2289 tree scalar_type = TREE_TYPE (DR_REF (dr));
2290 tree stmt = DR_STMT (dr);
2291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2292 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2294 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2296 if (!step)
2298 if (vect_print_dump_info (REPORT_DETAILS))
2299 fprintf (vect_dump, "bad data-ref access");
2300 return false;
2303 /* Don't allow invariant accesses. */
2304 if (dr_step == 0)
2305 return false;
2307 if (nested_in_vect_loop_p (loop, stmt))
2309 /* For the rest of the analysis we use the outer-loop step. */
2310 step = STMT_VINFO_DR_STEP (stmt_info);
2311 dr_step = TREE_INT_CST_LOW (step);
2313 if (dr_step == 0)
2315 if (vect_print_dump_info (REPORT_ALIGNMENT))
2316 fprintf (vect_dump, "zero step in outer loop.");
2317 if (DR_IS_READ (dr))
2318 return true;
2319 else
2320 return false;
2324 /* Consecutive? */
2325 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2327 /* Mark that it is not interleaving. */
2328 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2329 return true;
2332 if (nested_in_vect_loop_p (loop, stmt))
2334 if (vect_print_dump_info (REPORT_ALIGNMENT))
2335 fprintf (vect_dump, "strided access in outer loop.");
2336 return false;
2339 /* Not consecutive access - check if it's a part of interleaving group. */
2340 return vect_analyze_group_access (dr);
2344 /* Function vect_analyze_data_ref_accesses.
2346 Analyze the access pattern of all the data references in the loop.
2348 FORNOW: the only access pattern that is considered vectorizable is a
2349 simple step 1 (consecutive) access.
2351 FORNOW: handle only arrays and pointer accesses. */
2353 static bool
2354 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2356 unsigned int i;
2357 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2358 struct data_reference *dr;
2360 if (vect_print_dump_info (REPORT_DETAILS))
2361 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2363 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2364 if (!vect_analyze_data_ref_access (dr))
2366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2367 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2368 return false;
2371 return true;
2374 /* Function vect_prune_runtime_alias_test_list.
2376 Prune a list of ddrs to be tested at run-time by versioning for alias.
2377 Return FALSE if resulting list of ddrs is longer then allowed by
2378 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2380 static bool
2381 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2383 VEC (ddr_p, heap) * ddrs =
2384 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2385 unsigned i, j;
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2390 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2392 bool found;
2393 ddr_p ddr_i;
2395 ddr_i = VEC_index (ddr_p, ddrs, i);
2396 found = false;
2398 for (j = 0; j < i; j++)
2400 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2402 if (vect_vfa_range_equal (ddr_i, ddr_j))
2404 if (vect_print_dump_info (REPORT_DR_DETAILS))
2406 fprintf (vect_dump, "found equal ranges ");
2407 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2408 fprintf (vect_dump, ", ");
2409 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2410 fprintf (vect_dump, " and ");
2411 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2412 fprintf (vect_dump, ", ");
2413 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2415 found = true;
2416 break;
2420 if (found)
2422 VEC_ordered_remove (ddr_p, ddrs, i);
2423 continue;
2425 i++;
2428 if (VEC_length (ddr_p, ddrs) >
2429 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2431 if (vect_print_dump_info (REPORT_DR_DETAILS))
2433 fprintf (vect_dump,
2434 "disable versioning for alias - max number of generated "
2435 "checks exceeded.");
2438 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2440 return false;
2443 return true;
2446 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2448 void
2449 vect_free_slp_tree (slp_tree node)
2451 if (!node)
2452 return;
2454 if (SLP_TREE_LEFT (node))
2455 vect_free_slp_tree (SLP_TREE_LEFT (node));
2457 if (SLP_TREE_RIGHT (node))
2458 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2460 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2462 if (SLP_TREE_VEC_STMTS (node))
2463 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2465 free (node);
2469 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2470 of a legal type and that they match the defs of the first stmt of the SLP
2471 group (stored in FIRST_STMT_...). */
2473 static bool
2474 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2475 tree rhs, VEC (tree, heap) **def_stmts0,
2476 VEC (tree, heap) **def_stmts1,
2477 enum vect_def_type *first_stmt_dt0,
2478 enum vect_def_type *first_stmt_dt1,
2479 tree *first_stmt_def0_type,
2480 tree *first_stmt_def1_type,
2481 tree *first_stmt_const_oprnd,
2482 int ncopies_for_cost)
2484 tree oprnd;
2485 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2486 unsigned int i, number_of_oprnds = op_type;
2487 tree def, def_stmt;
2488 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2489 stmt_vec_info stmt_info =
2490 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2492 /* Store. */
2493 if (!op_type)
2494 number_of_oprnds = 1;
2495 else
2496 gcc_assert (op_type == unary_op || op_type == binary_op);
2498 for (i = 0; i < number_of_oprnds; i++)
2500 if (op_type)
2501 oprnd = TREE_OPERAND (rhs, i);
2502 else
2503 oprnd = rhs;
2505 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2506 || (!def_stmt && dt[i] != vect_constant_def))
2508 if (vect_print_dump_info (REPORT_SLP))
2510 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2511 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2514 return false;
2517 if (!*first_stmt_dt0)
2519 /* op0 of the first stmt of the group - store its info. */
2520 *first_stmt_dt0 = dt[i];
2521 if (def)
2522 *first_stmt_def0_type = TREE_TYPE (def);
2523 else
2524 *first_stmt_const_oprnd = oprnd;
2526 /* Analyze costs (for the first stmt of the group only). */
2527 if (op_type)
2528 /* Not memory operation (we don't call this functions for loads). */
2529 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2530 else
2531 /* Store. */
2532 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2535 else
2537 if (!*first_stmt_dt1 && i == 1)
2539 /* op1 of the first stmt of the group - store its info. */
2540 *first_stmt_dt1 = dt[i];
2541 if (def)
2542 *first_stmt_def1_type = TREE_TYPE (def);
2543 else
2545 /* We assume that the stmt contains only one constant
2546 operand. We fail otherwise, to be on the safe side. */
2547 if (*first_stmt_const_oprnd)
2549 if (vect_print_dump_info (REPORT_SLP))
2550 fprintf (vect_dump, "Build SLP failed: two constant "
2551 "oprnds in stmt");
2552 return false;
2554 *first_stmt_const_oprnd = oprnd;
2557 else
2559 /* Not first stmt of the group, check that the def-stmt/s match
2560 the def-stmt/s of the first stmt. */
2561 if ((i == 0
2562 && (*first_stmt_dt0 != dt[i]
2563 || (*first_stmt_def0_type && def
2564 && *first_stmt_def0_type != TREE_TYPE (def))))
2565 || (i == 1
2566 && (*first_stmt_dt1 != dt[i]
2567 || (*first_stmt_def1_type && def
2568 && *first_stmt_def1_type != TREE_TYPE (def))))
2569 || (!def
2570 && TREE_TYPE (*first_stmt_const_oprnd)
2571 != TREE_TYPE (oprnd)))
2573 if (vect_print_dump_info (REPORT_SLP))
2574 fprintf (vect_dump, "Build SLP failed: different types ");
2576 return false;
2581 /* Check the types of the definitions. */
2582 switch (dt[i])
2584 case vect_constant_def:
2585 case vect_invariant_def:
2586 break;
2588 case vect_loop_def:
2589 if (i == 0)
2590 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2591 else
2592 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2593 break;
2595 default:
2596 /* FORNOW: Not supported. */
2597 if (vect_print_dump_info (REPORT_SLP))
2599 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2600 print_generic_expr (vect_dump, def, TDF_SLIM);
2603 return false;
2607 return true;
2611 /* Recursively build an SLP tree starting from NODE.
2612 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2613 permutation or are of unsupported types of operation. Otherwise, return
2614 TRUE.
2615 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2616 in the case of multiple types for now. */
2618 static bool
2619 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2620 unsigned int group_size, bool *slp_impossible,
2621 int *inside_cost, int *outside_cost,
2622 int ncopies_for_cost)
2624 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2625 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2626 unsigned int i;
2627 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2628 tree stmt = VEC_index (tree, stmts, 0);
2629 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2630 enum tree_code first_stmt_code = 0;
2631 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2632 tree lhs, rhs, prev_stmt = NULL_TREE;
2633 bool stop_recursion = false, need_same_oprnds = false;
2634 tree vectype, scalar_type, first_op1 = NULL_TREE;
2635 unsigned int vectorization_factor = 0, ncopies;
2636 optab optab;
2637 int icode;
2638 enum machine_mode optab_op2_mode;
2639 enum machine_mode vec_mode;
2640 tree first_stmt_const_oprnd = NULL_TREE;
2641 struct data_reference *first_dr;
2643 /* For every stmt in NODE find its def stmt/s. */
2644 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2646 if (vect_print_dump_info (REPORT_SLP))
2648 fprintf (vect_dump, "Build SLP for ");
2649 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2652 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2654 if (vect_print_dump_info (REPORT_SLP))
2656 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2657 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2660 return false;
2663 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2664 vectype = get_vectype_for_scalar_type (scalar_type);
2665 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2666 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2667 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2668 if (ncopies > 1)
2670 /* FORNOW. */
2671 if (vect_print_dump_info (REPORT_SLP))
2672 fprintf (vect_dump, "SLP failed - multiple types ");
2674 *slp_impossible = true;
2675 return false;
2678 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2679 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2681 /* Check the operation. */
2682 if (i == 0)
2684 first_stmt_code = TREE_CODE (rhs);
2686 /* Shift arguments should be equal in all the packed stmts for a
2687 vector shift with scalar shift operand. */
2688 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2690 vec_mode = TYPE_MODE (vectype);
2691 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2692 if (!optab)
2694 if (vect_print_dump_info (REPORT_SLP))
2695 fprintf (vect_dump, "Build SLP failed: no optab.");
2696 return false;
2698 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2699 optab_op2_mode = insn_data[icode].operand[2].mode;
2700 if (!VECTOR_MODE_P (optab_op2_mode))
2702 need_same_oprnds = true;
2703 first_op1 = TREE_OPERAND (rhs, 1);
2707 else
2709 if (first_stmt_code != TREE_CODE (rhs))
2711 if (vect_print_dump_info (REPORT_SLP))
2713 fprintf (vect_dump,
2714 "Build SLP failed: different operation in stmt ");
2715 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2718 return false;
2721 if (need_same_oprnds
2722 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2724 if (vect_print_dump_info (REPORT_SLP))
2726 fprintf (vect_dump,
2727 "Build SLP failed: different shift arguments in ");
2728 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2731 return false;
2735 /* Strided store or load. */
2736 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2738 if (REFERENCE_CLASS_P (lhs))
2740 /* Store. */
2741 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2742 &def_stmts0, &def_stmts1,
2743 &first_stmt_dt0,
2744 &first_stmt_dt1,
2745 &first_stmt_def0_type,
2746 &first_stmt_def1_type,
2747 &first_stmt_const_oprnd,
2748 ncopies_for_cost))
2749 return false;
2751 else
2753 /* Load. */
2754 if (i == 0)
2756 /* First stmt of the SLP group should be the first load of
2757 the interleaving loop if data permutation is not
2758 allowed. */
2759 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2761 /* FORNOW: data permutations are not supported. */
2762 if (vect_print_dump_info (REPORT_SLP))
2764 fprintf (vect_dump, "Build SLP failed: strided "
2765 " loads need permutation ");
2766 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2769 return false;
2772 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2773 if (vect_supportable_dr_alignment (first_dr)
2774 == dr_unaligned_unsupported)
2776 if (vect_print_dump_info (REPORT_SLP))
2778 fprintf (vect_dump, "Build SLP failed: unsupported "
2779 " unaligned load ");
2780 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2783 return false;
2786 /* Analyze costs (for the first stmt in the group). */
2787 vect_model_load_cost (vinfo_for_stmt (stmt),
2788 ncopies_for_cost, *node);
2790 else
2792 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2794 /* FORNOW: data permutations are not supported. */
2795 if (vect_print_dump_info (REPORT_SLP))
2797 fprintf (vect_dump, "Build SLP failed: strided "
2798 " loads need permutation ");
2799 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2801 return false;
2805 prev_stmt = stmt;
2807 /* We stop the tree when we reach a group of loads. */
2808 stop_recursion = true;
2809 continue;
2811 } /* Strided access. */
2812 else
2814 if (REFERENCE_CLASS_P (rhs))
2816 /* Not strided load. */
2817 if (vect_print_dump_info (REPORT_SLP))
2819 fprintf (vect_dump, "Build SLP failed: not strided load ");
2820 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2823 /* FORNOW: Not strided loads are not supported. */
2824 return false;
2827 /* Not memory operation. */
2828 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2830 if (vect_print_dump_info (REPORT_SLP))
2832 fprintf (vect_dump, "Build SLP failed: operation");
2833 fprintf (vect_dump, " unsupported ");
2834 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2837 return false;
2840 /* Find the def-stmts. */
2841 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2842 &def_stmts1, &first_stmt_dt0,
2843 &first_stmt_dt1,
2844 &first_stmt_def0_type,
2845 &first_stmt_def1_type,
2846 &first_stmt_const_oprnd,
2847 ncopies_for_cost))
2848 return false;
2852 /* Add the costs of the node to the overall instance costs. */
2853 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2854 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2856 /* Strided loads were reached - stop the recursion. */
2857 if (stop_recursion)
2858 return true;
2860 /* Create SLP_TREE nodes for the definition node/s. */
2861 if (first_stmt_dt0 == vect_loop_def)
2863 slp_tree left_node = XNEW (struct _slp_tree);
2864 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2865 SLP_TREE_VEC_STMTS (left_node) = NULL;
2866 SLP_TREE_LEFT (left_node) = NULL;
2867 SLP_TREE_RIGHT (left_node) = NULL;
2868 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2869 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2870 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2871 slp_impossible, inside_cost, outside_cost,
2872 ncopies_for_cost))
2873 return false;
2875 SLP_TREE_LEFT (*node) = left_node;
2878 if (first_stmt_dt1 == vect_loop_def)
2880 slp_tree right_node = XNEW (struct _slp_tree);
2881 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2882 SLP_TREE_VEC_STMTS (right_node) = NULL;
2883 SLP_TREE_LEFT (right_node) = NULL;
2884 SLP_TREE_RIGHT (right_node) = NULL;
2885 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2886 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2887 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2888 slp_impossible, inside_cost, outside_cost,
2889 ncopies_for_cost))
2890 return false;
2892 SLP_TREE_RIGHT (*node) = right_node;
2895 return true;
2899 static void
2900 vect_print_slp_tree (slp_tree node)
2902 int i;
2903 tree stmt;
2905 if (!node)
2906 return;
2908 fprintf (vect_dump, "node ");
2909 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2911 fprintf (vect_dump, "\n\tstmt %d ", i);
2912 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2914 fprintf (vect_dump, "\n");
2916 vect_print_slp_tree (SLP_TREE_LEFT (node));
2917 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2921 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2922 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2923 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2924 stmts in NODE are to be marked. */
2926 static void
2927 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2929 int i;
2930 tree stmt;
2932 if (!node)
2933 return;
2935 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2936 if (j < 0 || i == j)
2937 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2939 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2940 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2944 /* Analyze an SLP instance starting from a group of strided stores. Call
2945 vect_build_slp_tree to build a tree of packed stmts if possible.
2946 Return FALSE if it's impossible to SLP any stmt in the loop. */
2948 static bool
2949 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2951 slp_instance new_instance;
2952 slp_tree node = XNEW (struct _slp_tree);
2953 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2954 unsigned int unrolling_factor = 1, nunits;
2955 tree vectype, scalar_type, next;
2956 unsigned int vectorization_factor = 0, ncopies;
2957 bool slp_impossible = false;
2958 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2960 /* FORNOW: multiple types are not supported. */
2961 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2962 vectype = get_vectype_for_scalar_type (scalar_type);
2963 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2964 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2965 ncopies = vectorization_factor / nunits;
2966 if (ncopies > 1)
2968 if (vect_print_dump_info (REPORT_SLP))
2969 fprintf (vect_dump, "SLP failed - multiple types ");
2971 return false;
2974 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2975 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
2976 next = stmt;
2977 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2978 while (next)
2980 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
2981 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2984 SLP_TREE_VEC_STMTS (node) = NULL;
2985 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
2986 SLP_TREE_LEFT (node) = NULL;
2987 SLP_TREE_RIGHT (node) = NULL;
2988 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
2989 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
2991 /* Calculate the unrolling factor. */
2992 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
2994 /* Calculate the number of vector stmts to create based on the unrolling
2995 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
2996 GROUP_SIZE / NUNITS otherwise. */
2997 ncopies_for_cost = unrolling_factor * group_size / nunits;
2999 /* Build the tree for the SLP instance. */
3000 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3001 &inside_cost, &outside_cost, ncopies_for_cost))
3003 /* Create a new SLP instance. */
3004 new_instance = XNEW (struct _slp_instance);
3005 SLP_INSTANCE_TREE (new_instance) = node;
3006 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3007 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3008 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3009 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3010 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3011 new_instance);
3012 if (vect_print_dump_info (REPORT_SLP))
3013 vect_print_slp_tree (node);
3015 return true;
3018 /* Failed to SLP. */
3019 /* Free the allocated memory. */
3020 vect_free_slp_tree (node);
3022 if (slp_impossible)
3023 return false;
3025 /* SLP failed for this instance, but it is still possible to SLP other stmts
3026 in the loop. */
3027 return true;
3031 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3032 trees of packed scalar stmts if SLP is possible. */
3034 static bool
3035 vect_analyze_slp (loop_vec_info loop_vinfo)
3037 unsigned int i;
3038 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3039 tree store;
3041 if (vect_print_dump_info (REPORT_SLP))
3042 fprintf (vect_dump, "=== vect_analyze_slp ===");
3044 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3045 if (!vect_analyze_slp_instance (loop_vinfo, store))
3047 /* SLP failed. No instance can be SLPed in the loop. */
3048 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3049 fprintf (vect_dump, "SLP failed.");
3051 return false;
3054 return true;
3058 /* For each possible SLP instance decide whether to SLP it and calculate overall
3059 unrolling factor needed to SLP the loop. */
3061 static void
3062 vect_make_slp_decision (loop_vec_info loop_vinfo)
3064 unsigned int i, unrolling_factor = 1;
3065 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3066 slp_instance instance;
3067 int decided_to_slp = 0;
3069 if (vect_print_dump_info (REPORT_SLP))
3070 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3072 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3074 /* FORNOW: SLP if you can. */
3075 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3076 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3078 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3079 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3080 loop-based vectorization. Such stmts will be marked as HYBRID. */
3081 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3082 decided_to_slp++;
3085 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3087 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3088 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3089 decided_to_slp, unrolling_factor);
3093 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3094 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3096 static void
3097 vect_detect_hybrid_slp_stmts (slp_tree node)
3099 int i;
3100 tree stmt;
3101 imm_use_iterator imm_iter;
3102 tree use_stmt;
3104 if (!node)
3105 return;
3107 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3108 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3109 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3110 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3111 if (vinfo_for_stmt (use_stmt)
3112 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3113 vect_mark_slp_stmts (node, hybrid, i);
3115 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3116 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3120 /* Find stmts that must be both vectorized and SLPed. */
3122 static void
3123 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3125 unsigned int i;
3126 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3127 slp_instance instance;
3129 if (vect_print_dump_info (REPORT_SLP))
3130 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3132 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3133 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3137 /* Function vect_analyze_data_refs.
3139 Find all the data references in the loop.
3141 The general structure of the analysis of data refs in the vectorizer is as
3142 follows:
3143 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3144 find and analyze all data-refs in the loop and their dependences.
3145 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3146 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3147 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3151 static bool
3152 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3155 unsigned int i;
3156 VEC (data_reference_p, heap) *datarefs;
3157 struct data_reference *dr;
3158 tree scalar_type;
3160 if (vect_print_dump_info (REPORT_DETAILS))
3161 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3163 compute_data_dependences_for_loop (loop, true,
3164 &LOOP_VINFO_DATAREFS (loop_vinfo),
3165 &LOOP_VINFO_DDRS (loop_vinfo));
3167 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3168 from stmt_vec_info struct to DR and vectype. */
3169 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3171 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3173 tree stmt;
3174 stmt_vec_info stmt_info;
3175 basic_block bb;
3176 tree base, offset, init;
3178 if (!dr || !DR_REF (dr))
3180 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3181 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3182 return false;
3185 stmt = DR_STMT (dr);
3186 stmt_info = vinfo_for_stmt (stmt);
3188 /* Check that analysis of the data-ref succeeded. */
3189 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3190 || !DR_STEP (dr))
3192 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3194 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3195 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3197 return false;
3200 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3202 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3203 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3204 "constant");
3205 return false;
3208 if (!DR_SYMBOL_TAG (dr))
3210 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3212 fprintf (vect_dump, "not vectorized: no memory tag for ");
3213 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3215 return false;
3218 base = unshare_expr (DR_BASE_ADDRESS (dr));
3219 offset = unshare_expr (DR_OFFSET (dr));
3220 init = unshare_expr (DR_INIT (dr));
3222 /* Update DR field in stmt_vec_info struct. */
3223 bb = bb_for_stmt (stmt);
3225 /* If the dataref is in an inner-loop of the loop that is considered for
3226 for vectorization, we also want to analyze the access relative to
3227 the outer-loop (DR contains information only relative to the
3228 inner-most enclosing loop). We do that by building a reference to the
3229 first location accessed by the inner-loop, and analyze it relative to
3230 the outer-loop. */
3231 if (nested_in_vect_loop_p (loop, stmt))
3233 tree outer_step, outer_base, outer_init;
3234 HOST_WIDE_INT pbitsize, pbitpos;
3235 tree poffset;
3236 enum machine_mode pmode;
3237 int punsignedp, pvolatilep;
3238 affine_iv base_iv, offset_iv;
3239 tree dinit;
3241 /* Build a reference to the first location accessed by the
3242 inner-loop: *(BASE+INIT). (The first location is actually
3243 BASE+INIT+OFFSET, but we add OFFSET separately later. */
3244 tree inner_base = build_fold_indirect_ref
3245 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
3247 if (vect_print_dump_info (REPORT_DETAILS))
3249 fprintf (dump_file, "analyze in outer-loop: ");
3250 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3253 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3254 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3255 gcc_assert (outer_base != NULL_TREE);
3257 if (pbitpos % BITS_PER_UNIT != 0)
3259 if (vect_print_dump_info (REPORT_DETAILS))
3260 fprintf (dump_file, "failed: bit offset alignment.\n");
3261 return false;
3264 outer_base = build_fold_addr_expr (outer_base);
3265 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3267 if (vect_print_dump_info (REPORT_DETAILS))
3268 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3269 return false;
3272 if (offset)
3274 if (poffset)
3275 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3276 else
3277 poffset = offset;
3280 if (!poffset)
3282 offset_iv.base = ssize_int (0);
3283 offset_iv.step = ssize_int (0);
3285 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3287 if (vect_print_dump_info (REPORT_DETAILS))
3288 fprintf (dump_file, "evolution of offset is not affine.\n");
3289 return false;
3292 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3293 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3294 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3295 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3296 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3298 outer_step = size_binop (PLUS_EXPR,
3299 fold_convert (ssizetype, base_iv.step),
3300 fold_convert (ssizetype, offset_iv.step));
3302 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3303 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3304 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3305 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3306 STMT_VINFO_DR_OFFSET (stmt_info) =
3307 fold_convert (ssizetype, offset_iv.base);
3308 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3309 size_int (highest_pow2_factor (offset_iv.base));
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3313 fprintf (dump_file, "\touter base_address: ");
3314 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3315 fprintf (dump_file, "\n\touter offset from base address: ");
3316 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3317 fprintf (dump_file, "\n\touter constant offset from base address: ");
3318 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3319 fprintf (dump_file, "\n\touter step: ");
3320 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3321 fprintf (dump_file, "\n\touter aligned to: ");
3322 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3326 if (STMT_VINFO_DATA_REF (stmt_info))
3328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3330 fprintf (vect_dump,
3331 "not vectorized: more than one data ref in stmt: ");
3332 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3334 return false;
3336 STMT_VINFO_DATA_REF (stmt_info) = dr;
3338 /* Set vectype for STMT. */
3339 scalar_type = TREE_TYPE (DR_REF (dr));
3340 STMT_VINFO_VECTYPE (stmt_info) =
3341 get_vectype_for_scalar_type (scalar_type);
3342 if (!STMT_VINFO_VECTYPE (stmt_info))
3344 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3346 fprintf (vect_dump,
3347 "not vectorized: no vectype for stmt: ");
3348 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3349 fprintf (vect_dump, " scalar_type: ");
3350 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3352 return false;
3356 return true;
3360 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3362 /* Function vect_mark_relevant.
3364 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3366 static void
3367 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3368 enum vect_relevant relevant, bool live_p)
3370 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3371 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3372 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3374 if (vect_print_dump_info (REPORT_DETAILS))
3375 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3377 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3379 tree pattern_stmt;
3381 /* This is the last stmt in a sequence that was detected as a
3382 pattern that can potentially be vectorized. Don't mark the stmt
3383 as relevant/live because it's not going to be vectorized.
3384 Instead mark the pattern-stmt that replaces it. */
3386 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3388 if (vect_print_dump_info (REPORT_DETAILS))
3389 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3390 stmt_info = vinfo_for_stmt (pattern_stmt);
3391 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3392 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3393 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3394 stmt = pattern_stmt;
3397 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3398 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3399 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3401 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3402 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3404 if (vect_print_dump_info (REPORT_DETAILS))
3405 fprintf (vect_dump, "already marked relevant/live.");
3406 return;
3409 VEC_safe_push (tree, heap, *worklist, stmt);
3413 /* Function vect_stmt_relevant_p.
3415 Return true if STMT in loop that is represented by LOOP_VINFO is
3416 "relevant for vectorization".
3418 A stmt is considered "relevant for vectorization" if:
3419 - it has uses outside the loop.
3420 - it has vdefs (it alters memory).
3421 - control stmts in the loop (except for the exit condition).
3423 CHECKME: what other side effects would the vectorizer allow? */
3425 static bool
3426 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3427 enum vect_relevant *relevant, bool *live_p)
3429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3430 ssa_op_iter op_iter;
3431 imm_use_iterator imm_iter;
3432 use_operand_p use_p;
3433 def_operand_p def_p;
3435 *relevant = vect_unused_in_loop;
3436 *live_p = false;
3438 /* cond stmt other than loop exit cond. */
3439 if (is_ctrl_stmt (stmt)
3440 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3441 *relevant = vect_used_in_loop;
3443 /* changing memory. */
3444 if (TREE_CODE (stmt) != PHI_NODE)
3445 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3447 if (vect_print_dump_info (REPORT_DETAILS))
3448 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3449 *relevant = vect_used_in_loop;
3452 /* uses outside the loop. */
3453 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3455 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3457 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3458 if (!flow_bb_inside_loop_p (loop, bb))
3460 if (vect_print_dump_info (REPORT_DETAILS))
3461 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3463 /* We expect all such uses to be in the loop exit phis
3464 (because of loop closed form) */
3465 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3466 gcc_assert (bb == single_exit (loop)->dest);
3468 *live_p = true;
3473 return (*live_p || *relevant);
3478 Function process_use.
3480 Inputs:
3481 - a USE in STMT in a loop represented by LOOP_VINFO
3482 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3483 that defined USE. This is dont by calling mark_relevant and passing it
3484 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3486 Outputs:
3487 Generally, LIVE_P and RELEVANT are used to define the liveness and
3488 relevance info of the DEF_STMT of this USE:
3489 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3490 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3491 Exceptions:
3492 - case 1: If USE is used only for address computations (e.g. array indexing),
3493 which does not need to be directly vectorized, then the liveness/relevance
3494 of the respective DEF_STMT is left unchanged.
3495 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3496 skip DEF_STMT cause it had already been processed.
3497 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3498 be modified accordingly.
3500 Return true if everything is as expected. Return false otherwise. */
3502 static bool
3503 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3504 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3506 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3508 stmt_vec_info dstmt_vinfo;
3509 basic_block bb, def_bb;
3510 tree def, def_stmt;
3511 enum vect_def_type dt;
3513 /* case 1: we are only interested in uses that need to be vectorized. Uses
3514 that are used for address computation are not considered relevant. */
3515 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3516 return true;
3518 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3520 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3521 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3522 return false;
3525 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3526 return true;
3528 def_bb = bb_for_stmt (def_stmt);
3529 if (!flow_bb_inside_loop_p (loop, def_bb))
3531 if (vect_print_dump_info (REPORT_DETAILS))
3532 fprintf (vect_dump, "def_stmt is out of loop.");
3533 return true;
3536 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3537 DEF_STMT must have already been processed, because this should be the
3538 only way that STMT, which is a reduction-phi, was put in the worklist,
3539 as there should be no other uses for DEF_STMT in the loop. So we just
3540 check that everything is as expected, and we are done. */
3541 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3542 bb = bb_for_stmt (stmt);
3543 if (TREE_CODE (stmt) == PHI_NODE
3544 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3545 && TREE_CODE (def_stmt) != PHI_NODE
3546 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3547 && bb->loop_father == def_bb->loop_father)
3549 if (vect_print_dump_info (REPORT_DETAILS))
3550 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3551 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3552 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3553 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3554 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3555 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3556 return true;
3559 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3560 outer-loop-header-bb:
3561 d = def_stmt
3562 inner-loop:
3563 stmt # use (d)
3564 outer-loop-tail-bb:
3565 ... */
3566 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3568 if (vect_print_dump_info (REPORT_DETAILS))
3569 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3570 switch (relevant)
3572 case vect_unused_in_loop:
3573 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3574 vect_used_by_reduction : vect_unused_in_loop;
3575 break;
3576 case vect_used_in_outer_by_reduction:
3577 relevant = vect_used_by_reduction;
3578 break;
3579 case vect_used_in_outer:
3580 relevant = vect_used_in_loop;
3581 break;
3582 case vect_used_by_reduction:
3583 case vect_used_in_loop:
3584 break;
3586 default:
3587 gcc_unreachable ();
3591 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3592 outer-loop-header-bb:
3594 inner-loop:
3595 d = def_stmt
3596 outer-loop-tail-bb:
3597 stmt # use (d) */
3598 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3600 if (vect_print_dump_info (REPORT_DETAILS))
3601 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3602 switch (relevant)
3604 case vect_unused_in_loop:
3605 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3606 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3607 break;
3609 case vect_used_in_outer_by_reduction:
3610 case vect_used_in_outer:
3611 break;
3613 case vect_used_by_reduction:
3614 relevant = vect_used_in_outer_by_reduction;
3615 break;
3617 case vect_used_in_loop:
3618 relevant = vect_used_in_outer;
3619 break;
3621 default:
3622 gcc_unreachable ();
3626 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3627 return true;
3631 /* Function vect_mark_stmts_to_be_vectorized.
3633 Not all stmts in the loop need to be vectorized. For example:
3635 for i...
3636 for j...
3637 1. T0 = i + j
3638 2. T1 = a[T0]
3640 3. j = j + 1
3642 Stmt 1 and 3 do not need to be vectorized, because loop control and
3643 addressing of vectorized data-refs are handled differently.
3645 This pass detects such stmts. */
3647 static bool
3648 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3650 VEC(tree,heap) *worklist;
3651 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3652 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3653 unsigned int nbbs = loop->num_nodes;
3654 block_stmt_iterator si;
3655 tree stmt;
3656 stmt_ann_t ann;
3657 unsigned int i;
3658 stmt_vec_info stmt_vinfo;
3659 basic_block bb;
3660 tree phi;
3661 bool live_p;
3662 enum vect_relevant relevant;
3664 if (vect_print_dump_info (REPORT_DETAILS))
3665 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3667 worklist = VEC_alloc (tree, heap, 64);
3669 /* 1. Init worklist. */
3670 for (i = 0; i < nbbs; i++)
3672 bb = bbs[i];
3673 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3675 if (vect_print_dump_info (REPORT_DETAILS))
3677 fprintf (vect_dump, "init: phi relevant? ");
3678 print_generic_expr (vect_dump, phi, TDF_SLIM);
3681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3682 vect_mark_relevant (&worklist, phi, relevant, live_p);
3684 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3686 stmt = bsi_stmt (si);
3687 if (vect_print_dump_info (REPORT_DETAILS))
3689 fprintf (vect_dump, "init: stmt relevant? ");
3690 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3694 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3698 /* 2. Process_worklist */
3699 while (VEC_length (tree, worklist) > 0)
3701 use_operand_p use_p;
3702 ssa_op_iter iter;
3704 stmt = VEC_pop (tree, worklist);
3705 if (vect_print_dump_info (REPORT_DETAILS))
3707 fprintf (vect_dump, "worklist: examine stmt: ");
3708 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3713 liveness and relevance properties of STMT. */
3714 ann = stmt_ann (stmt);
3715 stmt_vinfo = vinfo_for_stmt (stmt);
3716 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3717 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3719 /* Generally, the liveness and relevance properties of STMT are
3720 propagated as is to the DEF_STMTs of its USEs:
3721 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3722 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3724 One exception is when STMT has been identified as defining a reduction
3725 variable; in this case we set the liveness/relevance as follows:
3726 live_p = false
3727 relevant = vect_used_by_reduction
3728 This is because we distinguish between two kinds of relevant stmts -
3729 those that are used by a reduction computation, and those that are
3730 (also) used by a regular computation. This allows us later on to
3731 identify stmts that are used solely by a reduction, and therefore the
3732 order of the results that they produce does not have to be kept.
3734 Reduction phis are expected to be used by a reduction stmt, or by
3735 in an outer loop; Other reduction stmts are expected to be
3736 in the loop, and possibly used by a stmt in an outer loop.
3737 Here are the expected values of "relevant" for reduction phis/stmts:
3739 relevance: phi stmt
3740 vect_unused_in_loop ok
3741 vect_used_in_outer_by_reduction ok ok
3742 vect_used_in_outer ok ok
3743 vect_used_by_reduction ok
3744 vect_used_in_loop */
3746 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3748 enum vect_relevant tmp_relevant = relevant;
3749 switch (tmp_relevant)
3751 case vect_unused_in_loop:
3752 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3753 relevant = vect_used_by_reduction;
3754 break;
3756 case vect_used_in_outer_by_reduction:
3757 case vect_used_in_outer:
3758 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3759 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3760 break;
3762 case vect_used_by_reduction:
3763 if (TREE_CODE (stmt) == PHI_NODE)
3764 break;
3765 /* fall through */
3766 case vect_used_in_loop:
3767 default:
3768 if (vect_print_dump_info (REPORT_DETAILS))
3769 fprintf (vect_dump, "unsupported use of reduction.");
3770 VEC_free (tree, heap, worklist);
3771 return false;
3773 live_p = false;
3776 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3778 tree op = USE_FROM_PTR (use_p);
3779 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3781 VEC_free (tree, heap, worklist);
3782 return false;
3785 } /* while worklist */
3787 VEC_free (tree, heap, worklist);
3788 return true;
3792 /* Function vect_can_advance_ivs_p
3794 In case the number of iterations that LOOP iterates is unknown at compile
3795 time, an epilog loop will be generated, and the loop induction variables
3796 (IVs) will be "advanced" to the value they are supposed to take just before
3797 the epilog loop. Here we check that the access function of the loop IVs
3798 and the expression that represents the loop bound are simple enough.
3799 These restrictions will be relaxed in the future. */
3801 static bool
3802 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3804 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3805 basic_block bb = loop->header;
3806 tree phi;
3808 /* Analyze phi functions of the loop header. */
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3813 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3815 tree access_fn = NULL;
3816 tree evolution_part;
3818 if (vect_print_dump_info (REPORT_DETAILS))
3820 fprintf (vect_dump, "Analyze phi: ");
3821 print_generic_expr (vect_dump, phi, TDF_SLIM);
3824 /* Skip virtual phi's. The data dependences that are associated with
3825 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3827 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3829 if (vect_print_dump_info (REPORT_DETAILS))
3830 fprintf (vect_dump, "virtual phi. skip.");
3831 continue;
3834 /* Skip reduction phis. */
3836 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3838 if (vect_print_dump_info (REPORT_DETAILS))
3839 fprintf (vect_dump, "reduc phi. skip.");
3840 continue;
3843 /* Analyze the evolution function. */
3845 access_fn = instantiate_parameters
3846 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3848 if (!access_fn)
3850 if (vect_print_dump_info (REPORT_DETAILS))
3851 fprintf (vect_dump, "No Access function.");
3852 return false;
3855 if (vect_print_dump_info (REPORT_DETAILS))
3857 fprintf (vect_dump, "Access function of PHI: ");
3858 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3861 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3863 if (evolution_part == NULL_TREE)
3865 if (vect_print_dump_info (REPORT_DETAILS))
3866 fprintf (vect_dump, "No evolution.");
3867 return false;
3870 /* FORNOW: We do not transform initial conditions of IVs
3871 which evolution functions are a polynomial of degree >= 2. */
3873 if (tree_is_chrec (evolution_part))
3874 return false;
3877 return true;
3881 /* Function vect_get_loop_niters.
3883 Determine how many iterations the loop is executed.
3884 If an expression that represents the number of iterations
3885 can be constructed, place it in NUMBER_OF_ITERATIONS.
3886 Return the loop exit condition. */
3888 static tree
3889 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3891 tree niters;
3893 if (vect_print_dump_info (REPORT_DETAILS))
3894 fprintf (vect_dump, "=== get_loop_niters ===");
3896 niters = number_of_exit_cond_executions (loop);
3898 if (niters != NULL_TREE
3899 && niters != chrec_dont_know)
3901 *number_of_iterations = niters;
3903 if (vect_print_dump_info (REPORT_DETAILS))
3905 fprintf (vect_dump, "==> get_loop_niters:" );
3906 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3910 return get_loop_exit_condition (loop);
3914 /* Function vect_analyze_loop_1.
3916 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3917 for it. The different analyses will record information in the
3918 loop_vec_info struct. This is a subset of the analyses applied in
3919 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3920 that is now considered for (outer-loop) vectorization. */
3922 static loop_vec_info
3923 vect_analyze_loop_1 (struct loop *loop)
3925 loop_vec_info loop_vinfo;
3927 if (vect_print_dump_info (REPORT_DETAILS))
3928 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3930 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3932 loop_vinfo = vect_analyze_loop_form (loop);
3933 if (!loop_vinfo)
3935 if (vect_print_dump_info (REPORT_DETAILS))
3936 fprintf (vect_dump, "bad inner-loop form.");
3937 return NULL;
3940 return loop_vinfo;
3944 /* Function vect_analyze_loop_form.
3946 Verify that certain CFG restrictions hold, including:
3947 - the loop has a pre-header
3948 - the loop has a single entry and exit
3949 - the loop exit condition is simple enough, and the number of iterations
3950 can be analyzed (a countable loop). */
3952 static loop_vec_info
3953 vect_analyze_loop_form (struct loop *loop)
3955 loop_vec_info loop_vinfo;
3956 tree loop_cond;
3957 tree number_of_iterations = NULL;
3958 loop_vec_info inner_loop_vinfo = NULL;
3960 if (vect_print_dump_info (REPORT_DETAILS))
3961 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3963 /* Different restrictions apply when we are considering an inner-most loop,
3964 vs. an outer (nested) loop.
3965 (FORNOW. May want to relax some of these restrictions in the future). */
3967 if (!loop->inner)
3969 /* Inner-most loop. We currently require that the number of BBs is
3970 exactly 2 (the header and latch). Vectorizable inner-most loops
3971 look like this:
3973 (pre-header)
3975 header <--------+
3976 | | |
3977 | +--> latch --+
3979 (exit-bb) */
3981 if (loop->num_nodes != 2)
3983 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3984 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3985 return NULL;
3988 if (empty_block_p (loop->header))
3990 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3991 fprintf (vect_dump, "not vectorized: empty loop.");
3992 return NULL;
3995 else
3997 struct loop *innerloop = loop->inner;
3998 edge backedge, entryedge;
4000 /* Nested loop. We currently require that the loop is doubly-nested,
4001 contains a single inner loop, and the number of BBs is exactly 5.
4002 Vectorizable outer-loops look like this:
4004 (pre-header)
4006 header <---+
4008 inner-loop |
4010 tail ------+
4012 (exit-bb)
4014 The inner-loop has the properties expected of inner-most loops
4015 as described above. */
4017 if ((loop->inner)->inner || (loop->inner)->next)
4019 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4020 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4021 return NULL;
4024 /* Analyze the inner-loop. */
4025 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4026 if (!inner_loop_vinfo)
4028 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4029 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4030 return NULL;
4033 if (!expr_invariant_in_loop_p (loop,
4034 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4036 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4037 fprintf (vect_dump,
4038 "not vectorized: inner-loop count not invariant.");
4039 destroy_loop_vec_info (inner_loop_vinfo, true);
4040 return NULL;
4043 if (loop->num_nodes != 5)
4045 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4046 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4047 destroy_loop_vec_info (inner_loop_vinfo, true);
4048 return NULL;
4051 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4052 backedge = EDGE_PRED (innerloop->header, 1);
4053 entryedge = EDGE_PRED (innerloop->header, 0);
4054 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4056 backedge = EDGE_PRED (innerloop->header, 0);
4057 entryedge = EDGE_PRED (innerloop->header, 1);
4060 if (entryedge->src != loop->header
4061 || !single_exit (innerloop)
4062 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4064 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4065 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4066 destroy_loop_vec_info (inner_loop_vinfo, true);
4067 return NULL;
4070 if (vect_print_dump_info (REPORT_DETAILS))
4071 fprintf (vect_dump, "Considering outer-loop vectorization.");
4074 if (!single_exit (loop)
4075 || EDGE_COUNT (loop->header->preds) != 2)
4077 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4079 if (!single_exit (loop))
4080 fprintf (vect_dump, "not vectorized: multiple exits.");
4081 else if (EDGE_COUNT (loop->header->preds) != 2)
4082 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4084 if (inner_loop_vinfo)
4085 destroy_loop_vec_info (inner_loop_vinfo, true);
4086 return NULL;
4089 /* We assume that the loop exit condition is at the end of the loop. i.e,
4090 that the loop is represented as a do-while (with a proper if-guard
4091 before the loop if needed), where the loop header contains all the
4092 executable statements, and the latch is empty. */
4093 if (!empty_block_p (loop->latch)
4094 || phi_nodes (loop->latch))
4096 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4097 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4098 if (inner_loop_vinfo)
4099 destroy_loop_vec_info (inner_loop_vinfo, true);
4100 return NULL;
4103 /* Make sure there exists a single-predecessor exit bb: */
4104 if (!single_pred_p (single_exit (loop)->dest))
4106 edge e = single_exit (loop);
4107 if (!(e->flags & EDGE_ABNORMAL))
4109 split_loop_exit_edge (e);
4110 if (vect_print_dump_info (REPORT_DETAILS))
4111 fprintf (vect_dump, "split exit edge.");
4113 else
4115 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4116 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4117 if (inner_loop_vinfo)
4118 destroy_loop_vec_info (inner_loop_vinfo, true);
4119 return NULL;
4123 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4124 if (!loop_cond)
4126 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4127 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4128 if (inner_loop_vinfo)
4129 destroy_loop_vec_info (inner_loop_vinfo, true);
4130 return NULL;
4133 if (!number_of_iterations)
4135 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4136 fprintf (vect_dump,
4137 "not vectorized: number of iterations cannot be computed.");
4138 if (inner_loop_vinfo)
4139 destroy_loop_vec_info (inner_loop_vinfo, true);
4140 return NULL;
4143 if (chrec_contains_undetermined (number_of_iterations))
4145 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4146 fprintf (vect_dump, "Infinite number of iterations.");
4147 if (inner_loop_vinfo)
4148 destroy_loop_vec_info (inner_loop_vinfo, true);
4149 return NULL;
4152 if (!NITERS_KNOWN_P (number_of_iterations))
4154 if (vect_print_dump_info (REPORT_DETAILS))
4156 fprintf (vect_dump, "Symbolic number of iterations is ");
4157 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4160 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4162 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4163 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4164 if (inner_loop_vinfo)
4165 destroy_loop_vec_info (inner_loop_vinfo, false);
4166 return NULL;
4169 loop_vinfo = new_loop_vec_info (loop);
4170 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4172 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4174 /* CHECKME: May want to keep it around it in the future. */
4175 if (inner_loop_vinfo)
4176 destroy_loop_vec_info (inner_loop_vinfo, false);
4178 gcc_assert (!loop->aux);
4179 loop->aux = loop_vinfo;
4180 return loop_vinfo;
4184 /* Function vect_analyze_loop.
4186 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4187 for it. The different analyses will record information in the
4188 loop_vec_info struct. */
4189 loop_vec_info
4190 vect_analyze_loop (struct loop *loop)
4192 bool ok;
4193 loop_vec_info loop_vinfo;
4195 if (vect_print_dump_info (REPORT_DETAILS))
4196 fprintf (vect_dump, "===== analyze_loop_nest =====");
4198 if (loop_outer (loop)
4199 && loop_vec_info_for_loop (loop_outer (loop))
4200 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4202 if (vect_print_dump_info (REPORT_DETAILS))
4203 fprintf (vect_dump, "outer-loop already vectorized.");
4204 return NULL;
4207 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4209 loop_vinfo = vect_analyze_loop_form (loop);
4210 if (!loop_vinfo)
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "bad loop form.");
4214 return NULL;
4217 /* Find all data references in the loop (which correspond to vdefs/vuses)
4218 and analyze their evolution in the loop.
4220 FORNOW: Handle only simple, array references, which
4221 alignment can be forced, and aligned pointer-references. */
4223 ok = vect_analyze_data_refs (loop_vinfo);
4224 if (!ok)
4226 if (vect_print_dump_info (REPORT_DETAILS))
4227 fprintf (vect_dump, "bad data references.");
4228 destroy_loop_vec_info (loop_vinfo, true);
4229 return NULL;
4232 /* Classify all cross-iteration scalar data-flow cycles.
4233 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4235 vect_analyze_scalar_cycles (loop_vinfo);
4237 vect_pattern_recog (loop_vinfo);
4239 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4241 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4242 if (!ok)
4244 if (vect_print_dump_info (REPORT_DETAILS))
4245 fprintf (vect_dump, "unexpected pattern.");
4246 destroy_loop_vec_info (loop_vinfo, true);
4247 return NULL;
4250 /* Analyze the alignment of the data-refs in the loop.
4251 Fail if a data reference is found that cannot be vectorized. */
4253 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4254 if (!ok)
4256 if (vect_print_dump_info (REPORT_DETAILS))
4257 fprintf (vect_dump, "bad data alignment.");
4258 destroy_loop_vec_info (loop_vinfo, true);
4259 return NULL;
4262 ok = vect_determine_vectorization_factor (loop_vinfo);
4263 if (!ok)
4265 if (vect_print_dump_info (REPORT_DETAILS))
4266 fprintf (vect_dump, "can't determine vectorization factor.");
4267 destroy_loop_vec_info (loop_vinfo, true);
4268 return NULL;
4271 /* Analyze data dependences between the data-refs in the loop.
4272 FORNOW: fail at the first data dependence that we encounter. */
4274 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4275 if (!ok)
4277 if (vect_print_dump_info (REPORT_DETAILS))
4278 fprintf (vect_dump, "bad data dependence.");
4279 destroy_loop_vec_info (loop_vinfo, true);
4280 return NULL;
4283 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4284 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4286 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4287 if (!ok)
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "bad data access.");
4291 destroy_loop_vec_info (loop_vinfo, true);
4292 return NULL;
4295 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4296 It is important to call pruning after vect_analyze_data_ref_accesses,
4297 since we use grouping information gathered by interleaving analysis. */
4298 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4299 if (!ok)
4301 if (vect_print_dump_info (REPORT_DETAILS))
4302 fprintf (vect_dump, "too long list of versioning for alias "
4303 "run-time tests.");
4304 destroy_loop_vec_info (loop_vinfo, true);
4305 return NULL;
4308 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4309 ok = vect_analyze_slp (loop_vinfo);
4310 if (ok)
4312 /* Decide which possible SLP instances to SLP. */
4313 vect_make_slp_decision (loop_vinfo);
4315 /* Find stmts that need to be both vectorized and SLPed. */
4316 vect_detect_hybrid_slp (loop_vinfo);
4319 /* This pass will decide on using loop versioning and/or loop peeling in
4320 order to enhance the alignment of data references in the loop. */
4322 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4323 if (!ok)
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "bad data alignment.");
4327 destroy_loop_vec_info (loop_vinfo, true);
4328 return NULL;
4331 /* Scan all the operations in the loop and make sure they are
4332 vectorizable. */
4334 ok = vect_analyze_operations (loop_vinfo);
4335 if (!ok)
4337 if (vect_print_dump_info (REPORT_DETAILS))
4338 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4339 destroy_loop_vec_info (loop_vinfo, true);
4340 return NULL;
4343 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4345 return loop_vinfo;