1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4-byte elements,
73 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
79 in the loop.
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
82 original loop:
83 for (i=0; i<N; i++){
84 a[i] = b[i] + c[i];
87 vectorized loop:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
93 static bool
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
101 tree scalar_type;
102 tree phi;
103 tree vectype;
104 unsigned int nunits;
105 stmt_vec_info stmt_info;
106 int i;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
138 if (!vectype)
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump,
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
146 return false;
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* Skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
185 continue;
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
195 return false;
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
206 return false;
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case in which a vectype has already been set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
218 else
220 tree operation;
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
226 result (lhs).
227 For stmts whose result-type is different from the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
239 check the rhs. */
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR)
247 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
249 scalar_type = rhs_type;
252 if (vect_print_dump_info (REPORT_DETAILS))
254 fprintf (vect_dump, "get vectype for scalar type: ");
255 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
258 vectype = get_vectype_for_scalar_type (scalar_type);
259 if (!vectype)
261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
263 fprintf (vect_dump,
264 "not vectorized: unsupported data-type ");
265 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
267 return false;
269 STMT_VINFO_VECTYPE (stmt_info) = vectype;
272 if (vect_print_dump_info (REPORT_DETAILS))
274 fprintf (vect_dump, "vectype: ");
275 print_generic_expr (vect_dump, vectype, TDF_SLIM);
278 nunits = TYPE_VECTOR_SUBPARTS (vectype);
279 if (vect_print_dump_info (REPORT_DETAILS))
280 fprintf (vect_dump, "nunits = %d", nunits);
282 if (!vectorization_factor
283 || (nunits > vectorization_factor))
284 vectorization_factor = nunits;
289 /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
290 if (vect_print_dump_info (REPORT_DETAILS))
291 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
292 if (vectorization_factor <= 1)
294 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
295 fprintf (vect_dump, "not vectorized: unsupported data-type");
296 return false;
298 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
300 return true;
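
/* Editorial sketch (not part of GCC): the arithmetic behind the VF choice
   above, with an assumed 16-byte vector size. Each scalar type contributes
   VS / sizeof (type) units per vector, and VF is the maximum nunits seen:

     #include <stdio.h>

     int
     main (void)
     {
       unsigned vs = 16;                  /* vector size in bytes (assumed) */
       unsigned type_sizes[] = { 4, 4 };  /* element sizes seen in the loop */
       unsigned vf = 0, i;
       for (i = 0; i < sizeof (type_sizes) / sizeof (*type_sizes); i++)
         {
           unsigned nunits = vs / type_sizes[i];  /* elements per vector */
           if (!vf || nunits > vf)
             vf = nunits;
         }
       printf ("VF = %u\n", vf);          /* prints: VF = 4 */
       return 0;
     }
*/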
304 /* SLP costs are calculated according to the SLP instance unrolling factor (i.e.,
305 the number of created vector stmts depends on the unrolling factor). However,
306 the actual number of vector stmts for every SLP node depends on VF which is
307 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
308 In this function we assume that the inside costs calculated in
309 vect_model_xxx_cost are linear in ncopies. */
311 static void
312 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
314 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
315 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
316 slp_instance instance;
318 if (vect_print_dump_info (REPORT_SLP))
319 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
321 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
322 /* We assume that costs are linear in ncopies. */
323 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
324 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
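
/* Editorial sketch (not part of GCC): the update above in isolation. If an
   SLP instance's inside-of-loop cost C was computed for its own unrolling
   factor UF, and the final VF is a multiple of UF, the corrected cost is
   C * (VF / UF), under the stated assumption that costs are linear in the
   number of copies:

     unsigned cost = 6, uf = 2, vf = 4;   /* assumed values */
     cost *= vf / uf;                     /* cost becomes 12 */
*/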
328 /* Function vect_analyze_operations.
330 Scan the loop stmts and make sure they are all vectorizable. */
332 static bool
333 vect_analyze_operations (loop_vec_info loop_vinfo)
335 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
337 int nbbs = loop->num_nodes;
338 block_stmt_iterator si;
339 unsigned int vectorization_factor = 0;
340 int i;
341 bool ok;
342 tree phi;
343 stmt_vec_info stmt_info;
344 bool need_to_vectorize = false;
345 int min_profitable_iters;
346 int min_scalar_loop_bound;
347 unsigned int th;
348 bool only_slp_in_loop = true;
350 if (vect_print_dump_info (REPORT_DETAILS))
351 fprintf (vect_dump, "=== vect_analyze_operations ===");
353 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
354 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
356 for (i = 0; i < nbbs; i++)
358 basic_block bb = bbs[i];
360 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
362 ok = true;
364 stmt_info = vinfo_for_stmt (phi);
365 if (vect_print_dump_info (REPORT_DETAILS))
367 fprintf (vect_dump, "examining phi: ");
368 print_generic_expr (vect_dump, phi, TDF_SLIM);
371 if (! is_loop_header_bb_p (bb))
373 /* inner-loop loop-closed exit phi in outer-loop vectorization
374 (i.e. a phi in the tail of the outer-loop).
375 FORNOW: we currently don't support the case that these phis
376 are not used in the outer-loop, because this case requires
377 us to actually do something here. */
378 if (!STMT_VINFO_RELEVANT_P (stmt_info)
379 || STMT_VINFO_LIVE_P (stmt_info))
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump,
383 "Unsupported loop-closed phi in outer-loop.");
384 return false;
386 continue;
389 gcc_assert (stmt_info);
391 if (STMT_VINFO_LIVE_P (stmt_info))
393 /* FORNOW: not yet supported. */
394 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
395 fprintf (vect_dump, "not vectorized: value used after loop.");
396 return false;
399 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
400 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
402 /* A scalar-dependence cycle that we don't support. */
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
404 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
405 return false;
408 if (STMT_VINFO_RELEVANT_P (stmt_info))
410 need_to_vectorize = true;
411 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
412 ok = vectorizable_induction (phi, NULL, NULL);
415 if (!ok)
417 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
419 fprintf (vect_dump,
420 "not vectorized: relevant phi not supported: ");
421 print_generic_expr (vect_dump, phi, TDF_SLIM);
423 return false;
427 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
429 tree stmt = bsi_stmt (si);
430 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
431 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
433 if (vect_print_dump_info (REPORT_DETAILS))
435 fprintf (vect_dump, "==> examining statement: ");
436 print_generic_expr (vect_dump, stmt, TDF_SLIM);
439 gcc_assert (stmt_info);
441 /* Skip stmts which do not need to be vectorized.
442 This is expected to include:
443 - the COND_EXPR which is the loop exit condition
444 - any LABEL_EXPRs in the loop
445 - computations that are used only for array indexing or loop
446 control */
448 if (!STMT_VINFO_RELEVANT_P (stmt_info)
449 && !STMT_VINFO_LIVE_P (stmt_info))
451 if (vect_print_dump_info (REPORT_DETAILS))
452 fprintf (vect_dump, "irrelevant.");
453 continue;
456 switch (STMT_VINFO_DEF_TYPE (stmt_info))
458 case vect_loop_def:
459 break;
461 case vect_reduction_def:
462 gcc_assert (relevance == vect_used_in_outer
463 || relevance == vect_used_in_outer_by_reduction
464 || relevance == vect_unused_in_loop);
465 break;
467 case vect_induction_def:
468 case vect_constant_def:
469 case vect_invariant_def:
470 case vect_unknown_def_type:
471 default:
472 gcc_unreachable ();
475 if (STMT_VINFO_RELEVANT_P (stmt_info))
477 gcc_assert (GIMPLE_STMT_P (stmt)
478 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
479 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
480 need_to_vectorize = true;
483 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
484 || vectorizable_type_demotion (stmt, NULL, NULL)
485 || vectorizable_conversion (stmt, NULL, NULL, NULL)
486 || vectorizable_operation (stmt, NULL, NULL, NULL)
487 || vectorizable_assignment (stmt, NULL, NULL, NULL)
488 || vectorizable_load (stmt, NULL, NULL, NULL)
489 || vectorizable_call (stmt, NULL, NULL)
490 || vectorizable_store (stmt, NULL, NULL, NULL)
491 || vectorizable_condition (stmt, NULL, NULL)
492 || vectorizable_reduction (stmt, NULL, NULL));
494 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
495 need extra handling, except for vectorizable reductions. */
496 if (STMT_VINFO_LIVE_P (stmt_info)
497 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
498 ok |= vectorizable_live_operation (stmt, NULL, NULL);
500 if (!ok)
502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
504 fprintf (vect_dump, "not vectorized: stmt not supported: ");
505 print_generic_expr (vect_dump, stmt, TDF_SLIM);
507 return false;
510 if (!PURE_SLP_STMT (stmt_info))
512 /* STMT needs loop-based vectorization. */
513 only_slp_in_loop = false;
515 /* Groups of strided accesses whose size is not a power of 2 are
516 not yet vectorizable using loop-vectorization. Therefore, if
517 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
518 both SLPed and loop-based vectorized), the loop cannot be
519 vectorized. */
520 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
521 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
522 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
524 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "not vectorized: the size of group "
527 "of strided accesses is not a power of 2");
528 print_generic_expr (vect_dump, stmt, TDF_SLIM);
530 return false;
533 } /* stmts in bb */
534 } /* bbs */
536 /* All operations in the loop are either irrelevant (they deal with loop
537 control, or are dead), or are only used outside the loop and can be moved
538 out of the loop (e.g. invariants, inductions). The loop can be
539 optimized away by scalar optimizations. We're better off not
540 touching this loop. */
541 if (!need_to_vectorize)
543 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump,
545 "All the computation can be taken out of the loop.");
546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
547 fprintf (vect_dump,
548 "not vectorized: redundant loop. no profit to vectorize.");
549 return false;
552 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
553 vectorization factor of the loop is the unrolling factor required by the
554 SLP instances. If that unrolling factor is 1, we say that we perform
555 pure SLP on the loop; cross-iteration parallelism is not exploited. */
556 if (only_slp_in_loop)
557 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
558 else
559 vectorization_factor = least_common_multiple (vectorization_factor,
560 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
562 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
564 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
565 && vect_print_dump_info (REPORT_DETAILS))
566 fprintf (vect_dump,
567 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
568 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
570 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
571 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
573 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
574 fprintf (vect_dump, "not vectorized: iteration count too small.");
575 if (vect_print_dump_info (REPORT_DETAILS))
576 fprintf (vect_dump,"not vectorized: iteration count smaller than "
577 "vectorization factor.");
578 return false;
581 /* Analyze cost. Decide if worthwhile to vectorize. */
583 /* Once VF is set, SLP costs should be updated since the number of created
584 vector stmts depends on VF. */
585 vect_update_slp_costs_according_to_vf (loop_vinfo);
587 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
588 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
589 if (min_profitable_iters < 0)
591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
592 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
593 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "not vectorized: vector version will never be "
595 "profitable.");
596 return false;
599 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
600 * vectorization_factor) - 1);
602 /* Use the cost model only if it is more conservative than the
603 user-specified threshold. */
605 th = (unsigned) min_scalar_loop_bound;
606 if (min_profitable_iters
607 && (!min_scalar_loop_bound
608 || min_profitable_iters > min_scalar_loop_bound))
609 th = (unsigned) min_profitable_iters;
611 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
612 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
614 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
615 fprintf (vect_dump, "not vectorized: vectorization not "
616 "profitable.");
617 if (vect_print_dump_info (REPORT_DETAILS))
618 fprintf (vect_dump, "not vectorized: iteration count smaller than "
619 "user specified loop bound parameter or minimum "
620 "profitable iterations (whichever is more conservative).");
621 return false;
624 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
625 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
626 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
628 if (vect_print_dump_info (REPORT_DETAILS))
629 fprintf (vect_dump, "epilog loop required.");
630 if (!vect_can_advance_ivs_p (loop_vinfo))
632 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
633 fprintf (vect_dump,
634 "not vectorized: can't create epilog loop 1.");
635 return false;
637 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
640 fprintf (vect_dump,
641 "not vectorized: can't create epilog loop 2.");
642 return false;
646 return true;
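
/* Editorial sketch (not part of GCC): the threshold logic above picks the
   more conservative (larger) of the user-specified scalar loop bound and
   the cost model's minimum profitable iteration count; the numbers below
   are assumed for illustration:

     int min_scalar_loop_bound = 3;    /* (param value * VF) - 1, assumed */
     int min_profitable_iters = 10;    /* from the cost model, assumed */
     unsigned th = (unsigned) min_scalar_loop_bound;
     if (min_profitable_iters
         && (!min_scalar_loop_bound
             || min_profitable_iters > min_scalar_loop_bound))
       th = (unsigned) min_profitable_iters;
     /* th == 10: a loop known to run at most 10 iterations is rejected.  */
*/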
650 /* Function exist_non_indexing_operands_for_use_p
652 USE is one of the uses attached to STMT. Check if USE is
653 used in STMT for anything other than indexing an array. */
655 static bool
656 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
658 tree operand;
659 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
661 /* USE corresponds to some operand in STMT. If there is no data
662 reference in STMT, then any operand that corresponds to USE
663 is not indexing an array. */
664 if (!STMT_VINFO_DATA_REF (stmt_info))
665 return true;
667 /* STMT has a data_ref. FORNOW this means that it is of one of
668 the following forms:
669 -1- ARRAY_REF = var
670 -2- var = ARRAY_REF
671 (This should have been verified in analyze_data_refs).
673 'var' in the second case corresponds to a def, not a use,
674 so USE cannot correspond to any operands that are not used
675 for array indexing.
677 Therefore, all we need to check is if STMT falls into the
678 first case, and whether var corresponds to USE. */
680 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
681 return false;
683 operand = GIMPLE_STMT_OPERAND (stmt, 1);
685 if (TREE_CODE (operand) != SSA_NAME)
686 return false;
688 if (operand == use)
689 return true;
691 return false;
695 /* Function vect_analyze_scalar_cycles_1.
697 Examine the cross-iteration def-use cycles of scalar variables
698 in LOOP. LOOP_VINFO represents the loop that is now being
699 considered for vectorization (can be LOOP, or an outer-loop
700 enclosing LOOP). */
702 static void
703 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
705 tree phi;
706 basic_block bb = loop->header;
707 tree dummy;
708 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
710 if (vect_print_dump_info (REPORT_DETAILS))
711 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
713 /* First - identify all inductions. */
714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
716 tree access_fn = NULL;
717 tree def = PHI_RESULT (phi);
718 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
720 if (vect_print_dump_info (REPORT_DETAILS))
722 fprintf (vect_dump, "Analyze phi: ");
723 print_generic_expr (vect_dump, phi, TDF_SLIM);
726 /* Skip virtual phis. The data dependences that are associated with
727 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
728 if (!is_gimple_reg (SSA_NAME_VAR (def)))
729 continue;
731 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
733 /* Analyze the evolution function. */
734 access_fn = analyze_scalar_evolution (loop, def);
735 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
737 fprintf (vect_dump, "Access function of PHI: ");
738 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
741 if (!access_fn
742 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
744 VEC_safe_push (tree, heap, worklist, phi);
745 continue;
748 if (vect_print_dump_info (REPORT_DETAILS))
749 fprintf (vect_dump, "Detected induction.");
750 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
754 /* Second - identify all reductions. */
755 while (VEC_length (tree, worklist) > 0)
757 tree phi = VEC_pop (tree, worklist);
758 tree def = PHI_RESULT (phi);
759 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
760 tree reduc_stmt;
762 if (vect_print_dump_info (REPORT_DETAILS))
764 fprintf (vect_dump, "Analyze phi: ");
765 print_generic_expr (vect_dump, phi, TDF_SLIM);
768 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
769 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
771 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
772 if (reduc_stmt)
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "Detected reduction.");
776 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
777 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
778 vect_reduction_def;
780 else
781 if (vect_print_dump_info (REPORT_DETAILS))
782 fprintf (vect_dump, "Unknown def-use cycle pattern.");
785 VEC_free (tree, heap, worklist);
786 return;
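
/* Editorial sketch (not part of GCC): the classification above is a
   two-pass worklist scheme. The arrays below stand in, hypothetically,
   for the answers of analyze_scalar_evolution and
   vect_is_simple_reduction on the two example phis:

     #include <stdio.h>

     enum cycle { UNKNOWN, INDUCTION, REDUCTION };

     int
     main (void)
     {
       /* i   = PHI <0, i+1>     -> simple induction
          sum = PHI <0, sum+x>   -> reduction (fails the IV test first) */
       int is_simple_iv[2] = { 1, 0 };   /* assumed scev answers */
       int is_reduction[2] = { 0, 1 };   /* assumed reduction answers */
       enum cycle kind[2] = { UNKNOWN, UNKNOWN };
       int worklist[2], n = 0, i;

       for (i = 0; i < 2; i++)           /* first pass: inductions */
         if (is_simple_iv[i])
           kind[i] = INDUCTION;
         else
           worklist[n++] = i;

       while (n > 0)                     /* second pass: reductions */
         {
           i = worklist[--n];
           if (is_reduction[i])
             kind[i] = REDUCTION;
         }

       printf ("%d %d\n", kind[0], kind[1]);   /* prints: 1 2 */
       return 0;
     }
*/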
790 /* Function vect_analyze_scalar_cycles.
792 Examine the cross-iteration def-use cycles of scalar variables, by
793 analyzing the loop-header PHIs of scalar variables; classify each
794 cycle as one of the following: invariant, induction, reduction, unknown.
795 We do that for the loop represented by LOOP_VINFO, and also for its
796 inner-loop, if it exists.
797 Examples for scalar cycles:
799 Example1: reduction:
801 loop1:
802 for (i=0; i<N; i++)
803 sum += a[i];
805 Example2: induction:
807 loop2:
808 for (i=0; i<N; i++)
809 a[i] = i; */
811 static void
812 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
814 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
816 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
818 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
819 Reductions in such an inner-loop therefore have different properties than
820 the reductions in the nest that gets vectorized:
821 1. When vectorized, they are executed in the same order as in the original
822 scalar loop, so we can't change the order of computation when
823 vectorizing them.
824 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
825 current checks are too strict. */
827 if (loop->inner)
828 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
832 /* Function vect_insert_into_interleaving_chain.
834 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
836 static void
837 vect_insert_into_interleaving_chain (struct data_reference *dra,
838 struct data_reference *drb)
840 tree prev, next, next_init;
841 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
842 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
844 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
845 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
846 while (next)
848 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
849 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
851 /* Insert here. */
852 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
853 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
854 return;
856 prev = next;
857 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
860 /* We got to the end of the list. Insert here. */
861 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
862 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
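
/* Editorial sketch (not part of GCC): the insertion above is a plain
   sorted-insert into a singly linked list keyed by INIT. A standalone
   analogue with a hypothetical node type; like the code above, it assumes
   the head already holds the smallest key:

     struct node { int init; struct node *next; };

     static void
     insert_sorted (struct node *head, struct node *n)
     {
       struct node *prev = head;
       while (prev->next && prev->next->init <= n->init)
         prev = prev->next;             /* find last node with key <= n */
       n->next = prev->next;            /* splice N in after PREV */
       prev->next = n;
     }
*/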
866 /* Function vect_update_interleaving_chain.
868 For two data-refs DRA and DRB that are part of a chain of interleaved data
869 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
871 There are four possible cases:
872 1. New stmts - both DRA and DRB are not a part of any chain:
873 FIRST_DR = DRB
874 NEXT_DR (DRB) = DRA
875 2. DRB is a part of a chain and DRA is not:
876 no need to update FIRST_DR
877 no need to insert DRB
878 insert DRA according to init
879 3. DRA is a part of a chain and DRB is not:
880 if (init of FIRST_DR > init of DRB)
881 FIRST_DR = DRB
882 NEXT(FIRST_DR) = previous FIRST_DR
883 else
884 insert DRB according to its init
885 4. both DRA and DRB are in some interleaving chains:
886 choose the chain with the smallest init of FIRST_DR
887 insert the nodes of the second chain into the first one. */
889 static void
890 vect_update_interleaving_chain (struct data_reference *drb,
891 struct data_reference *dra)
893 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
894 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
895 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
896 tree node, prev, next, node_init, first_stmt;
898 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
899 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
901 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
902 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
903 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
904 return;
907 /* 2. DRB is a part of a chain and DRA is not. */
908 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
910 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
911 /* Insert DRA into the chain of DRB. */
912 vect_insert_into_interleaving_chain (dra, drb);
913 return;
916 /* 3. DRA is a part of a chain and DRB is not. */
917 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
919 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
920 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
921 old_first_stmt)));
922 tree tmp;
924 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
926 /* DRB's init is smaller than the init of the stmt previously marked
927 as the first stmt of the interleaving chain of DRA. Therefore, we
928 update FIRST_STMT and put DRB at the head of the list. */
929 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
930 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
932 /* Update all the stmts in the list to point to the new FIRST_STMT. */
933 tmp = old_first_stmt;
934 while (tmp)
936 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
937 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
940 else
942 /* Insert DRB in the list of DRA. */
943 vect_insert_into_interleaving_chain (drb, dra);
944 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
946 return;
949 /* 4. both DRA and DRB are in some interleaving chains. */
950 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
951 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
952 if (first_a == first_b)
953 return;
954 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
955 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
957 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
959 /* Insert the nodes of DRA chain into the DRB chain.
960 After inserting a node, continue from this node of the DRB chain (don't
961 start from the beginning). */
962 node = DR_GROUP_FIRST_DR (stmtinfo_a);
963 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
964 first_stmt = first_b;
966 else
968 /* Insert the nodes of DRB chain into the DRA chain.
969 After inserting a node, continue from this node of the DRA chain (don't
970 start from the beginning). */
971 node = DR_GROUP_FIRST_DR (stmtinfo_b);
972 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
973 first_stmt = first_a;
976 while (node)
978 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
979 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
980 while (next)
982 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
983 if (tree_int_cst_compare (next_init, node_init) > 0)
985 /* Insert here. */
986 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
988 prev = node;
989 break;
991 prev = next;
992 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
994 if (!next)
996 /* We got to the end of the list. Insert here. */
997 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
999 prev = node;
1001 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1002 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
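
/* Editorial sketch (not part of GCC): case 4 above is a merge of two
   sorted lists. Reusing the hypothetical struct node from the sketch after
   vect_insert_into_interleaving_chain, and assuming DST_HEAD holds the
   smaller first key (the chain chosen above):

     static void
     merge_chains (struct node *dst_head, struct node *src)
     {
       struct node *prev = dst_head;
       while (src)
         {
           struct node *next_src = src->next;
           while (prev->next && prev->next->init <= src->init)
             prev = prev->next;
           src->next = prev->next;      /* splice SRC in after PREV */
           prev->next = src;
           prev = src;                  /* resume scan here, not at head */
           src = next_src;
         }
     }
*/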
1007 /* Function vect_equal_offsets.
1009 Check if OFFSET1 and OFFSET2 are identical expressions. */
1011 static bool
1012 vect_equal_offsets (tree offset1, tree offset2)
1014 bool res0, res1;
1016 STRIP_NOPS (offset1);
1017 STRIP_NOPS (offset2);
1019 if (offset1 == offset2)
1020 return true;
1022 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1023 || !BINARY_CLASS_P (offset1)
1024 || !BINARY_CLASS_P (offset2))
1025 return false;
1027 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1028 TREE_OPERAND (offset2, 0));
1029 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1030 TREE_OPERAND (offset2, 1));
1032 return (res0 && res1);
1036 /* Function vect_check_interleaving.
1038 Check if DRA and DRB are part of an interleaving. If they are, insert
1039 DRA and DRB into an interleaving chain. */
1041 static void
1042 vect_check_interleaving (struct data_reference *dra,
1043 struct data_reference *drb)
1045 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1047 /* Check that the data-refs have the same first location (except init) and
1048 that both are either stores or loads (not one load and one store). */
1049 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1050 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1051 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1052 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1053 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1054 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1055 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1056 || DR_IS_READ (dra) != DR_IS_READ (drb))
1057 return;
1059 /* Check:
1060 1. data-refs are of the same type
1061 2. their steps are equal
1062 3. the step is at least the difference between the data-refs' inits */
1063 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1064 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1066 if (type_size_a != type_size_b
1067 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1068 return;
1070 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1071 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1072 step = TREE_INT_CST_LOW (DR_STEP (dra));
1074 if (init_a > init_b)
1076 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1077 and DRB is accessed before DRA. */
1078 diff_mod_size = (init_a - init_b) % type_size_a;
1080 if ((init_a - init_b) > step)
1081 return;
1083 if (diff_mod_size == 0)
1085 vect_update_interleaving_chain (drb, dra);
1086 if (vect_print_dump_info (REPORT_DR_DETAILS))
1088 fprintf (vect_dump, "Detected interleaving ");
1089 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1090 fprintf (vect_dump, " and ");
1091 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1093 return;
1096 else
1098 /* If init_b == init_a + the size of the type * k, we have an
1099 interleaving, and DRA is accessed before DRB. */
1100 diff_mod_size = (init_b - init_a) % type_size_a;
1102 if ((init_b - init_a) > step)
1103 return;
1105 if (diff_mod_size == 0)
1107 vect_update_interleaving_chain (dra, drb);
1108 if (vect_print_dump_info (REPORT_DR_DETAILS))
1110 fprintf (vect_dump, "Detected interleaving ");
1111 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1112 fprintf (vect_dump, " and ");
1113 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1115 return;
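
/* Editorial sketch (not part of GCC): the arithmetic above on concrete,
   assumed numbers. With 4-byte elements, DR_STEP == 8, and inits 4 and 0,
   the refs interleave as b[0], a[4], b[8], a[12], ... and DRB is accessed
   first:

     long init_a = 4, init_b = 0, step = 8, type_size = 4;
     if (init_a - init_b <= step
         && (init_a - init_b) % type_size == 0)
       ;   /* detected interleaving; DRB before DRA */
*/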
1121 /* Function vect_analyze_data_ref_dependence.
1123 Return TRUE if there (might) exist a dependence between a memory-reference
1124 DRA and a memory-reference DRB. */
1126 static bool
1127 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1128 loop_vec_info loop_vinfo)
1130 unsigned int i;
1131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1132 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1133 struct data_reference *dra = DDR_A (ddr);
1134 struct data_reference *drb = DDR_B (ddr);
1135 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1136 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1137 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1138 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1139 lambda_vector dist_v;
1140 unsigned int loop_depth;
1142 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1144 /* Independent data accesses. */
1145 vect_check_interleaving (dra, drb);
1146 return false;
1149 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1150 return false;
1152 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1154 if (vect_print_dump_info (REPORT_DR_DETAILS))
1156 fprintf (vect_dump,
1157 "versioning for alias required: can't determine dependence between ");
1158 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1159 fprintf (vect_dump, " and ");
1160 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1162 return true;
1165 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1167 if (vect_print_dump_info (REPORT_DR_DETAILS))
1169 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1170 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1171 fprintf (vect_dump, " and ");
1172 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1174 return true;
1177 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1178 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1180 int dist = dist_v[loop_depth];
1182 if (vect_print_dump_info (REPORT_DR_DETAILS))
1183 fprintf (vect_dump, "dependence distance = %d.", dist);
1185 /* Same loop iteration. */
1186 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1188 /* Two references with distance zero have the same alignment. */
1189 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1190 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1191 if (vect_print_dump_info (REPORT_ALIGNMENT))
1192 fprintf (vect_dump, "accesses have the same alignment.");
1193 if (vect_print_dump_info (REPORT_DR_DETAILS))
1195 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1196 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1197 fprintf (vect_dump, " and ");
1198 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1201 /* For interleaving, mark that there is a read-write dependency if
1202 necessary. We checked above that one of the data-refs is a store. */
1203 if (DR_IS_READ (dra))
1204 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1205 else
1207 if (DR_IS_READ (drb))
1208 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1211 continue;
1214 if (abs (dist) >= vectorization_factor)
1216 /* In this case the dependence distance does not create a dependence,
1217 as far as vectorization is concerned. */
1218 if (vect_print_dump_info (REPORT_DR_DETAILS))
1219 fprintf (vect_dump, "dependence distance >= VF.");
1220 continue;
1223 if (vect_print_dump_info (REPORT_DR_DETAILS))
1225 fprintf (vect_dump,
1226 "versioning for alias required: possible dependence "
1227 "between data-refs ");
1228 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1229 fprintf (vect_dump, " and ");
1230 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1233 return true;
1236 return false;
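
/* Editorial sketch (not part of GCC): how a dependence distance interacts
   with the VF in the loop above, with VF == 4 assumed. dist % VF == 0
   means the two refs fall in the same vector iteration and share
   alignment; |dist| >= VF means vectorization cannot violate the
   dependence; anything in between forces a run-time alias check:

     int dist = 2, vf = 4;               /* assumed values */
     if (dist % vf == 0)
       ;   /* same-alignment pair; dependence preserved */
     else if (dist >= vf || -dist >= vf)
       ;   /* distance >= VF: no hazard for vectorization */
     else
       ;   /* this example: mark for run-time aliasing test */
*/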
1239 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1241 static bool
1242 vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
1244 unsigned i;
1245 ddr_p ddr;
1247 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
1249 tree dref_A_i, dref_B_i, dref_A_j, dref_B_j;
1251 dref_A_i = DR_REF (DDR_A (ddr));
1252 dref_B_i = DR_REF (DDR_B (ddr));
1253 dref_A_j = DR_REF (DDR_A (ddr_new));
1254 dref_B_j = DR_REF (DDR_B (ddr_new));
1256 if ((operand_equal_p (dref_A_i, dref_A_j, 0)
1257 && operand_equal_p (dref_B_i, dref_B_j, 0))
1258 || (operand_equal_p (dref_A_i, dref_B_j, 0)
1259 && operand_equal_p (dref_B_i, dref_A_j, 0)))
1261 if (vect_print_dump_info (REPORT_DR_DETAILS))
1263 fprintf (vect_dump, "found same pair of data references ");
1264 print_generic_expr (vect_dump, dref_A_i, TDF_SLIM);
1265 fprintf (vect_dump, " and ");
1266 print_generic_expr (vect_dump, dref_B_i, TDF_SLIM);
1268 return true;
1271 return false;
1274 /* Save DDR in the LOOP_VINFO list of ddrs that may alias and need to be
1275 tested at run-time. Returns false if the number of run-time checks
1276 inserted by the vectorizer is greater than the maximum defined by
1277 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1278 static bool
1279 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1283 if (vect_print_dump_info (REPORT_DR_DETAILS))
1285 fprintf (vect_dump, "mark for run-time aliasing test between ");
1286 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1287 fprintf (vect_dump, " and ");
1288 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1291 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1292 if (loop->inner)
1294 if (vect_print_dump_info (REPORT_DR_DETAILS))
1295 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1296 return false;
1299 /* Do not add duplicate ddrs to the list. */
1300 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
1301 return true;
1303 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1304 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1306 if (vect_print_dump_info (REPORT_DR_DETAILS))
1308 fprintf (vect_dump,
1309 "disable versioning for alias - max number of generated "
1310 "checks exceeded.");
1313 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1315 return false;
1317 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1318 return true;
1321 /* Function vect_analyze_data_ref_dependences.
1323 Examine all the data references in the loop, and make sure there do not
1324 exist any data dependences between them. */
1326 static bool
1327 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1329 unsigned int i;
1330 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1331 struct data_dependence_relation *ddr;
1333 if (vect_print_dump_info (REPORT_DETAILS))
1334 fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");
1336 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1337 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1339 /* Add to list of ddrs that need to be tested at run-time. */
1340 if (!vect_mark_for_runtime_alias_test (ddr, loop_vinfo))
1341 return false;
1344 return true;
1348 /* Function vect_compute_data_ref_alignment
1350 Compute the misalignment of the data reference DR.
1352 Output:
1353 1. If during the misalignment computation it is found that the data reference
1354 cannot be vectorized then false is returned.
1355 2. DR_MISALIGNMENT (DR) is defined.
1357 FOR NOW: No analysis is actually performed. Misalignment is calculated
1358 only for trivial cases. TODO. */
1360 static bool
1361 vect_compute_data_ref_alignment (struct data_reference *dr)
1363 tree stmt = DR_STMT (dr);
1364 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1366 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1367 tree ref = DR_REF (dr);
1368 tree vectype;
1369 tree base, base_addr;
1370 bool base_aligned;
1371 tree misalign;
1372 tree aligned_to, alignment;
1374 if (vect_print_dump_info (REPORT_DETAILS))
1375 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1377 /* Initialize misalignment to unknown. */
1378 SET_DR_MISALIGNMENT (dr, -1);
1380 misalign = DR_INIT (dr);
1381 aligned_to = DR_ALIGNED_TO (dr);
1382 base_addr = DR_BASE_ADDRESS (dr);
1384 /* In case the dataref is in an inner-loop of the loop that is being
1385 vectorized (LOOP), we use the base and misalignment information
1386 relative to the outer-loop (LOOP). This is ok only if the misalignment
1387 stays the same throughout the execution of the inner-loop, which is why
1388 we have to check that the stride of the dataref in the inner-loop is a
1389 multiple of the vector size. */
1390 if (nested_in_vect_loop_p (loop, stmt))
1392 tree step = DR_STEP (dr);
1393 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1395 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1397 if (vect_print_dump_info (REPORT_ALIGNMENT))
1398 fprintf (vect_dump, "inner step divides the vector-size.");
1399 misalign = STMT_VINFO_DR_INIT (stmt_info);
1400 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1401 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1403 else
1405 if (vect_print_dump_info (REPORT_ALIGNMENT))
1406 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1407 misalign = NULL_TREE;
1411 base = build_fold_indirect_ref (base_addr);
1412 vectype = STMT_VINFO_VECTYPE (stmt_info);
1413 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1415 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1416 || !misalign)
1418 if (vect_print_dump_info (REPORT_ALIGNMENT))
1420 fprintf (vect_dump, "Unknown alignment for access: ");
1421 print_generic_expr (vect_dump, base, TDF_SLIM);
1423 return true;
1426 if ((DECL_P (base)
1427 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1428 alignment) >= 0)
1429 || (TREE_CODE (base_addr) == SSA_NAME
1430 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1431 TREE_TYPE (base_addr)))),
1432 alignment) >= 0))
1433 base_aligned = true;
1434 else
1435 base_aligned = false;
1437 if (!base_aligned)
1439 /* Do not change the alignment of global variables if
1440 flag_section_anchors is enabled. */
1441 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1442 || (TREE_STATIC (base) && flag_section_anchors))
1444 if (vect_print_dump_info (REPORT_DETAILS))
1446 fprintf (vect_dump, "can't force alignment of ref: ");
1447 print_generic_expr (vect_dump, ref, TDF_SLIM);
1449 return true;
1452 /* Force the alignment of the decl.
1453 NOTE: This is the only change to the code we make during
1454 the analysis phase, before deciding to vectorize the loop. */
1455 if (vect_print_dump_info (REPORT_DETAILS))
1456 fprintf (vect_dump, "force alignment");
1457 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1458 DECL_USER_ALIGN (base) = 1;
1461 /* At this point we assume that the base is aligned. */
1462 gcc_assert (base_aligned
1463 || (TREE_CODE (base) == VAR_DECL
1464 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1466 /* Modulo alignment. */
1467 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1469 if (!host_integerp (misalign, 1))
1471 /* Negative or overflowed misalignment value. */
1472 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "unexpected misalign value");
1474 return false;
1477 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1479 if (vect_print_dump_info (REPORT_DETAILS))
1481 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1482 print_generic_expr (vect_dump, ref, TDF_SLIM);
1485 return true;
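
/* Editorial sketch (not part of GCC): the final step above is a plain
   modulo. For an assumed 16-byte vector alignment and a dataref whose
   known offset from an aligned base (DR_INIT) is 20 bytes:

     int init = 20, vec_align = 16;
     int misalign = init % vec_align;    /* misalign == 4 bytes */
*/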
1489 /* Function vect_compute_data_refs_alignment
1491 Compute the misalignment of data references in the loop.
1492 Return FALSE if a data reference is found that cannot be vectorized. */
1494 static bool
1495 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1497 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1498 struct data_reference *dr;
1499 unsigned int i;
1501 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1502 if (!vect_compute_data_ref_alignment (dr))
1503 return false;
1505 return true;
1509 /* Function vect_update_misalignment_for_peel
1511 DR - the data reference whose misalignment is to be adjusted.
1512 DR_PEEL - the data reference whose misalignment is being made
1513 zero in the vector loop by the peel.
1514 NPEEL - the number of iterations in the peel loop if the misalignment
1515 of DR_PEEL is known at compile time. */
1517 static void
1518 vect_update_misalignment_for_peel (struct data_reference *dr,
1519 struct data_reference *dr_peel, int npeel)
1521 unsigned int i;
1522 VEC(dr_p,heap) *same_align_drs;
1523 struct data_reference *current_dr;
1524 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1525 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1526 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1527 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1529 /* For interleaved data accesses the step in the loop must be multiplied by
1530 the size of the interleaving group. */
1531 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1532 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1533 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1534 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1536 /* It can be assumed that the data refs with the same alignment as dr_peel
1537 are aligned in the vector loop. */
1538 same_align_drs
1539 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1540 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1542 if (current_dr != dr)
1543 continue;
1544 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1545 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1546 SET_DR_MISALIGNMENT (dr, 0);
1547 return;
1550 if (known_alignment_for_access_p (dr)
1551 && known_alignment_for_access_p (dr_peel))
1553 int misal = DR_MISALIGNMENT (dr);
1554 misal += npeel * dr_size;
1555 misal %= UNITS_PER_SIMD_WORD;
1556 SET_DR_MISALIGNMENT (dr, misal);
1557 return;
1560 if (vect_print_dump_info (REPORT_DETAILS))
1561 fprintf (vect_dump, "Setting misalignment to -1.");
1562 SET_DR_MISALIGNMENT (dr, -1);
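
/* Editorial sketch (not part of GCC): the known-alignment branch above.
   Peeling NPEEL scalar iterations advances each dataref by
   NPEEL * dr_size bytes, so the recorded misalignment is bumped and
   reduced modulo the vector size (16 bytes assumed here for
   UNITS_PER_SIMD_WORD):

     int misal = 8, npeel = 2, dr_size = 4, simd_size = 16;
     misal = (misal + npeel * dr_size) % simd_size;   /* misal == 0 */
*/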
1566 /* Function vect_verify_datarefs_alignment
1568 Return TRUE if all data references in the loop can be
1569 handled with respect to alignment. */
1571 static bool
1572 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1574 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1575 struct data_reference *dr;
1576 enum dr_alignment_support supportable_dr_alignment;
1577 unsigned int i;
1579 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1581 tree stmt = DR_STMT (dr);
1582 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1584 /* For interleaving, only the alignment of the first access matters. */
1585 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1586 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1587 continue;
1589 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1590 if (!supportable_dr_alignment)
1592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1594 if (DR_IS_READ (dr))
1595 fprintf (vect_dump,
1596 "not vectorized: unsupported unaligned load.");
1597 else
1598 fprintf (vect_dump,
1599 "not vectorized: unsupported unaligned store.");
1601 return false;
1603 if (supportable_dr_alignment != dr_aligned
1604 && vect_print_dump_info (REPORT_ALIGNMENT))
1605 fprintf (vect_dump, "Vectorizing an unaligned access.");
1607 return true;
1611 /* Function vector_alignment_reachable_p
1613 Return true if vector alignment for DR is reachable by peeling
1614 a few loop iterations. Return false otherwise. */
1616 static bool
1617 vector_alignment_reachable_p (struct data_reference *dr)
1619 tree stmt = DR_STMT (dr);
1620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1623 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1625 /* For interleaved accesses we peel only if the number of iterations in
1626 the prolog loop (VF - misalignment) is a multiple of the
1627 number of interleaved accesses. */
1628 int elem_size, mis_in_elements;
1629 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1631 /* FORNOW: handle only known alignment. */
1632 if (!known_alignment_for_access_p (dr))
1633 return false;
1635 elem_size = UNITS_PER_SIMD_WORD / nelements;
1636 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1638 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1639 return false;
1642 /* If the misalignment is known at compile time then allow peeling
1643 only if natural alignment is reachable through peeling. */
1644 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1646 HOST_WIDE_INT elmsize =
1647 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1648 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1651 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1653 if (DR_MISALIGNMENT (dr) % elmsize)
1655 if (vect_print_dump_info (REPORT_DETAILS))
1656 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1657 return false;
1661 if (!known_alignment_for_access_p (dr))
1663 tree type = (TREE_TYPE (DR_REF (dr)));
1664 tree ba = DR_BASE_OBJECT (dr);
1665 bool is_packed = false;
1667 if (ba)
1668 is_packed = contains_packed_reference (ba);
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
1672 return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1678 return true;
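
/* Editorial sketch (not part of GCC): the interleaved-access test above on
   assumed numbers. With 4 elements per vector, a misalignment of 1 element
   and a group of 3 interleaved accesses, the prolog would peel
   4 - 1 == 3 iterations, a multiple of the group size, so alignment is
   reachable:

     int nelements = 4, mis_in_elements = 1, group_size = 3;
     if ((nelements - mis_in_elements) % group_size == 0)
       ;   /* peeling can reach vector alignment */
*/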
1681 /* Function vect_enhance_data_refs_alignment
1683 This pass will use loop versioning and loop peeling in order to enhance
1684 the alignment of data references in the loop.
1686 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1687 original loop is to be vectorized; any other loops that are created by
1688 the transformations performed in this pass are not supposed to be
1689 vectorized. This restriction will be relaxed.
1691 This pass will require a cost model to guide it in deciding whether to apply
1692 peeling or versioning, or a combination of the two. For example, the scheme
1693 that Intel uses when given a loop with several memory accesses is as follows:
1694 choose one memory access ('p') whose alignment you want to force by doing
1695 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1696 other accesses are not necessarily aligned, or (2) use loop versioning to
1697 generate one loop in which all accesses are aligned, and another loop in
1698 which only 'p' is necessarily aligned.
1700 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1701 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1702 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1704 Devising a cost model is the most critical aspect of this work. It will
1705 guide us on which access to peel for, whether to use loop versioning, how
1706 many versions to create, etc. The cost model will probably consist of
1707 generic considerations as well as target specific considerations (on
1708 powerpc for example, misaligned stores are more painful than misaligned
1709 loads).
1711 Here are the general steps involved in alignment enhancements:
1713 -- original loop, before alignment analysis:
1714 for (i=0; i<N; i++){
1715 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1716 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1719 -- After vect_compute_data_refs_alignment:
1720 for (i=0; i<N; i++){
1721 x = q[i]; # DR_MISALIGNMENT(q) = 3
1722 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1725 -- Possibility 1: we do loop versioning:
1726 if (p is aligned) {
1727 for (i=0; i<N; i++){ # loop 1A
1728 x = q[i]; # DR_MISALIGNMENT(q) = 3
1729 p[i] = y; # DR_MISALIGNMENT(p) = 0
1732 else {
1733 for (i=0; i<N; i++){ # loop 1B
1734 x = q[i]; # DR_MISALIGNMENT(q) = 3
1735 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1739 -- Possibility 2: we do loop peeling:
1740 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1741 x = q[i];
1742 p[i] = y;
1744 for (i = 3; i < N; i++){ # loop 2A
1745 x = q[i]; # DR_MISALIGNMENT(q) = 0
1746 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1749 -- Possibility 3: combination of loop peeling and versioning:
1750 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1751 x = q[i];
1752 p[i] = y;
1754 if (p is aligned) {
1755 for (i = 3; i<N; i++){ # loop 3A
1756 x = q[i]; # DR_MISALIGNMENT(q) = 0
1757 p[i] = y; # DR_MISALIGNMENT(p) = 0
1760 else {
1761 for (i = 3; i<N; i++){ # loop 3B
1762 x = q[i]; # DR_MISALIGNMENT(q) = 0
1763 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1767 These loops are later passed to loop_transform to be vectorized. The
1768 vectorizer will use the alignment information to guide the transformation
1769 (whether to generate regular loads/stores, or with special handling for
1770 misalignment). */
1772 static bool
1773 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1775 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1776 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1777 enum dr_alignment_support supportable_dr_alignment;
1778 struct data_reference *dr0 = NULL;
1779 struct data_reference *dr;
1780 unsigned int i;
1781 bool do_peeling = false;
1782 bool do_versioning = false;
1783 bool stat;
1784 tree stmt;
1785 stmt_vec_info stmt_info;
1786 int vect_versioning_for_alias_required;
1788 if (vect_print_dump_info (REPORT_DETAILS))
1789 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1791 /* While cost model enhancements are expected in the future, the high level
1792 view of the code at this time is as follows:
1794 A) If there is a misaligned write then see if peeling to align this write
1795 can make all data references satisfy vect_supportable_dr_alignment.
1796 If so, update data structures as needed and return true. Note that
1797 at this time vect_supportable_dr_alignment is known to return false
1798 for a misaligned write.
1800 B) If peeling wasn't possible and there is a data reference with an
1801 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1802 then see if loop versioning checks can be used to make all data
1803 references satisfy vect_supportable_dr_alignment. If so, update
1804 data structures as needed and return true.
1806 C) If neither peeling nor versioning were successful then return false if
1807 any data reference does not satisfy vect_supportable_dr_alignment.
1809 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1811 Note that Possibility 3 above (which is peeling and versioning together) is not
1812 being done at this time. */
1814 /* (1) Peeling to force alignment. */
1816 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1817 Considerations:
1818 + How many accesses will become aligned due to the peeling
1819 - How many accesses will become unaligned due to the peeling,
1820 and the cost of misaligned accesses.
1821 - The cost of peeling (the extra runtime checks, the increase
1822 in code size).
1824 The scheme we use FORNOW: peel to force the alignment of the first
1825 misaligned store in the loop.
1826 Rationale: misaligned stores are not yet supported.
1828 TODO: Use a cost model. */
1830 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1832 stmt = DR_STMT (dr);
1833 stmt_info = vinfo_for_stmt (stmt);
1835 /* For interleaving, only the alignment of the first access
1836 matters. */
1837 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1838 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1839 continue;
1841 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1843 do_peeling = vector_alignment_reachable_p (dr);
1844 if (do_peeling)
1845 dr0 = dr;
1846 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1847 fprintf (vect_dump, "vector alignment may not be reachable");
1848 break;
1852 vect_versioning_for_alias_required =
1853 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1855 /* Temporarily, if versioning for alias is required, we disable peeling
1856 until we support peeling and versioning. Often peeling for alignment
1857 will require peeling for loop-bound, which in turn requires that we
1858 know how to adjust the loop ivs after the loop. */
1859 if (vect_versioning_for_alias_required
1860 || !vect_can_advance_ivs_p (loop_vinfo)
1861 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1862 do_peeling = false;
1864 if (do_peeling)
1866 int mis;
1867 int npeel = 0;
1868 tree stmt = DR_STMT (dr0);
1869 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1871 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1873 if (known_alignment_for_access_p (dr0))
1875 /* Since it's known at compile time, compute the number of iterations
1876 in the peeled loop (the peeling factor) for use in updating
1877 DR_MISALIGNMENT values. The peeling factor is the vectorization
1878 factor minus the misalignment as an element count. */
1879 mis = DR_MISALIGNMENT (dr0);
1880 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1881 npeel = nelements - mis;
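/* A worked example (illustrative numbers, not from the original source):
   with a 16-byte V4SI vector (NELEMENTS == 4) and 4-byte elements, a DR0
   with DR_MISALIGNMENT == 8 gives MIS = 8/4 = 2 elements, so
   NPEEL = 4 - 2 = 2; peeling two scalar iterations advances DR0 by
   8 bytes to a 16-byte boundary. */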
1883 /* For interleaved data access, every iteration accesses all the
1884 members of the group; therefore we divide the number of iterations
1885 by the group size. */
1886 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1887 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1888 npeel /= DR_GROUP_SIZE (stmt_info);
1890 if (vect_print_dump_info (REPORT_DETAILS))
1891 fprintf (vect_dump, "Try peeling by %d", npeel);
1894 /* Ensure that all data refs can be vectorized after the peel. */
1895 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1897 int save_misalignment;
1899 if (dr == dr0)
1900 continue;
1902 stmt = DR_STMT (dr);
1903 stmt_info = vinfo_for_stmt (stmt);
1904 /* For interleaving, only the alignment of the first access
1905 matters. */
1906 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1907 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1908 continue;
1910 save_misalignment = DR_MISALIGNMENT (dr);
1911 vect_update_misalignment_for_peel (dr, dr0, npeel);
1912 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1913 SET_DR_MISALIGNMENT (dr, save_misalignment);
1915 if (!supportable_dr_alignment)
1917 do_peeling = false;
1918 break;
1922 if (do_peeling)
1924 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1925 If the misalignment of DR_i is identical to that of dr0 then set
1926 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1927 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1928 by the peeling factor times the element size of DR_i (MOD the
1929 vectorization factor times the size). Otherwise, the
1930 misalignment of DR_i must be set to unknown. */
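/* For example (illustrative numbers): with a vectorization factor of 4,
   4-byte elements and NPEEL == 2, a DR_i whose known misalignment is
   12 bytes becomes (12 + 2*4) % (4*4) = 4 bytes after the peel; a DR_i
   with unknown misalignment stays unknown. */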
1931 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1932 if (dr != dr0)
1933 vect_update_misalignment_for_peel (dr, dr0, npeel);
1935 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1936 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1937 SET_DR_MISALIGNMENT (dr0, 0);
1938 if (vect_print_dump_info (REPORT_ALIGNMENT))
1939 fprintf (vect_dump, "Alignment of access forced using peeling.");
1941 if (vect_print_dump_info (REPORT_DETAILS))
1942 fprintf (vect_dump, "Peeling for alignment will be applied.");
1944 stat = vect_verify_datarefs_alignment (loop_vinfo);
1945 gcc_assert (stat);
1946 return stat;
1951 /* (2) Versioning to force alignment. */
1953 /* Try versioning if:
1954 1) flag_tree_vect_loop_version is TRUE
1955 2) optimize_size is FALSE
1956 3) there is at least one unsupported misaligned data ref with an unknown
1957 misalignment, and
1958 4) all misaligned data refs with a known misalignment are supported, and
1959 5) the number of runtime alignment checks is within reason. */
1961 do_versioning =
1962 flag_tree_vect_loop_version
1963 && (!optimize_size)
1964 && (!loop->inner); /* FORNOW */
1966 if (do_versioning)
1968 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1970 stmt = DR_STMT (dr);
1971 stmt_info = vinfo_for_stmt (stmt);
1973 /* For interleaving, only the alignment of the first access
1974 matters. */
1975 if (aligned_access_p (dr)
1976 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1977 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1978 continue;
1980 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1982 if (!supportable_dr_alignment)
1984 tree stmt;
1985 int mask;
1986 tree vectype;
1988 if (known_alignment_for_access_p (dr)
1989 || VEC_length (tree,
1990 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1991 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1993 do_versioning = false;
1994 break;
1997 stmt = DR_STMT (dr);
1998 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1999 gcc_assert (vectype);
2001 /* The rightmost bits of an aligned address must be zeros.
2002 Construct the mask needed for this test. For example,
2003 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2004 mask must be 15 = 0xf. */
2005 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
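/* Conceptually, the runtime test generated from this mask is (a sketch,
   with hypothetical addresses p and q of the may-misalign refs):

   if (((p | q) & mask) == 0)
     ... aligned (vectorized) loop version ...
   else
     ... unaligned (scalar) loop version ...

   i.e. the low bits of all potentially misaligned addresses are OR-ed
   together and tested against the mask. */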
2007 /* FORNOW: use the same mask to test all potentially unaligned
2008 references in the loop. The vectorizer currently supports
2009 a single vector size, see the reference to
2010 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2011 vectorization factor is computed. */
2012 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2013 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2014 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2015 VEC_safe_push (tree, heap,
2016 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2017 DR_STMT (dr));
2021 /* Versioning requires at least one misaligned data reference. */
2022 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2023 do_versioning = false;
2024 else if (!do_versioning)
2025 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2028 if (do_versioning)
2030 VEC(tree,heap) *may_misalign_stmts
2031 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2032 tree stmt;
2034 /* It can now be assumed that the data references in the statements
2035 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2036 of the loop being vectorized. */
2037 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2039 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2040 dr = STMT_VINFO_DATA_REF (stmt_info);
2041 SET_DR_MISALIGNMENT (dr, 0);
2042 if (vect_print_dump_info (REPORT_ALIGNMENT))
2043 fprintf (vect_dump, "Alignment of access forced using versioning.");
2046 if (vect_print_dump_info (REPORT_DETAILS))
2047 fprintf (vect_dump, "Versioning for alignment will be applied.");
2049 /* Peeling and versioning can't be done together at this time. */
2050 gcc_assert (! (do_peeling && do_versioning));
2052 stat = vect_verify_datarefs_alignment (loop_vinfo);
2053 gcc_assert (stat);
2054 return stat;
2057 /* This point is reached if neither peeling nor versioning is being done. */
2058 gcc_assert (! (do_peeling || do_versioning));
2060 stat = vect_verify_datarefs_alignment (loop_vinfo);
2061 return stat;
2065 /* Function vect_analyze_data_refs_alignment
2067 Analyze the alignment of the data-references in the loop.
2068 Return FALSE if a data reference is found that cannot be vectorized. */
2070 static bool
2071 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2073 if (vect_print_dump_info (REPORT_DETAILS))
2074 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2076 if (!vect_compute_data_refs_alignment (loop_vinfo))
2078 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2079 fprintf (vect_dump,
2080 "not vectorized: can't calculate alignment for data ref.");
2081 return false;
2084 return true;
2088 /* Analyze groups of strided accesses: check that DR belongs to a group of
2089 strided accesses of legal size, step, etc. Detect gaps, single element
2090 interleaving, and other special cases. Set strided access info.
2091 Collect groups of strided stores for further use in SLP analysis. */
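/* For example (an illustrative interleaved group, not from this source):

   for (i = 0; i < N; i++){
     a[2*i] = x; # DR_STEP = 8 bytes, type size = 4 bytes
     a[2*i+1] = y; # => STRIDE (group size) = 8/4 = 2
   }

   The two stores form a single interleaving group of size 2 with no
   gaps. */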
2093 static bool
2094 vect_analyze_group_access (struct data_reference *dr)
2096 tree step = DR_STEP (dr);
2097 tree scalar_type = TREE_TYPE (DR_REF (dr));
2098 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2099 tree stmt = DR_STMT (dr);
2100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2102 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2103 HOST_WIDE_INT stride;
2104 bool slp_impossible = false;
2106 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2107 interleaving group (including gaps). */
2108 stride = dr_step / type_size;
2110 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2111 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2113 /* Check if this DR is part of an interleaving group of which it is the
2114 single element accessed in the loop. */
2116 /* Gaps are supported only for loads. STEP must be a multiple of the type
2117 size. The size of the group must be a power of 2. */
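/* For example (illustrative): a lone load 'x = a[2*i]' has an 8-byte
   step with a 4-byte type, so STRIDE == 2; it is recorded as a single
   element interleaving group of size 2 (one element read, one
   skipped). */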
2118 if (DR_IS_READ (dr)
2119 && (dr_step % type_size) == 0
2120 && stride > 0
2121 && exact_log2 (stride) != -1)
2123 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2124 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2125 if (vect_print_dump_info (REPORT_DR_DETAILS))
2127 fprintf (vect_dump, "Detected single element interleaving %d ",
2128 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2129 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2130 fprintf (vect_dump, " step ");
2131 print_generic_expr (vect_dump, step, TDF_SLIM);
2133 return true;
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "not consecutive access");
2137 return false;
2140 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2142 /* First stmt in the interleaving chain. Check the chain. */
2143 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2144 struct data_reference *data_ref = dr;
2145 unsigned int count = 1;
2146 tree next_step;
2147 tree prev_init = DR_INIT (data_ref);
2148 tree prev = stmt;
2149 HOST_WIDE_INT diff, count_in_bytes;
2151 while (next)
2153 /* Skip identical data-refs. In case two or more stmts share a data-ref
2154 (supported only for loads), we vectorize only the first stmt, and
2155 the rest get their vectorized loads from the first one. */
2156 if (!tree_int_cst_compare (DR_INIT (data_ref),
2157 DR_INIT (STMT_VINFO_DATA_REF (
2158 vinfo_for_stmt (next)))))
2160 if (!DR_IS_READ (data_ref))
2162 if (vect_print_dump_info (REPORT_DETAILS))
2163 fprintf (vect_dump, "Two store stmts share the same dr.");
2164 return false;
2167 /* Check that there are no load-store dependences for these loads,
2168 to prevent a load-store-load sequence to the same location. */
2169 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2170 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump,
2174 "READ_WRITE dependence in interleaving.");
2175 return false;
2178 /* For loads, reuse the same data-ref load. */
2179 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2181 prev = next;
2182 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2183 continue;
2185 prev = next;
2187 /* Check that all the accesses have the same STEP. */
2188 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2189 if (tree_int_cst_compare (step, next_step))
2191 if (vect_print_dump_info (REPORT_DETAILS))
2192 fprintf (vect_dump, "not consecutive access in interleaving");
2193 return false;
2196 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2197 /* Check that the distance between two accesses is equal to the type
2198 size. Otherwise, we have gaps. */
2199 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2200 - TREE_INT_CST_LOW (prev_init)) / type_size;
2201 if (diff != 1)
2203 /* FORNOW: SLP of accesses with gaps is not supported. */
2204 slp_impossible = true;
2205 if (!DR_IS_READ (data_ref))
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208 fprintf (vect_dump, "interleaved store with gaps");
2209 return false;
2213 /* Store the gap from the previous member of the group. If there is no
2214 gap in the access, DR_GROUP_GAP is always 1. */
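/* For example (illustrative): for the loads 'x = a[3*i]' and
   'y = a[3*i+2]' in one group, DIFF is 2, so DR_GROUP_GAP of the second
   load is 2 (element a[3*i+1] is a gap) and SLP of the group is ruled
   out. */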
2215 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2217 prev_init = DR_INIT (data_ref);
2218 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2219 /* Count the number of data-refs in the chain. */
2220 count++;
2223 /* COUNT is the number of accesses found; we multiply it by the size of
2224 the type to get COUNT_IN_BYTES. */
2225 count_in_bytes = type_size * count;
2227 /* Check that the size of the interleaving is not greater than STEP. */
2228 if (dr_step < count_in_bytes)
2230 if (vect_print_dump_info (REPORT_DETAILS))
2232 fprintf (vect_dump, "interleaving size is greater than step for ");
2233 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2235 return false;
2238 /* Check that the size of the interleaving is equal to STEP for stores,
2239 i.e., that there are no gaps. */
2240 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2242 if (vect_print_dump_info (REPORT_DETAILS))
2243 fprintf (vect_dump, "interleaved store with gaps");
2244 return false;
2247 /* Check that STEP is a multiple of type size. */
2248 if ((dr_step % type_size) != 0)
2250 if (vect_print_dump_info (REPORT_DETAILS))
2252 fprintf (vect_dump, "step is not a multiple of type size: step ");
2253 print_generic_expr (vect_dump, step, TDF_SLIM);
2254 fprintf (vect_dump, " size ");
2255 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2256 TDF_SLIM);
2258 return false;
2261 /* FORNOW: we handle only interleaving that is a power of 2.
2263 We don't fail here if it may still be possible to vectorize the
2263 group using SLP. If not, the size of the group will be checked in
2264 vect_analyze_operations, and the vectorization will fail. */
2265 if (exact_log2 (stride) == -1)
2267 if (vect_print_dump_info (REPORT_DETAILS))
2268 fprintf (vect_dump, "interleaving is not a power of 2");
2270 if (slp_impossible)
2271 return false;
2273 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2274 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2277 /* SLP: create an SLP data structure for every interleaving group of
2278 stores for further analysis in vect_analyze_slp. */
2279 if (!DR_IS_READ (dr) && !slp_impossible)
2280 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2283 return true;
2287 /* Analyze the access pattern of the data-reference DR.
2288 In case of a non-consecutive access, call vect_analyze_group_access() to
2289 analyze groups of strided accesses. */
2291 static bool
2292 vect_analyze_data_ref_access (struct data_reference *dr)
2294 tree step = DR_STEP (dr);
2295 tree scalar_type = TREE_TYPE (DR_REF (dr));
2296 tree stmt = DR_STMT (dr);
2297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2300 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2302 if (!step)
2304 if (vect_print_dump_info (REPORT_DETAILS))
2305 fprintf (vect_dump, "bad data-ref access");
2306 return false;
2309 /* Don't allow invariant accesses. */
2310 if (dr_step == 0)
2311 return false;
2313 if (nested_in_vect_loop_p (loop, stmt))
2315 /* For the rest of the analysis we use the outer-loop step. */
2316 step = STMT_VINFO_DR_STEP (stmt_info);
2317 dr_step = TREE_INT_CST_LOW (step);
2319 if (dr_step == 0)
2321 if (vect_print_dump_info (REPORT_ALIGNMENT))
2322 fprintf (vect_dump, "zero step in outer loop.");
2323 if (DR_IS_READ (dr))
2324 return true;
2325 else
2326 return false;
2330 /* Consecutive? */
2331 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2333 /* Mark that it is not interleaving. */
2334 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2335 return true;
2338 if (nested_in_vect_loop_p (loop, stmt))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT))
2341 fprintf (vect_dump, "strided access in outer loop.");
2342 return false;
2345 /* Not a consecutive access - check whether it is part of an interleaving group. */
2346 return vect_analyze_group_access (dr);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
2359 static bool
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2362 unsigned int i;
2363 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2364 struct data_reference *dr;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2369 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2370 if (!vect_analyze_data_ref_access (dr))
2372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2373 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2374 return false;
2377 return true;
2381 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2383 void
2384 vect_free_slp_tree (slp_tree node)
2386 if (!node)
2387 return;
2389 if (SLP_TREE_LEFT (node))
2390 vect_free_slp_tree (SLP_TREE_LEFT (node));
2392 if (SLP_TREE_RIGHT (node))
2393 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2395 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2397 if (SLP_TREE_VEC_STMTS (node))
2398 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2400 free (node);
2404 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2405 of a legal type and that they match the defs of the first stmt of the SLP
2406 group (stored in FIRST_STMT_...). */
2408 static bool
2409 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2410 tree rhs, VEC (tree, heap) **def_stmts0,
2411 VEC (tree, heap) **def_stmts1,
2412 enum vect_def_type *first_stmt_dt0,
2413 enum vect_def_type *first_stmt_dt1,
2414 tree *first_stmt_def0_type,
2415 tree *first_stmt_def1_type,
2416 tree *first_stmt_const_oprnd,
2417 int ncopies_for_cost)
2419 tree oprnd;
2420 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2421 unsigned int i, number_of_oprnds = op_type;
2422 tree def, def_stmt;
2423 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2424 stmt_vec_info stmt_info =
2425 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2427 /* Store. */
2428 if (!op_type)
2429 number_of_oprnds = 1;
2430 else
2431 gcc_assert (op_type == unary_op || op_type == binary_op);
2433 for (i = 0; i < number_of_oprnds; i++)
2435 if (op_type)
2436 oprnd = TREE_OPERAND (rhs, i);
2437 else
2438 oprnd = rhs;
2440 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2441 || (!def_stmt && dt[i] != vect_constant_def))
2443 if (vect_print_dump_info (REPORT_SLP))
2445 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2446 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2449 return false;
2452 if (!*first_stmt_dt0)
2454 /* op0 of the first stmt of the group - store its info. */
2455 *first_stmt_dt0 = dt[i];
2456 if (def)
2457 *first_stmt_def0_type = TREE_TYPE (def);
2458 else
2459 *first_stmt_const_oprnd = oprnd;
2461 /* Analyze costs (for the first stmt of the group only). */
2462 if (op_type)
2463 /* Not a memory operation (we don't call this function for loads). */
2464 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2465 else
2466 /* Store. */
2467 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2470 else
2472 if (!*first_stmt_dt1 && i == 1)
2474 /* op1 of the first stmt of the group - store its info. */
2475 *first_stmt_dt1 = dt[i];
2476 if (def)
2477 *first_stmt_def1_type = TREE_TYPE (def);
2478 else
2480 /* We assume that the stmt contains only one constant
2481 operand. We fail otherwise, to be on the safe side. */
2482 if (*first_stmt_const_oprnd)
2484 if (vect_print_dump_info (REPORT_SLP))
2485 fprintf (vect_dump, "Build SLP failed: two constant "
2486 "oprnds in stmt");
2487 return false;
2489 *first_stmt_const_oprnd = oprnd;
2492 else
2494 /* Not the first stmt of the group; check that the def-stmt/s match
2495 the def-stmt/s of the first stmt. */
2496 if ((i == 0
2497 && (*first_stmt_dt0 != dt[i]
2498 || (*first_stmt_def0_type && def
2499 && *first_stmt_def0_type != TREE_TYPE (def))))
2500 || (i == 1
2501 && (*first_stmt_dt1 != dt[i]
2502 || (*first_stmt_def1_type && def
2503 && *first_stmt_def1_type != TREE_TYPE (def))))
2504 || (!def
2505 && TREE_TYPE (*first_stmt_const_oprnd)
2506 != TREE_TYPE (oprnd)))
2508 if (vect_print_dump_info (REPORT_SLP))
2509 fprintf (vect_dump, "Build SLP failed: different types ");
2511 return false;
2516 /* Check the types of the definitions. */
2517 switch (dt[i])
2519 case vect_constant_def:
2520 case vect_invariant_def:
2521 break;
2523 case vect_loop_def:
2524 if (i == 0)
2525 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2526 else
2527 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2528 break;
2530 default:
2531 /* FORNOW: Not supported. */
2532 if (vect_print_dump_info (REPORT_SLP))
2534 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2535 print_generic_expr (vect_dump, def, TDF_SLIM);
2538 return false;
2542 return true;
2546 /* Recursively build an SLP tree starting from NODE.
2547 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2548 permutation or are of unsupported types of operation. Otherwise, return
2549 TRUE.
2550 SLP_IMPOSSIBLE is set to TRUE if it is impossible to SLP anything in the
2551 loop, for example in the case of multiple types (for now). */
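/* For example (an illustrative group of two isomorphic stores):

   a[2*i] = b[2*i] + x;
   a[2*i+1] = b[2*i+1] + x;

   builds a root node holding the two stores, with a left child node
   holding the two loads from 'b' (where the recursion stops); a right
   child would be built for the defs of 'x' only if they were loop
   defs. */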
2553 static bool
2554 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2555 unsigned int group_size, bool *slp_impossible,
2556 int *inside_cost, int *outside_cost,
2557 int ncopies_for_cost)
2559 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2560 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2561 unsigned int i;
2562 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2563 tree stmt = VEC_index (tree, stmts, 0);
2564 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2565 enum tree_code first_stmt_code = 0;
2566 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2567 tree lhs, rhs, prev_stmt = NULL_TREE;
2568 bool stop_recursion = false, need_same_oprnds = false;
2569 tree vectype, scalar_type, first_op1 = NULL_TREE;
2570 unsigned int vectorization_factor = 0, ncopies;
2571 optab optab;
2572 int icode;
2573 enum machine_mode optab_op2_mode;
2574 enum machine_mode vec_mode;
2575 tree first_stmt_const_oprnd = NULL_TREE;
2576 struct data_reference *first_dr;
2578 /* For every stmt in NODE find its def stmt/s. */
2579 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2581 if (vect_print_dump_info (REPORT_SLP))
2583 fprintf (vect_dump, "Build SLP for ");
2584 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2587 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2589 if (vect_print_dump_info (REPORT_SLP))
2591 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2592 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2595 return false;
2598 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2599 vectype = get_vectype_for_scalar_type (scalar_type);
2600 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2601 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2602 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2603 if (ncopies > 1)
2605 /* FORNOW. */
2606 if (vect_print_dump_info (REPORT_SLP))
2607 fprintf (vect_dump, "SLP failed - multiple types ");
2609 *slp_impossible = true;
2610 return false;
2613 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2614 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2616 /* Check the operation. */
2617 if (i == 0)
2619 first_stmt_code = TREE_CODE (rhs);
2621 /* Shift arguments should be equal in all the packed stmts for a
2622 vector shift with scalar shift operand. */
2623 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2625 vec_mode = TYPE_MODE (vectype);
2626 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2627 if (!optab)
2629 if (vect_print_dump_info (REPORT_SLP))
2630 fprintf (vect_dump, "Build SLP failed: no optab.");
2631 return false;
2633 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2634 optab_op2_mode = insn_data[icode].operand[2].mode;
2635 if (!VECTOR_MODE_P (optab_op2_mode))
2637 need_same_oprnds = true;
2638 first_op1 = TREE_OPERAND (rhs, 1);
2642 else
2644 if (first_stmt_code != TREE_CODE (rhs))
2646 if (vect_print_dump_info (REPORT_SLP))
2648 fprintf (vect_dump,
2649 "Build SLP failed: different operation in stmt ");
2650 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2653 return false;
2656 if (need_same_oprnds
2657 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2659 if (vect_print_dump_info (REPORT_SLP))
2661 fprintf (vect_dump,
2662 "Build SLP failed: different shift arguments in ");
2663 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2666 return false;
2670 /* Strided store or load. */
2671 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2673 if (REFERENCE_CLASS_P (lhs))
2675 /* Store. */
2676 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2677 &def_stmts0, &def_stmts1,
2678 &first_stmt_dt0,
2679 &first_stmt_dt1,
2680 &first_stmt_def0_type,
2681 &first_stmt_def1_type,
2682 &first_stmt_const_oprnd,
2683 ncopies_for_cost))
2684 return false;
2686 else
2688 /* Load. */
2689 if (i == 0)
2691 /* The first stmt of the SLP group should be the first load of
2692 the interleaving chain if data permutation is not
2693 allowed. */
2694 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2696 /* FORNOW: data permutations are not supported. */
2697 if (vect_print_dump_info (REPORT_SLP))
2699 fprintf (vect_dump, "Build SLP failed: strided "
2700 " loads need permutation ");
2701 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2704 return false;
2707 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2708 if (vect_supportable_dr_alignment (first_dr)
2709 == dr_unaligned_unsupported)
2711 if (vect_print_dump_info (REPORT_SLP))
2713 fprintf (vect_dump, "Build SLP failed: unsupported "
2714 " unaligned load ");
2715 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2718 return false;
2721 /* Analyze costs (for the first stmt in the group). */
2722 vect_model_load_cost (vinfo_for_stmt (stmt),
2723 ncopies_for_cost, *node);
2725 else
2727 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2729 /* FORNOW: data permutations are not supported. */
2730 if (vect_print_dump_info (REPORT_SLP))
2732 fprintf (vect_dump, "Build SLP failed: strided "
2733 " loads need permutation ");
2734 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2736 return false;
2740 prev_stmt = stmt;
2742 /* We stop growing the tree when we reach a group of loads. */
2743 stop_recursion = true;
2744 continue;
2746 } /* Strided access. */
2747 else
2749 if (REFERENCE_CLASS_P (rhs))
2751 /* Not a strided load. */
2752 if (vect_print_dump_info (REPORT_SLP))
2754 fprintf (vect_dump, "Build SLP failed: not strided load ");
2755 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2758 /* FORNOW: non-strided loads are not supported. */
2759 return false;
2762 /* Not a memory operation. */
2763 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2765 if (vect_print_dump_info (REPORT_SLP))
2767 fprintf (vect_dump, "Build SLP failed: operation");
2768 fprintf (vect_dump, " unsupported ");
2769 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2772 return false;
2775 /* Find the def-stmts. */
2776 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2777 &def_stmts1, &first_stmt_dt0,
2778 &first_stmt_dt1,
2779 &first_stmt_def0_type,
2780 &first_stmt_def1_type,
2781 &first_stmt_const_oprnd,
2782 ncopies_for_cost))
2783 return false;
2787 /* Add the costs of the node to the overall instance costs. */
2788 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2789 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2791 /* Strided loads were reached - stop the recursion. */
2792 if (stop_recursion)
2793 return true;
2795 /* Create SLP_TREE nodes for the definition node/s. */
2796 if (first_stmt_dt0 == vect_loop_def)
2798 slp_tree left_node = XNEW (struct _slp_tree);
2799 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2800 SLP_TREE_VEC_STMTS (left_node) = NULL;
2801 SLP_TREE_LEFT (left_node) = NULL;
2802 SLP_TREE_RIGHT (left_node) = NULL;
2803 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2804 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2805 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2806 slp_impossible, inside_cost, outside_cost,
2807 ncopies_for_cost))
2808 return false;
2810 SLP_TREE_LEFT (*node) = left_node;
2813 if (first_stmt_dt1 == vect_loop_def)
2815 slp_tree right_node = XNEW (struct _slp_tree);
2816 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2817 SLP_TREE_VEC_STMTS (right_node) = NULL;
2818 SLP_TREE_LEFT (right_node) = NULL;
2819 SLP_TREE_RIGHT (right_node) = NULL;
2820 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2821 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2822 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2823 slp_impossible, inside_cost, outside_cost,
2824 ncopies_for_cost))
2825 return false;
2827 SLP_TREE_RIGHT (*node) = right_node;
2830 return true;
2834 static void
2835 vect_print_slp_tree (slp_tree node)
2837 int i;
2838 tree stmt;
2840 if (!node)
2841 return;
2843 fprintf (vect_dump, "node ");
2844 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2846 fprintf (vect_dump, "\n\tstmt %d ", i);
2847 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2849 fprintf (vect_dump, "\n");
2851 vect_print_slp_tree (SLP_TREE_LEFT (node));
2852 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2856 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2857 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2858 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2859 stmts in NODE are to be marked. */
2861 static void
2862 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2864 int i;
2865 tree stmt;
2867 if (!node)
2868 return;
2870 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2871 if (j < 0 || i == j)
2872 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2874 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2875 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2879 /* Analyze an SLP instance starting from a group of strided stores. Call
2880 vect_build_slp_tree to build a tree of packed stmts if possible.
2881 Return FALSE if it's impossible to SLP any stmt in the loop. */
2883 static bool
2884 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2886 slp_instance new_instance;
2887 slp_tree node = XNEW (struct _slp_tree);
2888 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2889 unsigned int unrolling_factor = 1, nunits;
2890 tree vectype, scalar_type, next;
2891 unsigned int vectorization_factor = 0, ncopies;
2892 bool slp_impossible = false;
2893 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2895 /* FORNOW: multiple types are not supported. */
2896 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2897 vectype = get_vectype_for_scalar_type (scalar_type);
2898 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2899 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2900 ncopies = vectorization_factor / nunits;
2901 if (ncopies > 1)
2903 if (vect_print_dump_info (REPORT_SLP))
2904 fprintf (vect_dump, "SLP failed - multiple types ");
2906 return false;
2909 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2910 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
2911 next = stmt;
2912 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2913 while (next)
2915 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
2916 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2919 SLP_TREE_VEC_STMTS (node) = NULL;
2920 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
2921 SLP_TREE_LEFT (node) = NULL;
2922 SLP_TREE_RIGHT (node) = NULL;
2923 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
2924 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
2926 /* Calculate the unrolling factor. */
2927 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
2929 /* Calculate the number of vector stmts to create based on the unrolling
2930 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
2931 GROUP_SIZE / NUNITS otherwise). */
2932 ncopies_for_cost = unrolling_factor * group_size / nunits;
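/* For example (illustrative numbers): with NUNITS == 4 and
   GROUP_SIZE == 2, UNROLLING_FACTOR = lcm (4, 2) / 2 = 2 (two group
   instances fill one vector), and NCOPIES_FOR_COST = 2 * 2 / 4 = 1
   vector stmt per node. */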
2934 /* Build the tree for the SLP instance. */
2935 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
2936 &inside_cost, &outside_cost, ncopies_for_cost))
2938 /* Create a new SLP instance. */
2939 new_instance = XNEW (struct _slp_instance);
2940 SLP_INSTANCE_TREE (new_instance) = node;
2941 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2942 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2943 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
2944 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
2945 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2946 new_instance);
2947 if (vect_print_dump_info (REPORT_SLP))
2948 vect_print_slp_tree (node);
2950 return true;
2953 /* Failed to SLP. */
2954 /* Free the allocated memory. */
2955 vect_free_slp_tree (node);
2957 if (slp_impossible)
2958 return false;
2960 /* SLP failed for this instance, but it is still possible to SLP other stmts
2961 in the loop. */
2962 return true;
2966 /* Check if there are stmts in the loop that can be vectorized using SLP.
2967 Build SLP trees of packed scalar stmts if SLP is possible. */
2969 static bool
2970 vect_analyze_slp (loop_vec_info loop_vinfo)
2972 unsigned int i;
2973 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
2974 tree store;
2976 if (vect_print_dump_info (REPORT_SLP))
2977 fprintf (vect_dump, "=== vect_analyze_slp ===");
2979 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
2980 if (!vect_analyze_slp_instance (loop_vinfo, store))
2982 /* SLP failed. No instance can be SLPed in the loop. */
2983 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2984 fprintf (vect_dump, "SLP failed.");
2986 return false;
2989 return true;
2993 /* For each possible SLP instance decide whether to SLP it and calculate the
2994 overall unrolling factor needed to SLP the loop. */
2996 static void
2997 vect_make_slp_decision (loop_vec_info loop_vinfo)
2999 unsigned int i, unrolling_factor = 1;
3000 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3001 slp_instance instance;
3002 int decided_to_slp = 0;
3004 if (vect_print_dump_info (REPORT_SLP))
3005 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3007 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3009 /* FORNOW: SLP if you can. */
3010 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3011 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3013 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3014 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3015 loop-based vectorization. Such stmts will be marked as HYBRID. */
3016 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3017 decided_to_slp++;
3020 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3022 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3023 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3024 decided_to_slp, unrolling_factor);
3028 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3029 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3031 static void
3032 vect_detect_hybrid_slp_stmts (slp_tree node)
3034 int i;
3035 tree stmt;
3036 imm_use_iterator imm_iter;
3037 tree use_stmt;
3039 if (!node)
3040 return;
3042 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3043 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3044 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3045 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3046 if (vinfo_for_stmt (use_stmt)
3047 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3048 vect_mark_slp_stmts (node, hybrid, i);
3050 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3051 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3055 /* Find stmts that must be both vectorized and SLPed. */
3057 static void
3058 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3060 unsigned int i;
3061 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3062 slp_instance instance;
3064 if (vect_print_dump_info (REPORT_SLP))
3065 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3067 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3068 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3072 /* Function vect_analyze_data_refs.
3074 Find all the data references in the loop.
3076 The general structure of the analysis of data refs in the vectorizer is as
3077 follows:
3078 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3079 find and analyze all data-refs in the loop and their dependences.
3080 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3081 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3082 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3086 static bool
3087 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3089 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3090 unsigned int i;
3091 VEC (data_reference_p, heap) *datarefs;
3092 struct data_reference *dr;
3093 tree scalar_type;
3095 if (vect_print_dump_info (REPORT_DETAILS))
3096 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3098 compute_data_dependences_for_loop (loop, true,
3099 &LOOP_VINFO_DATAREFS (loop_vinfo),
3100 &LOOP_VINFO_DDRS (loop_vinfo));
3102 /* Go through the data-refs and check that the analysis succeeded. Update the
3103 pointer from the stmt_vec_info struct to the DR and vectype. */
3104 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3106 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3108 tree stmt;
3109 stmt_vec_info stmt_info;
3110 basic_block bb;
3111 tree base, offset, init;
3113 if (!dr || !DR_REF (dr))
3115 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3116 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3117 return false;
3120 stmt = DR_STMT (dr);
3121 stmt_info = vinfo_for_stmt (stmt);
3123 /* Check that analysis of the data-ref succeeded. */
3124 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3125 || !DR_STEP (dr))
3127 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3129 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3130 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3132 return false;
3135 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3138 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3139 "constant");
3140 return false;
3143 if (!DR_SYMBOL_TAG (dr))
3145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3147 fprintf (vect_dump, "not vectorized: no memory tag for ");
3148 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3150 return false;
3153 base = unshare_expr (DR_BASE_ADDRESS (dr));
3154 offset = unshare_expr (DR_OFFSET (dr));
3155 init = unshare_expr (DR_INIT (dr));
3157 /* Update DR field in stmt_vec_info struct. */
3158 bb = bb_for_stmt (stmt);
3160 /* If the dataref is in an inner-loop of the loop that is considered
3161 for vectorization, we also want to analyze the access relative to
3162 the outer-loop (DR contains information only relative to the
3163 inner-most enclosing loop). We do that by building a reference to the
3164 first location accessed by the inner-loop, and analyzing it relative
3165 to the outer-loop. */
3166 if (nested_in_vect_loop_p (loop, stmt))
3168 tree outer_step, outer_base, outer_init;
3169 HOST_WIDE_INT pbitsize, pbitpos;
3170 tree poffset;
3171 enum machine_mode pmode;
3172 int punsignedp, pvolatilep;
3173 affine_iv base_iv, offset_iv;
3174 tree dinit;
3176 /* Build a reference to the first location accessed by the
3177 inner-loop: *(BASE+INIT). (The first location is actually
3178 BASE+INIT+OFFSET, but we add OFFSET separately later.) */
3179 tree inner_base = build_fold_indirect_ref
3180 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
3182 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (dump_file, "analyze in outer-loop: ");
3185 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3188 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3189 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3190 gcc_assert (outer_base != NULL_TREE);
3192 if (pbitpos % BITS_PER_UNIT != 0)
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (dump_file, "failed: bit offset alignment.\n");
3196 return false;
3199 outer_base = build_fold_addr_expr (outer_base);
3200 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3202 if (vect_print_dump_info (REPORT_DETAILS))
3203 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3204 return false;
3207 if (offset)
3209 if (poffset)
3210 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3211 else
3212 poffset = offset;
3215 if (!poffset)
3217 offset_iv.base = ssize_int (0);
3218 offset_iv.step = ssize_int (0);
3220 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3222 if (vect_print_dump_info (REPORT_DETAILS))
3223 fprintf (dump_file, "evolution of offset is not affine.\n");
3224 return false;
3227 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3228 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3229 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3230 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3231 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3233 outer_step = size_binop (PLUS_EXPR,
3234 fold_convert (ssizetype, base_iv.step),
3235 fold_convert (ssizetype, offset_iv.step));
3237 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3238 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3239 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3240 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3241 STMT_VINFO_DR_OFFSET (stmt_info) =
3242 fold_convert (ssizetype, offset_iv.base);
3243 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3244 size_int (highest_pow2_factor (offset_iv.base));
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, "\touter base_address: ");
3249 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3250 fprintf (dump_file, "\n\touter offset from base address: ");
3251 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3252 fprintf (dump_file, "\n\touter constant offset from base address: ");
3253 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3254 fprintf (dump_file, "\n\touter step: ");
3255 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3256 fprintf (dump_file, "\n\touter aligned to: ");
3257 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3261 if (STMT_VINFO_DATA_REF (stmt_info))
3263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3265 fprintf (vect_dump,
3266 "not vectorized: more than one data ref in stmt: ");
3267 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3269 return false;
3271 STMT_VINFO_DATA_REF (stmt_info) = dr;
3273 /* Set vectype for STMT. */
3274 scalar_type = TREE_TYPE (DR_REF (dr));
3275 STMT_VINFO_VECTYPE (stmt_info) =
3276 get_vectype_for_scalar_type (scalar_type);
3277 if (!STMT_VINFO_VECTYPE (stmt_info))
3279 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3281 fprintf (vect_dump,
3282 "not vectorized: no vectype for stmt: ");
3283 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3284 fprintf (vect_dump, " scalar_type: ");
3285 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3287 return false;
3291 return true;
3295 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3297 /* Function vect_mark_relevant.
3299 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3301 static void
3302 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3303 enum vect_relevant relevant, bool live_p)
3305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3306 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3307 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3309 if (vect_print_dump_info (REPORT_DETAILS))
3310 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3312 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3314 tree pattern_stmt;
3316 /* This is the last stmt in a sequence that was detected as a
3317 pattern that can potentially be vectorized. Don't mark the stmt
3318 as relevant/live because it's not going to be vectorized.
3319 Instead mark the pattern-stmt that replaces it. */
3321 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3323 if (vect_print_dump_info (REPORT_DETAILS))
3324 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3325 stmt_info = vinfo_for_stmt (pattern_stmt);
3326 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3327 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3328 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3329 stmt = pattern_stmt;
3332 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3333 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3334 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3336 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3337 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3339 if (vect_print_dump_info (REPORT_DETAILS))
3340 fprintf (vect_dump, "already marked relevant/live.");
3341 return;
3344 VEC_safe_push (tree, heap, *worklist, stmt);
3348 /* Function vect_stmt_relevant_p.
3350 Return true if STMT in loop that is represented by LOOP_VINFO is
3351 "relevant for vectorization".
3353 A stmt is considered "relevant for vectorization" if:
3354 - it has uses outside the loop.
3355 - it has vdefs (it alters memory).
3356 - it is a control stmt in the loop (other than the exit condition).
3358 CHECKME: what other side effects would the vectorizer allow? */
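/* For example (illustrative): in
   for (i=0; i<N; i++){ s += a[i]; p[i] = x; }
   the store to p[i] is relevant because it has a vdef (it alters
   memory), and 's' is live if its final value is used after the
   loop. */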
3360 static bool
3361 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3362 enum vect_relevant *relevant, bool *live_p)
3364 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3365 ssa_op_iter op_iter;
3366 imm_use_iterator imm_iter;
3367 use_operand_p use_p;
3368 def_operand_p def_p;
3370 *relevant = vect_unused_in_loop;
3371 *live_p = false;
3373 /* cond stmt other than loop exit cond. */
3374 if (is_ctrl_stmt (stmt)
3375 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3376 *relevant = vect_used_in_loop;
3378 /* changing memory. */
3379 if (TREE_CODE (stmt) != PHI_NODE)
3380 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3382 if (vect_print_dump_info (REPORT_DETAILS))
3383 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3384 *relevant = vect_used_in_loop;
3387 /* uses outside the loop. */
3388 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3390 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3392 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3393 if (!flow_bb_inside_loop_p (loop, bb))
3395 if (vect_print_dump_info (REPORT_DETAILS))
3396 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3398 /* We expect all such uses to be in the loop exit phis
3399 (because of loop-closed SSA form). */
3400 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3401 gcc_assert (bb == single_exit (loop)->dest);
3403 *live_p = true;
3408 return (*live_p || *relevant);
3413 Function process_use.
3415 Inputs:
3416 - a USE in STMT in a loop represented by LOOP_VINFO
3417 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3418 that defined USE. This is done by calling vect_mark_relevant and passing it
3419 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3421 Outputs:
3422 Generally, LIVE_P and RELEVANT are used to define the liveness and
3423 relevance info of the DEF_STMT of this USE:
3424 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3425 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3426 Exceptions:
3427 - case 1: If USE is used only for address computations (e.g. array indexing),
3428 which does not need to be directly vectorized, then the liveness/relevance
3429 of the respective DEF_STMT is left unchanged.
3430 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3431 skip DEF_STMT because it has already been processed.
3432 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3433 be modified accordingly.
3435 Return true if everything is as expected. Return false otherwise. */
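/* For example (illustrative, for case 1 above): in 'x = a[i]' the use of
   'i' feeds only the address computation, so the stmt defining 'i' is
   not marked relevant/live on account of this use. */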
3437 static bool
3438 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3439 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3441 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3442 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3443 stmt_vec_info dstmt_vinfo;
3444 basic_block bb, def_bb;
3445 tree def, def_stmt;
3446 enum vect_def_type dt;
3448 /* case 1: we are only interested in uses that need to be vectorized. Uses
3449 that are used for address computation are not considered relevant. */
3450 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3451 return true;
3453 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3455 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3456 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3457 return false;
3460 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3461 return true;
3463 def_bb = bb_for_stmt (def_stmt);
3464 if (!flow_bb_inside_loop_p (loop, def_bb))
3466 if (vect_print_dump_info (REPORT_DETAILS))
3467 fprintf (vect_dump, "def_stmt is out of loop.");
3468 return true;
3471 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3472 DEF_STMT must have already been processed, because this should be the
3473 only way that STMT, which is a reduction-phi, was put in the worklist,
3474 as there should be no other uses for DEF_STMT in the loop. So we just
3475 check that everything is as expected, and we are done. */
3476 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3477 bb = bb_for_stmt (stmt);
3478 if (TREE_CODE (stmt) == PHI_NODE
3479 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3480 && TREE_CODE (def_stmt) != PHI_NODE
3481 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3482 && bb->loop_father == def_bb->loop_father)
3484 if (vect_print_dump_info (REPORT_DETAILS))
3485 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3486 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3487 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3488 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3489 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3490 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3491 return true;
3494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3495 outer-loop-header-bb:
3496 d = def_stmt
3497 inner-loop:
3498 stmt # use (d)
3499 outer-loop-tail-bb:
3500 ... */
3501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3503 if (vect_print_dump_info (REPORT_DETAILS))
3504 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3505 switch (relevant)
3507 case vect_unused_in_loop:
3508 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3509 vect_used_by_reduction : vect_unused_in_loop;
3510 break;
3511 case vect_used_in_outer_by_reduction:
3512 relevant = vect_used_by_reduction;
3513 break;
3514 case vect_used_in_outer:
3515 relevant = vect_used_in_loop;
3516 break;
3517 case vect_used_by_reduction:
3518 case vect_used_in_loop:
3519 break;
3521 default:
3522 gcc_unreachable ();
3526 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3527 outer-loop-header-bb:
3529 inner-loop:
3530 d = def_stmt
3531 outer-loop-tail-bb:
3532 stmt # use (d) */
3533 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3535 if (vect_print_dump_info (REPORT_DETAILS))
3536 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3537 switch (relevant)
3539 case vect_unused_in_loop:
3540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3541 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3542 break;
3544 case vect_used_in_outer_by_reduction:
3545 case vect_used_in_outer:
3546 break;
3548 case vect_used_by_reduction:
3549 relevant = vect_used_in_outer_by_reduction;
3550 break;
3552 case vect_used_in_loop:
3553 relevant = vect_used_in_outer;
3554 break;
3556 default:
3557 gcc_unreachable ();
3561 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3562 return true;
3566 /* Function vect_mark_stmts_to_be_vectorized.
3568 Not all stmts in the loop need to be vectorized. For example:
3570 for i...
3571 for j...
3572 1. T0 = i + j
3573 2. T1 = a[T0]
3575 3. j = j + 1
3577 Stmts 1 and 3 do not need to be vectorized, because loop control and
3578 addressing of vectorized data-refs are handled differently.
3580 This pass detects such stmts. */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool live_p;
  enum vect_relevant relevant;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process worklist.  */
  while (VEC_length (tree, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (tree, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as-is to the DEF_STMTs of its USEs:
           live_p   <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts:
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be kept.

         Reduction phis are expected to be used by a reduction stmt, or by
         a stmt in an outer loop; other reduction stmts are expected to be
         in the loop, and possibly used by a stmt in an outer loop.
         Here are the expected values of "relevant" for reduction phis/stmts:

         relevance:                          phi     stmt
         vect_unused_in_loop                         ok
         vect_used_in_outer_by_reduction     ok      ok
         vect_used_in_outer                  ok      ok
         vect_used_by_reduction              ok
         vect_used_in_loop                                  */
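
      /* For illustration only (example not in the original sources): in

           s_1 = PHI <s_init, s_2>      <- reduction phi
           ...
           s_2 = s_1 + a[i];            <- reduction stmt

         the intermediate values of s are used only by the phi and by the
         final result of the reduction, so the uses of the reduction stmt
         are marked vect_used_by_reduction and live_p is cleared.  */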

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          enum vect_relevant tmp_relevant = relevant;
          switch (tmp_relevant)
            {
            case vect_unused_in_loop:
              gcc_assert (TREE_CODE (stmt) != PHI_NODE);
              relevant = vect_used_by_reduction;
              break;

            case vect_used_in_outer_by_reduction:
            case vect_used_in_outer:
              gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
                          && TREE_CODE (stmt) != DOT_PROD_EXPR);
              break;

            case vect_used_by_reduction:
              if (TREE_CODE (stmt) == PHI_NODE)
                break;
              /* fall through */
            case vect_used_in_loop:
            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");
              VEC_free (tree, heap, worklist);
              return false;
            }
          live_p = false;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (tree, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
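
/* For illustration only (example not in the original sources): for the phi
   defining a simple IV such as `i' in

     for (i = 0; i < n; i++)
       ...

   the access function computed below is the linear chrec {0, +, 1}_1.  Its
   evolution part in the loop (the step, 1) is not itself a chrec, so this
   IV can be advanced for the epilog loop.  */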

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
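
/* For illustration only (example not in the original sources): for a loop
   running i = 0 .. n-1, the expression placed in NUMBER_OF_ITERATIONS is
   `n' (the number of times the exit condition of the do-while form is
   executed).  If the loop is not countable, NUMBER_OF_ITERATIONS is left
   untouched and the caller gives up.  */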

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}

/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}

/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW.  May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                             |
                           header <--------+
                            | |            |
                            | +--> latch --+
                            |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge backedge, entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                             |
                           header <---+
                             |        |
                        inner-loop    |
                             |        |
                           tail ------+
                             |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */
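
      /* For illustration only (example not in the original sources): a
         source nest of the shape

           for (i = 0; i < n; i++)      <- outer loop considered here
             for (j = 0; j < m; j++)
               a[i][j] = i + j;

         yields the 5-BB structure above: the outer header, the two BBs of
         the inner loop (header and latch), the outer tail, and the outer
         latch.  */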

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      backedge = EDGE_PRED (innerloop->header, 1);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        {
          backedge = EDGE_PRED (innerloop->header, 0);
          entryedge = EDGE_PRED (innerloop->header, 1);
        }

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
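
  /* For illustration only (example not in the original sources): a source
     loop `while (i < n) { ...; i++; }' is expected to reach this point as

       if (i < n)
         do { ...; i++; } while (i < n);

     so that the header holds the body and the exit test, and the latch
     carries no statements.  */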
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */

loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */
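
  /* For illustration only (example not in the original sources): references
     such as a[i] and b[i] in

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     qualify, since the compiler can raise the alignment of the arrays;
     more complicated references may cause the analysis below to fail.  */
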
  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
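
  /* For illustration only (example not in the original sources): a[i]
     advances by one element per iteration and is a consecutive access,
     whereas an indirect access such as a[b[i]] has no such evolution and
     fails this analysis.  */
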
  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
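
  /* For illustration only (example not in the original sources): a group of
     isomorphic stmts such as

       out[2*i]   = a[i] + x;
       out[2*i+1] = b[i] + y;

     can be combined into an SLP instance and vectorized as straight-line
     code, rather than by combining copies across iterations.  */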
  ok = vect_analyze_slp (loop_vinfo);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
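
  /* For illustration only (example not in the original sources): if a data
     reference is misaligned by one element and the vectorization factor is
     4, peeling 3 scalar iterations before the vector loop makes the access
     aligned; alternatively, the loop can be versioned under a runtime
     alignment check.  */
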
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}