1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
45 static bool vect_analyze_data_refs (loop_vec_info
);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
47 static void vect_analyze_scalar_cycles (loop_vec_info
);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
51 static bool vect_compute_data_refs_alignment (loop_vec_info
);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
53 static bool vect_analyze_operations (loop_vec_info
);
54 static bool vect_determine_vectorization_factor (loop_vec_info
);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
58 static tree
vect_get_loop_niters (struct loop
*, tree
*);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation
*, loop_vec_info
);
61 static bool vect_compute_data_ref_alignment (struct data_reference
*);
62 static bool vect_analyze_data_ref_access (struct data_reference
*);
63 static bool vect_can_advance_ivs_p (loop_vec_info
);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference
*, struct data_reference
*, int npeel
);
/* NOTE(review): this listing is corrupted — the embedded original line
   numbers (67, 69, 70, ...) were pasted into the text, statements are
   split across lines, and the numbering gaps show that many source lines
   (braces, returns, fprintf calls) are missing.  Do not edit this text
   functionally; recover the function from upstream GCC tree-vect-analyze.c
   first.  Only comments have been added below.  */
/* Visible contract: walks every basic block of the loop, computes a vector
   type for each relevant phi/stmt via get_vectype_for_scalar_type, keeps
   the maximum TYPE_VECTOR_SUBPARTS seen in `vectorization_factor`, and
   stores it in LOOP_VINFO_VECT_FACTOR.  Presumably returns a success flag —
   the return statements are among the missing lines; confirm upstream.  */
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
95 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
96 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
97 int nbbs
= loop
->num_nodes
;
98 block_stmt_iterator si
;
99 unsigned int vectorization_factor
= 0;
104 stmt_vec_info stmt_info
;
107 if (vect_print_dump_info (REPORT_DETAILS
))
108 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
/* First pass over each block: loop-header phis.  */
110 for (i
= 0; i
< nbbs
; i
++)
112 basic_block bb
= bbs
[i
];
114 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
116 stmt_info
= vinfo_for_stmt (phi
);
117 if (vect_print_dump_info (REPORT_DETAILS
))
119 fprintf (vect_dump
, "==> examining phi: ");
120 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
123 gcc_assert (stmt_info
);
125 if (STMT_VINFO_RELEVANT_P (stmt_info
))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info
));
128 scalar_type
= TREE_TYPE (PHI_RESULT (phi
));
130 if (vect_print_dump_info (REPORT_DETAILS
))
132 fprintf (vect_dump
, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
136 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* NOTE(review): the failure path (presumably `if (!vectype) ... return
   false;`) is partially missing here — original lines 137-141 dropped.  */
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
147 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
149 if (vect_print_dump_info (REPORT_DETAILS
))
151 fprintf (vect_dump
, "vectype: ");
152 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
155 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
156 if (vect_print_dump_info (REPORT_DETAILS
))
157 fprintf (vect_dump
, "nunits = %d", nunits
);
/* Keep the largest nunits seen so far as the candidate VF.  */
159 if (!vectorization_factor
160 || (nunits
> vectorization_factor
))
161 vectorization_factor
= nunits
;
/* Second pass: ordinary statements of the block.  */
165 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
167 tree stmt
= bsi_stmt (si
);
168 stmt_info
= vinfo_for_stmt (stmt
);
170 if (vect_print_dump_info (REPORT_DETAILS
))
172 fprintf (vect_dump
, "==> examining statement: ");
173 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
176 gcc_assert (stmt_info
);
178 /* skip stmts which do not need to be vectorized. */
179 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
180 && !STMT_VINFO_LIVE_P (stmt_info
))
182 if (vect_print_dump_info (REPORT_DETAILS
))
183 fprintf (vect_dump
, "skip.");
187 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
189 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
191 fprintf (vect_dump
, "not vectorized: irregular stmt.");
192 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
197 if (!GIMPLE_STMT_P (stmt
)
198 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
202 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
203 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
208 if (STMT_VINFO_VECTYPE (stmt_info
))
210 /* The only case when a vectype had been already set is for stmts
211 that contain a dataref, or for "pattern-stmts" (stmts generated
212 by the vectorizer to represent/replace a certain idiom). */
213 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
214 || is_pattern_stmt_p (stmt_info
));
215 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
219 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info
)
220 && !is_pattern_stmt_p (stmt_info
));
222 /* We set the vectype according to the type of the result (lhs).
223 For stmts whose result-type is different than the type of the
224 arguments (e.g. demotion, promotion), vectype will be reset
225 appropriately (later). Note that we have to visit the smallest
226 datatype in this function, because that determines the VF.
227 If the smallest datatype in the loop is present only as the
228 rhs of a promotion operation - we'd miss it here.
229 However, in such a case, that a variable of this datatype
230 does not appear in the lhs anywhere in the loop, it shouldn't
231 affect the vectorization factor. */
232 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
234 if (vect_print_dump_info (REPORT_DETAILS
))
236 fprintf (vect_dump
, "get vectype for scalar type: ");
237 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
240 vectype
= get_vectype_for_scalar_type (scalar_type
);
243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
246 "not vectorized: unsupported data-type ");
247 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
251 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
254 if (vect_print_dump_info (REPORT_DETAILS
))
256 fprintf (vect_dump
, "vectype: ");
257 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
260 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
261 if (vect_print_dump_info (REPORT_DETAILS
))
262 fprintf (vect_dump
, "nunits = %d", nunits
);
264 if (!vectorization_factor
265 || (nunits
> vectorization_factor
))
266 vectorization_factor
= nunits
;
271 /* TODO: Analyze cost. Decide if worth while to vectorize. */
272 if (vect_print_dump_info (REPORT_DETAILS
))
273 fprintf (vect_dump
, "vectorization factor = %d", vectorization_factor
);
/* A VF of 0 or 1 means nothing vectorizable was found.  */
274 if (vectorization_factor
<= 1)
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
277 fprintf (vect_dump
, "not vectorized: unsupported data-type");
280 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
/* NOTE(review): corrupted listing — embedded original line numbers, split
   statements, and non-contiguous numbering show many lines (braces, returns,
   fprintf openings) are missing.  Recover from upstream GCC
   tree-vect-analyze.c before making functional edits; only comments added.  */
/* Visible contract: checks every phi and statement in the loop for
   vectorizability (via the vectorizable_* predicates), then applies the cost
   model (vect_estimate_min_profitable_iters, PARAM_MIN_VECT_LOOP_BOUND) and
   the epilogue-loop feasibility checks.  Presumably returns bool — return
   statements are among the missing lines; confirm upstream.  */
286 /* Function vect_analyze_operations.
288 Scan the loop stmts and make sure they are all vectorizable. */
291 vect_analyze_operations (loop_vec_info loop_vinfo
)
293 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
294 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
295 int nbbs
= loop
->num_nodes
;
296 block_stmt_iterator si
;
297 unsigned int vectorization_factor
= 0;
301 stmt_vec_info stmt_info
;
302 bool need_to_vectorize
= false;
303 int min_profitable_iters
;
304 int min_scalar_loop_bound
;
307 if (vect_print_dump_info (REPORT_DETAILS
))
308 fprintf (vect_dump
, "=== vect_analyze_operations ===");
/* The VF must already have been computed by
   vect_determine_vectorization_factor.  */
310 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
311 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
313 for (i
= 0; i
< nbbs
; i
++)
315 basic_block bb
= bbs
[i
];
317 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
321 stmt_info
= vinfo_for_stmt (phi
);
322 if (vect_print_dump_info (REPORT_DETAILS
))
324 fprintf (vect_dump
, "examining phi: ");
325 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
328 if (! is_loop_header_bb_p (bb
))
330 /* inner-loop loop-closed exit phi in outer-loop vectorization
331 (i.e. a phi in the tail of the outer-loop).
332 FORNOW: we currently don't support the case that these phis
333 are not used in the outerloop, cause this case requires
334 to actually do something here. */
335 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
336 || STMT_VINFO_LIVE_P (stmt_info
))
338 if (vect_print_dump_info (REPORT_DETAILS
))
340 "Unsupported loop-closed phi in outer-loop.");
346 gcc_assert (stmt_info
);
348 if (STMT_VINFO_LIVE_P (stmt_info
))
350 /* FORNOW: not yet supported. */
351 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
352 fprintf (vect_dump
, "not vectorized: value used after loop.");
356 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_loop
357 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_induction_def
)
359 /* A scalar-dependence cycle that we don't support. */
360 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
361 fprintf (vect_dump
, "not vectorized: scalar dependence cycle.");
365 if (STMT_VINFO_RELEVANT_P (stmt_info
))
367 need_to_vectorize
= true;
368 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
369 ok
= vectorizable_induction (phi
, NULL
, NULL
);
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
377 "not vectorized: relevant phi not supported: ");
378 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
384 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
386 tree stmt
= bsi_stmt (si
);
387 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
388 enum vect_def_type relevance
= STMT_VINFO_RELEVANT (stmt_info
);
390 if (vect_print_dump_info (REPORT_DETAILS
))
392 fprintf (vect_dump
, "==> examining statement: ");
393 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
396 gcc_assert (stmt_info
);
398 /* skip stmts which do not need to be vectorized.
399 this is expected to include:
400 - the COND_EXPR which is the loop exit condition
401 - any LABEL_EXPRs in the loop
402 - computations that are used only for array indexing or loop
405 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
406 && !STMT_VINFO_LIVE_P (stmt_info
))
408 if (vect_print_dump_info (REPORT_DETAILS
))
409 fprintf (vect_dump
, "irrelevant.");
413 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
418 case vect_reduction_def
:
419 gcc_assert (relevance
== vect_used_in_outer
420 || relevance
== vect_used_in_outer_by_reduction
421 || relevance
== vect_unused_in_loop
);
424 case vect_induction_def
:
425 case vect_constant_def
:
426 case vect_invariant_def
:
427 case vect_unknown_def_type
:
432 if (STMT_VINFO_RELEVANT_P (stmt_info
))
434 gcc_assert (GIMPLE_STMT_P (stmt
)
435 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
436 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
437 need_to_vectorize
= true;
/* A statement is vectorizable if any one of the specialized
   predicates accepts it.  */
440 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
441 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
442 || vectorizable_conversion (stmt
, NULL
, NULL
)
443 || vectorizable_operation (stmt
, NULL
, NULL
)
444 || vectorizable_assignment (stmt
, NULL
, NULL
)
445 || vectorizable_load (stmt
, NULL
, NULL
)
446 || vectorizable_call (stmt
, NULL
, NULL
)
447 || vectorizable_store (stmt
, NULL
, NULL
)
448 || vectorizable_condition (stmt
, NULL
, NULL
)
449 || vectorizable_reduction (stmt
, NULL
, NULL
));
451 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
452 need extra handling, except for vectorizable reductions. */
453 if (STMT_VINFO_LIVE_P (stmt_info
)
454 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
455 ok
|= vectorizable_live_operation (stmt
, NULL
, NULL
);
459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
461 fprintf (vect_dump
, "not vectorized: stmt not supported: ");
462 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
469 /* All operations in the loop are either irrelevant (deal with loop
470 control, or dead), or only used outside the loop and can be moved
471 out of the loop (e.g. invariants, inductions). The loop can be
472 optimized away by scalar optimizations. We're better off not
473 touching this loop. */
474 if (!need_to_vectorize
)
476 if (vect_print_dump_info (REPORT_DETAILS
))
478 "All the computation can be taken out of the loop.");
479 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
481 "not vectorized: redundant loop. no profit to vectorize.");
485 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
486 && vect_print_dump_info (REPORT_DETAILS
))
488 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
489 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
/* Known iteration count below the VF: not worth vectorizing.  */
491 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
492 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
494 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
495 fprintf (vect_dump
, "not vectorized: iteration count too small.");
496 if (vect_print_dump_info (REPORT_DETAILS
))
497 fprintf (vect_dump
,"not vectorized: iteration count smaller than "
498 "vectorization factor.");
502 /* Analyze cost. Decide if worth while to vectorize. */
504 min_profitable_iters
= vect_estimate_min_profitable_iters (loop_vinfo
);
505 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
) = min_profitable_iters
;
506 if (min_profitable_iters
< 0)
508 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
509 fprintf (vect_dump
, "not vectorized: vectorization not profitable.");
510 if (vect_print_dump_info (REPORT_DETAILS
))
511 fprintf (vect_dump
, "not vectorized: vector version will never be "
516 min_scalar_loop_bound
= (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
))
517 * vectorization_factor
;
519 /* Use the cost model only if it is more conservative than user specified
/* `th` is the iteration-count threshold below which we refuse to
   vectorize: the larger of the user bound and the cost-model estimate.  */
522 th
= (unsigned) min_scalar_loop_bound
;
523 if (min_profitable_iters
524 && (!min_scalar_loop_bound
525 || min_profitable_iters
> min_scalar_loop_bound
))
526 th
= (unsigned) min_profitable_iters
;
528 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
529 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < th
)
531 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
532 fprintf (vect_dump
, "not vectorized: vectorization not "
534 if (vect_print_dump_info (REPORT_DETAILS
))
535 fprintf (vect_dump
, "not vectorized: iteration count smaller than "
536 "user specified loop bound parameter or minimum "
537 "profitable iterations (whichever is more conservative).");
/* An epilogue loop is needed when the trip count is unknown, not a
   multiple of VF, or when peeling for alignment was requested.  */
541 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
542 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
543 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
545 if (vect_print_dump_info (REPORT_DETAILS
))
546 fprintf (vect_dump
, "epilog loop required.");
547 if (!vect_can_advance_ivs_p (loop_vinfo
))
549 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
551 "not vectorized: can't create epilog loop 1.");
554 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
556 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
558 "not vectorized: can't create epilog loop 2.");
/* NOTE(review): corrupted listing — embedded line numbers and numbering
   gaps (e.g. 581→584, 602→end) show the return statements and closing
   braces are missing.  Recover from upstream GCC tree-vect-analyze.c;
   only comments added here.  */
/* Visible contract: predicate over (USE, STMT).  If STMT has no data
   reference, USE cannot be an index.  Otherwise it inspects the lhs/rhs
   GIMPLE operands; the comparisons against USE and the returns are among
   the missing lines — confirm upstream.  */
567 /* Function exist_non_indexing_operands_for_use_p
569 USE is one of the uses attached to STMT. Check if USE is
570 used in STMT for anything other than indexing an array. */
573 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
576 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
578 /* USE corresponds to some operand in STMT. If there is no data
579 reference in STMT, then any operand that corresponds to USE
580 is not indexing an array. */
581 if (!STMT_VINFO_DATA_REF (stmt_info
))
584 /* STMT has a data_ref. FORNOW this means that its of one of
588 (This should have been verified in analyze_data_refs).
590 'var' in the second case corresponds to a def, not a use,
591 so USE cannot correspond to any operands that are not used
594 Therefore, all we need to check is if STMT falls into the
595 first case, and whether var corresponds to USE. */
597 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
600 operand
= GIMPLE_STMT_OPERAND (stmt
, 1);
602 if (TREE_CODE (operand
) != SSA_NAME
)
/* NOTE(review): corrupted listing — embedded original line numbers and
   numbering gaps (e.g. 655→659, 694 cut mid-assignment) show lines are
   missing, including the condition guarding line 659 and the rhs of the
   assignment at 694.  Recover from upstream GCC tree-vect-analyze.c;
   only comments added here.  */
/* Visible contract: two passes over the loop-header phis.  Pass 1 uses
   analyze_scalar_evolution / vect_is_simple_iv_evolution to mark
   inductions (vect_induction_def) and pushes undecided phis on a
   worklist.  Pass 2 pops the worklist and uses vect_is_simple_reduction
   to mark reductions (vect_reduction_def); anything else stays
   vect_unknown_def_type.  */
612 /* Function vect_analyze_scalar_cycles_1.
614 Examine the cross iteration def-use cycles of scalar variables
615 in LOOP. LOOP_VINFO represents the loop that is noe being
616 considered for vectorization (can be LOOP, or an outer-loop
620 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo
, struct loop
*loop
)
623 basic_block bb
= loop
->header
;
625 VEC(tree
,heap
) *worklist
= VEC_alloc (tree
, heap
, 64);
627 if (vect_print_dump_info (REPORT_DETAILS
))
628 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
630 /* First - identify all inductions. */
631 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
633 tree access_fn
= NULL
;
634 tree def
= PHI_RESULT (phi
);
635 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
637 if (vect_print_dump_info (REPORT_DETAILS
))
639 fprintf (vect_dump
, "Analyze phi: ");
640 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
643 /* Skip virtual phi's. The data dependences that are associated with
644 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
645 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
648 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
650 /* Analyze the evolution function. */
651 access_fn
= analyze_scalar_evolution (loop
, def
);
652 if (access_fn
&& vect_print_dump_info (REPORT_DETAILS
))
654 fprintf (vect_dump
, "Access function of PHI: ");
655 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
/* NOTE(review): the first arm of this condition (presumably
   `if (!access_fn`) was dropped by the extraction.  */
659 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
661 VEC_safe_push (tree
, heap
, worklist
, phi
);
665 if (vect_print_dump_info (REPORT_DETAILS
))
666 fprintf (vect_dump
, "Detected induction.");
667 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
671 /* Second - identify all reductions. */
672 while (VEC_length (tree
, worklist
) > 0)
674 tree phi
= VEC_pop (tree
, worklist
);
675 tree def
= PHI_RESULT (phi
);
676 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
679 if (vect_print_dump_info (REPORT_DETAILS
))
681 fprintf (vect_dump
, "Analyze phi: ");
682 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
685 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
686 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
688 reduc_stmt
= vect_is_simple_reduction (loop_vinfo
, phi
);
691 if (vect_print_dump_info (REPORT_DETAILS
))
692 fprintf (vect_dump
, "Detected reduction.");
693 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
/* NOTE(review): the rhs of this assignment (the def-type constant for
   the reduction stmt) is on a dropped line — confirm upstream.  */
694 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
698 if (vect_print_dump_info (REPORT_DETAILS
))
699 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
702 VEC_free (tree
, heap
, worklist
);
/* NOTE(review): corrupted listing — embedded original line numbers and a
   numbering gap before line 745 (presumably the `if (loop->inner)` guard)
   show lines are missing.  Recover from upstream GCC tree-vect-analyze.c;
   only comments added here.  */
/* Visible contract: thin wrapper that runs vect_analyze_scalar_cycles_1
   on the loop being vectorized and again on loop->inner (outer-loop
   vectorization case).  */
707 /* Function vect_analyze_scalar_cycles.
709 Examine the cross iteration def-use cycles of scalar variables, by
710 analyzing the loop-header PHIs of scalar variables; Classify each
711 cycle as one of the following: invariant, induction, reduction, unknown.
712 We do that for the loop represented by LOOP_VINFO, and also to its
713 inner-loop, if exists.
714 Examples for scalar cycles:
729 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
731 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
733 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
);
735 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
736 Reductions in such inner-loop therefore have different properties than
737 the reductions in the nest that gets vectorized:
738 1. When vectorized, they are executed in the same order as in the original
739 scalar loop, so we can't change the order of computation when
741 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
742 current checks are too strict. */
745 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
->inner
);
/* NOTE(review): corrupted listing — embedded original line numbers and
   numbering gaps (762→765, 770→774, 774→777) show the `while (next)` loop
   structure and early return are missing.  Recover from upstream GCC
   tree-vect-analyze.c; only comments added here.  */
/* Visible contract: walks DRB's interleaving chain (DR_GROUP_FIRST_DR /
   DR_GROUP_NEXT_DR links between stmt_vec_infos), comparing DR_INIT
   values with tree_int_cst_compare, and links DRA's stmt in before the
   first member whose init exceeds DRA's, or at the tail.  */
749 /* Function vect_insert_into_interleaving_chain.
751 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
754 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
755 struct data_reference
*drb
)
757 tree prev
, next
, next_init
;
758 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
759 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
761 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
762 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
765 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
766 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
/* Found the first chain member with a larger init: link DRA in
   between PREV and NEXT.  */
769 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
770 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
774 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
777 /* We got to the end of the list. Insert here. */
778 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
779 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL_TREE
;
/* NOTE(review): corrupted listing — embedded original line numbers, split
   statements, and numbering gaps (e.g. 837 cut mid-expression, missing
   loop/brace lines around 850-853 and 895-919) show lines are missing.
   Recover from upstream GCC tree-vect-analyze.c; only comments added.  */
/* Visible contract: merges/extends the interleaving chains of DRA and DRB
   along the four cases enumerated in the header comment, maintaining the
   DR_GROUP_FIRST_DR / DR_GROUP_NEXT_DR links ordered by DR_INIT
   (tree_int_cst_compare).  */
783 /* Function vect_update_interleaving_chain.
785 For two data-refs DRA and DRB that are a part of a chain interleaved data
786 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
788 There are four possible cases:
789 1. New stmts - both DRA and DRB are not a part of any chain:
792 2. DRB is a part of a chain and DRA is not:
793 no need to update FIRST_DR
794 no need to insert DRB
795 insert DRA according to init
796 3. DRA is a part of a chain and DRB is not:
797 if (init of FIRST_DR > init of DRB)
799 NEXT(FIRST_DR) = previous FIRST_DR
801 insert DRB according to its init
802 4. both DRA and DRB are in some interleaving chains:
803 choose the chain with the smallest init of FIRST_DR
804 insert the nodes of the second chain into the first one. */
807 vect_update_interleaving_chain (struct data_reference
*drb
,
808 struct data_reference
*dra
)
810 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
811 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
812 tree next_init
, init_dra_chain
, init_drb_chain
, first_a
, first_b
;
813 tree node
, prev
, next
, node_init
, first_stmt
;
815 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
816 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
818 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
819 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
820 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
824 /* 2. DRB is a part of a chain and DRA is not. */
825 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
827 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
828 /* Insert DRA into the chain of DRB. */
829 vect_insert_into_interleaving_chain (dra
, drb
);
833 /* 3. DRA is a part of a chain and DRB is not. */
834 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
836 tree old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
/* NOTE(review): this initializer is cut mid-expression — the closing
   argument (presumably vinfo_for_stmt (old_first_stmt)) is on dropped
   lines.  */
837 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
841 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
843 /* DRB's init is smaller than the init of the stmt previously marked
844 as the first stmt of the interleaving chain of DRA. Therefore, we
845 update FIRST_STMT and put DRB in the head of the list. */
846 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
847 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
849 /* Update all the stmts in the list to point to the new FIRST_STMT. */
850 tmp
= old_first_stmt
;
853 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
854 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
859 /* Insert DRB in the list of DRA. */
860 vect_insert_into_interleaving_chain (drb
, dra
);
861 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
866 /* 4. both DRA and DRB are in some interleaving chains. */
867 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
868 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
869 if (first_a
== first_b
)
871 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
872 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
874 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
876 /* Insert the nodes of DRA chain into the DRB chain.
877 After inserting a node, continue from this node of the DRB chain (don't
878 start from the beginning. */
879 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
880 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
881 first_stmt
= first_b
;
885 /* Insert the nodes of DRB chain into the DRA chain.
886 After inserting a node, continue from this node of the DRA chain (don't
887 start from the beginning. */
888 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
889 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
890 first_stmt
= first_a
;
895 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
896 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
899 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
900 if (tree_int_cst_compare (next_init
, node_init
) > 0)
903 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
904 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
909 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
913 /* We got to the end of the list. Insert here. */
914 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
915 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL_TREE
;
918 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
919 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
/* NOTE(review): corrupted listing — embedded line numbers and numbering
   gaps (936→939, 941→944) show the `return` statements for the early
   cases are missing.  Recover from upstream GCC tree-vect-analyze.c;
   only comments added here.  */
/* Visible contract: structural-equality predicate on two offset trees.
   Strips no-op conversions, succeeds on pointer equality, and otherwise
   recurses into both operands of matching binary expressions.  */
924 /* Function vect_equal_offsets.
926 Check if OFFSET1 and OFFSET2 are identical expressions. */
929 vect_equal_offsets (tree offset1
, tree offset2
)
933 STRIP_NOPS (offset1
);
934 STRIP_NOPS (offset2
);
936 if (offset1
== offset2
)
939 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
940 || !BINARY_CLASS_P (offset1
)
941 || !BINARY_CLASS_P (offset2
))
944 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
945 TREE_OPERAND (offset2
, 0));
946 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
947 TREE_OPERAND (offset2
, 1));
949 return (res0
&& res1
);
/* NOTE(review): corrupted listing — embedded original line numbers and
   numbering gaps (973→977, 997→1000, trailing lines after 1030) show the
   early returns and closing braces are missing.  Recover from upstream
   GCC tree-vect-analyze.c; only comments added here.  */
/* Visible contract: decides whether DRA and DRB form an interleaved
   access group (same base/offset, same element size and step, init
   difference a multiple of the element size and within one step) and, if
   so, records them via vect_update_interleaving_chain, with the
   earlier-accessed ref passed first.  */
953 /* Function vect_check_interleaving.
955 Check if DRA and DRB are a part of interleaving. In case they are, insert
956 DRA and DRB in an interleaving chain. */
959 vect_check_interleaving (struct data_reference
*dra
,
960 struct data_reference
*drb
)
962 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
964 /* Check that the data-refs have same first location (except init) and they
965 are both either store or load (not load and store). */
966 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
967 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
968 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
969 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
970 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
971 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
972 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
973 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
977 1. data-refs are of the same type
978 2. their steps are equal
979 3. the step is greater than the difference between data-refs' inits */
980 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
981 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
983 if (type_size_a
!= type_size_b
984 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
)))
987 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
988 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
989 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
993 /* If init_a == init_b + the size of the type * k, we have an interleaving,
994 and DRB is accessed before DRA. */
995 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
997 if ((init_a
- init_b
) > step
)
1000 if (diff_mod_size
== 0)
1002 vect_update_interleaving_chain (drb
, dra
);
1003 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1005 fprintf (vect_dump
, "Detected interleaving ");
1006 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1007 fprintf (vect_dump
, " and ");
1008 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1015 /* If init_b == init_a + the size of the type * k, we have an
1016 interleaving, and DRA is accessed before DRB. */
1017 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
1019 if ((init_b
- init_a
) > step
)
1022 if (diff_mod_size
== 0)
1024 vect_update_interleaving_chain (dra
, drb
);
1025 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1027 fprintf (vect_dump
, "Detected interleaving ");
1028 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1029 fprintf (vect_dump
, " and ");
1030 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
/* NOTE(review): corrupted listing — embedded original line numbers and
   numbering gaps (e.g. 1062→1066, 1136→1140, trailing braces/returns
   after 1147) show lines are missing, including most return statements.
   Recover from upstream GCC tree-vect-analyze.c; only comments added.  */
/* Visible contract: classifies the dependence relation DDR for
   vectorization.  chrec_known → independent (interleaving is checked);
   read/read or self-dependence → harmless; chrec_dont_know or an empty
   distance-vector set → flagged for alias versioning; otherwise each
   distance is compared against the VF (dist % VF == 0 records
   same-alignment pairs and read-write group dependences, |dist| >= VF is
   harmless, anything else needs versioning).  */
1038 /* Function vect_analyze_data_ref_dependence.
1040 Return TRUE if there (might) exist a dependence between a memory-reference
1041 DRA and a memory-reference DRB. */
1044 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
1045 loop_vec_info loop_vinfo
)
1048 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1049 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1050 struct data_reference
*dra
= DDR_A (ddr
);
1051 struct data_reference
*drb
= DDR_B (ddr
);
1052 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1053 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1054 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1055 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1056 lambda_vector dist_v
;
1057 unsigned int loop_depth
;
1059 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1061 /* Independent data accesses. */
1062 vect_check_interleaving (dra
, drb
);
1066 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
1069 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1071 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1074 "versioning for alias required: can't determine dependence between ");
1075 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1076 fprintf (vect_dump
, " and ");
1077 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1082 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1084 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1086 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
1087 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1088 fprintf (vect_dump
, " and ");
1089 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1094 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1095 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
1097 int dist
= dist_v
[loop_depth
];
1099 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1100 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1102 /* Same loop iteration. */
1103 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
1105 /* Two references with distance zero have the same alignment. */
1106 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1107 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1108 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1109 fprintf (vect_dump
, "accesses have the same alignment.");
1110 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1112 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1113 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1114 fprintf (vect_dump
, " and ");
1115 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1118 /* For interleaving, mark that there is a read-write dependency if
1119 necessary. We check before that one of the data-refs is store. */
1120 if (DR_IS_READ (dra
))
1121 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
1124 if (DR_IS_READ (drb
))
1125 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
1131 if (abs (dist
) >= vectorization_factor
)
1133 /* Dependence distance does not create dependence, as far as vectorization
1134 is concerned, in this case. */
1135 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1136 fprintf (vect_dump
, "dependence distance >= VF.");
1140 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1143 "versioning for alias required: possible dependence "
1144 "between data-refs ");
1145 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1146 fprintf (vect_dump
, " and ");
1147 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1156 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1159 vect_is_duplicate_ddr (VEC (ddr_p
, heap
) * may_alias_ddrs
, ddr_p ddr_new
)
1164 for (i
= 0; VEC_iterate (ddr_p
, may_alias_ddrs
, i
, ddr
); i
++)
1166 tree dref_A_i
, dref_B_i
, dref_A_j
, dref_B_j
;
1168 dref_A_i
= DR_REF (DDR_A (ddr
));
1169 dref_B_i
= DR_REF (DDR_B (ddr
));
1170 dref_A_j
= DR_REF (DDR_A (ddr_new
));
1171 dref_B_j
= DR_REF (DDR_B (ddr_new
));
1173 if ((operand_equal_p (dref_A_i
, dref_A_j
, 0)
1174 && operand_equal_p (dref_B_i
, dref_B_j
, 0))
1175 || (operand_equal_p (dref_A_i
, dref_B_j
, 0)
1176 && operand_equal_p (dref_B_i
, dref_A_j
, 0)))
1178 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1180 fprintf (vect_dump
, "found same pair of data references ");
1181 print_generic_expr (vect_dump
, dref_A_i
, TDF_SLIM
);
1182 fprintf (vect_dump
, " and ");
1183 print_generic_expr (vect_dump
, dref_B_i
, TDF_SLIM
);
1191 /* Save DDR in LOOP_VINFO list of ddrs that may alias and need to be
1192 tested at run-time. Returns false if number of run-time checks
1193 inserted by vectorizer is greater than maximum defined by
1194 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1196 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
1198 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1200 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1202 fprintf (vect_dump
, "mark for run-time aliasing test between ");
1203 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
1204 fprintf (vect_dump
, " and ");
1205 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
1208 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1211 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1212 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
1216 /* Do not add to the list duplicate ddrs. */
1217 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
))
1220 if (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
))
1221 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
1223 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1226 "disable versioning for alias - max number of generated "
1227 "checks exceeded.");
1230 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
1234 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
1238 /* Function vect_analyze_data_ref_dependences.
1240 Examine all the data references in the loop, and make sure there do not
1241 exist any data dependences between them. */
1244 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1247 VEC (ddr_p
, heap
) * ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1248 struct data_dependence_relation
*ddr
;
1250 if (vect_print_dump_info (REPORT_DETAILS
))
1251 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1253 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1254 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
1256 /* Add to list of ddrs that need to be tested at run-time. */
1257 if (!vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
))
1265 /* Function vect_compute_data_ref_alignment
1267 Compute the misalignment of the data reference DR.
1270 1. If during the misalignment computation it is found that the data reference
1271 cannot be vectorized then false is returned.
1272 2. DR_MISALIGNMENT (DR) is defined.
1274 FOR NOW: No analysis is actually performed. Misalignment is calculated
1275 only for trivial cases. TODO. */
1278 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1280 tree stmt
= DR_STMT (dr
);
1281 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1282 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1283 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1284 tree ref
= DR_REF (dr
);
1286 tree base
, base_addr
;
1289 tree aligned_to
, alignment
;
1291 if (vect_print_dump_info (REPORT_DETAILS
))
1292 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1294 /* Initialize misalignment to unknown. */
1295 SET_DR_MISALIGNMENT (dr
, -1);
1297 misalign
= DR_INIT (dr
);
1298 aligned_to
= DR_ALIGNED_TO (dr
);
1299 base_addr
= DR_BASE_ADDRESS (dr
);
1301 /* In case the dataref is in an inner-loop of the loop that is being
1302 vectorized (LOOP), we use the base and misalignment information
1303 relative to the outer-loop (LOOP). This is ok only if the misalignment
1304 stays the same throughout the execution of the inner-loop, which is why
1305 we have to check that the stride of the dataref in the inner-loop evenly
1306 divides by the vector size. */
1307 if (nested_in_vect_loop_p (loop
, stmt
))
1309 tree step
= DR_STEP (dr
);
1310 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1312 if (dr_step
% UNITS_PER_SIMD_WORD
== 0)
1314 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1315 fprintf (vect_dump
, "inner step divides the vector-size.");
1316 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
1317 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
1318 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
1322 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1323 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
1324 misalign
= NULL_TREE
;
1328 base
= build_fold_indirect_ref (base_addr
);
1329 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1330 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1332 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1335 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1337 fprintf (vect_dump
, "Unknown alignment for access: ");
1338 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
1344 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1346 || (TREE_CODE (base_addr
) == SSA_NAME
1347 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1348 TREE_TYPE (base_addr
)))),
1350 base_aligned
= true;
1352 base_aligned
= false;
1356 /* Do not change the alignment of global variables if
1357 flag_section_anchors is enabled. */
1358 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1359 || (TREE_STATIC (base
) && flag_section_anchors
))
1361 if (vect_print_dump_info (REPORT_DETAILS
))
1363 fprintf (vect_dump
, "can't force alignment of ref: ");
1364 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1369 /* Force the alignment of the decl.
1370 NOTE: This is the only change to the code we make during
1371 the analysis phase, before deciding to vectorize the loop. */
1372 if (vect_print_dump_info (REPORT_DETAILS
))
1373 fprintf (vect_dump
, "force alignment");
1374 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1375 DECL_USER_ALIGN (base
) = 1;
1378 /* At this point we assume that the base is aligned. */
1379 gcc_assert (base_aligned
1380 || (TREE_CODE (base
) == VAR_DECL
1381 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1383 /* Modulo alignment. */
1384 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1386 if (!host_integerp (misalign
, 1))
1388 /* Negative or overflowed misalignment value. */
1389 if (vect_print_dump_info (REPORT_DETAILS
))
1390 fprintf (vect_dump
, "unexpected misalign value");
1394 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
1396 if (vect_print_dump_info (REPORT_DETAILS
))
1398 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1399 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1406 /* Function vect_compute_data_refs_alignment
1408 Compute the misalignment of data references in the loop.
1409 Return FALSE if a data reference is found that cannot be vectorized. */
1412 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1414 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1415 struct data_reference
*dr
;
1418 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1419 if (!vect_compute_data_ref_alignment (dr
))
1426 /* Function vect_update_misalignment_for_peel
1428 DR - the data reference whose misalignment is to be adjusted.
1429 DR_PEEL - the data reference whose misalignment is being made
1430 zero in the vector loop by the peel.
1431 NPEEL - the number of iterations in the peel loop if the misalignment
1432 of DR_PEEL is known at compile time. */
1435 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1436 struct data_reference
*dr_peel
, int npeel
)
1439 VEC(dr_p
,heap
) *same_align_drs
;
1440 struct data_reference
*current_dr
;
1441 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1442 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1443 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1444 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1446 /* For interleaved data accesses the step in the loop must be multiplied by
1447 the size of the interleaving group. */
1448 if (DR_GROUP_FIRST_DR (stmt_info
))
1449 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1450 if (DR_GROUP_FIRST_DR (peel_stmt_info
))
1451 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1453 /* It can be assumed that the data refs with the same alignment as dr_peel
1454 are aligned in the vector loop. */
1456 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1457 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1459 if (current_dr
!= dr
)
1461 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1462 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1463 SET_DR_MISALIGNMENT (dr
, 0);
1467 if (known_alignment_for_access_p (dr
)
1468 && known_alignment_for_access_p (dr_peel
))
1470 int misal
= DR_MISALIGNMENT (dr
);
1471 misal
+= npeel
* dr_size
;
1472 misal
%= UNITS_PER_SIMD_WORD
;
1473 SET_DR_MISALIGNMENT (dr
, misal
);
1477 if (vect_print_dump_info (REPORT_DETAILS
))
1478 fprintf (vect_dump
, "Setting misalignment to -1.");
1479 SET_DR_MISALIGNMENT (dr
, -1);
1483 /* Function vect_verify_datarefs_alignment
1485 Return TRUE if all data references in the loop can be
1486 handled with respect to alignment. */
1489 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1491 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1492 struct data_reference
*dr
;
1493 enum dr_alignment_support supportable_dr_alignment
;
1496 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1498 tree stmt
= DR_STMT (dr
);
1499 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1501 /* For interleaving, only the alignment of the first access matters. */
1502 if (DR_GROUP_FIRST_DR (stmt_info
)
1503 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1506 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1507 if (!supportable_dr_alignment
)
1509 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1511 if (DR_IS_READ (dr
))
1513 "not vectorized: unsupported unaligned load.");
1516 "not vectorized: unsupported unaligned store.");
1520 if (supportable_dr_alignment
!= dr_aligned
1521 && vect_print_dump_info (REPORT_ALIGNMENT
))
1522 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1528 /* Function vector_alignment_reachable_p
1530 Return true if vector alignment for DR is reachable by peeling
1531 a few loop iterations. Return false otherwise. */
1534 vector_alignment_reachable_p (struct data_reference
*dr
)
1536 tree stmt
= DR_STMT (dr
);
1537 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1538 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1540 if (DR_GROUP_FIRST_DR (stmt_info
))
1542 /* For interleaved access we peel only if number of iterations in
1543 the prolog loop ({VF - misalignment}), is a multiple of the
1544 number of the interleaved accesses. */
1545 int elem_size
, mis_in_elements
;
1546 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1548 /* FORNOW: handle only known alignment. */
1549 if (!known_alignment_for_access_p (dr
))
1552 elem_size
= UNITS_PER_SIMD_WORD
/ nelements
;
1553 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1555 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1559 /* If misalignment is known at the compile time then allow peeling
1560 only if natural alignment is reachable through peeling. */
1561 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1563 HOST_WIDE_INT elmsize
=
1564 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1565 if (vect_print_dump_info (REPORT_DETAILS
))
1567 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1568 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1570 if (DR_MISALIGNMENT (dr
) % elmsize
)
1572 if (vect_print_dump_info (REPORT_DETAILS
))
1573 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1578 if (!known_alignment_for_access_p (dr
))
1580 tree type
= (TREE_TYPE (DR_REF (dr
)));
1581 tree ba
= DR_BASE_OBJECT (dr
);
1582 bool is_packed
= false;
1585 is_packed
= contains_packed_reference (ba
);
1587 if (vect_print_dump_info (REPORT_DETAILS
))
1588 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1589 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1598 /* Function vect_enhance_data_refs_alignment
1600 This pass will use loop versioning and loop peeling in order to enhance
1601 the alignment of data references in the loop.
1603 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1604 original loop is to be vectorized; Any other loops that are created by
1605 the transformations performed in this pass - are not supposed to be
1606 vectorized. This restriction will be relaxed.
1608 This pass will require a cost model to guide it whether to apply peeling
1609 or versioning or a combination of the two. For example, the scheme that
1610 intel uses when given a loop with several memory accesses, is as follows:
1611 choose one memory access ('p') which alignment you want to force by doing
1612 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1613 other accesses are not necessarily aligned, or (2) use loop versioning to
1614 generate one loop in which all accesses are aligned, and another loop in
1615 which only 'p' is necessarily aligned.
1617 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1618 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1619 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1621 Devising a cost model is the most critical aspect of this work. It will
1622 guide us on which access to peel for, whether to use loop versioning, how
1623 many versions to create, etc. The cost model will probably consist of
1624 generic considerations as well as target specific considerations (on
1625 powerpc for example, misaligned stores are more painful than misaligned
1628 Here are the general steps involved in alignment enhancements:
1630 -- original loop, before alignment analysis:
1631 for (i=0; i<N; i++){
1632 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1633 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1636 -- After vect_compute_data_refs_alignment:
1637 for (i=0; i<N; i++){
1638 x = q[i]; # DR_MISALIGNMENT(q) = 3
1639 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1642 -- Possibility 1: we do loop versioning:
1644 for (i=0; i<N; i++){ # loop 1A
1645 x = q[i]; # DR_MISALIGNMENT(q) = 3
1646 p[i] = y; # DR_MISALIGNMENT(p) = 0
1650 for (i=0; i<N; i++){ # loop 1B
1651 x = q[i]; # DR_MISALIGNMENT(q) = 3
1652 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1656 -- Possibility 2: we do loop peeling:
1657 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1661 for (i = 3; i < N; i++){ # loop 2A
1662 x = q[i]; # DR_MISALIGNMENT(q) = 0
1663 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1666 -- Possibility 3: combination of loop peeling and versioning:
1667 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1672 for (i = 3; i<N; i++){ # loop 3A
1673 x = q[i]; # DR_MISALIGNMENT(q) = 0
1674 p[i] = y; # DR_MISALIGNMENT(p) = 0
1678 for (i = 3; i<N; i++){ # loop 3B
1679 x = q[i]; # DR_MISALIGNMENT(q) = 0
1680 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1684 These loops are later passed to loop_transform to be vectorized. The
1685 vectorizer will use the alignment information to guide the transformation
1686 (whether to generate regular loads/stores, or with special handling for
1690 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1692 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1693 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1694 enum dr_alignment_support supportable_dr_alignment
;
1695 struct data_reference
*dr0
= NULL
;
1696 struct data_reference
*dr
;
1698 bool do_peeling
= false;
1699 bool do_versioning
= false;
1702 stmt_vec_info stmt_info
;
1703 int vect_versioning_for_alias_required
;
1705 if (vect_print_dump_info (REPORT_DETAILS
))
1706 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1708 /* While cost model enhancements are expected in the future, the high level
1709 view of the code at this time is as follows:
1711 A) If there is a misaligned write then see if peeling to align this write
1712 can make all data references satisfy vect_supportable_dr_alignment.
1713 If so, update data structures as needed and return true. Note that
1714 at this time vect_supportable_dr_alignment is known to return false
1715 for a misaligned write.
1717 B) If peeling wasn't possible and there is a data reference with an
1718 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1719 then see if loop versioning checks can be used to make all data
1720 references satisfy vect_supportable_dr_alignment. If so, update
1721 data structures as needed and return true.
1723 C) If neither peeling nor versioning were successful then return false if
1724 any data reference does not satisfy vect_supportable_dr_alignment.
1726 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1728 Note, Possibility 3 above (which is peeling and versioning together) is not
1729 being done at this time. */
1731 /* (1) Peeling to force alignment. */
1733 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1735 + How many accesses will become aligned due to the peeling
1736 - How many accesses will become unaligned due to the peeling,
1737 and the cost of misaligned accesses.
1738 - The cost of peeling (the extra runtime checks, the increase
1741 The scheme we use FORNOW: peel to force the alignment of the first
1742 misaligned store in the loop.
1743 Rationale: misaligned stores are not yet supported.
1745 TODO: Use a cost model. */
1747 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1749 stmt
= DR_STMT (dr
);
1750 stmt_info
= vinfo_for_stmt (stmt
);
1752 /* For interleaving, only the alignment of the first access
1754 if (DR_GROUP_FIRST_DR (stmt_info
)
1755 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1758 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1760 do_peeling
= vector_alignment_reachable_p (dr
);
1763 if (!do_peeling
&& vect_print_dump_info (REPORT_DETAILS
))
1764 fprintf (vect_dump
, "vector alignment may not be reachable");
1769 vect_versioning_for_alias_required
=
1770 (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)) > 0);
1772 /* Temporarily, if versioning for alias is required, we disable peeling
1773 until we support peeling and versioning. Often peeling for alignment
1774 will require peeling for loop-bound, which in turn requires that we
1775 know how to adjust the loop ivs after the loop. */
1776 if (vect_versioning_for_alias_required
1777 || !vect_can_advance_ivs_p (loop_vinfo
)
1778 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1785 tree stmt
= DR_STMT (dr0
);
1786 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1787 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1788 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1790 if (known_alignment_for_access_p (dr0
))
1792 /* Since it's known at compile time, compute the number of iterations
1793 in the peeled loop (the peeling factor) for use in updating
1794 DR_MISALIGNMENT values. The peeling factor is the vectorization
1795 factor minus the misalignment as an element count. */
1796 mis
= DR_MISALIGNMENT (dr0
);
1797 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1798 npeel
= nelements
- mis
;
1800 /* For interleaved data access every iteration accesses all the
1801 members of the group, therefore we divide the number of iterations
1802 by the group size. */
1803 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1804 if (DR_GROUP_FIRST_DR (stmt_info
))
1805 npeel
/= DR_GROUP_SIZE (stmt_info
);
1807 if (vect_print_dump_info (REPORT_DETAILS
))
1808 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1811 /* Ensure that all data refs can be vectorized after the peel. */
1812 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1814 int save_misalignment
;
1819 stmt
= DR_STMT (dr
);
1820 stmt_info
= vinfo_for_stmt (stmt
);
1821 /* For interleaving, only the alignment of the first access
1823 if (DR_GROUP_FIRST_DR (stmt_info
)
1824 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1827 save_misalignment
= DR_MISALIGNMENT (dr
);
1828 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1829 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1830 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1832 if (!supportable_dr_alignment
)
1841 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1842 If the misalignment of DR_i is identical to that of dr0 then set
1843 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1844 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1845 by the peeling factor times the element size of DR_i (MOD the
1846 vectorization factor times the size). Otherwise, the
1847 misalignment of DR_i must be set to unknown. */
1848 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1850 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1852 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1853 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1854 SET_DR_MISALIGNMENT (dr0
, 0);
1855 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1856 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1858 if (vect_print_dump_info (REPORT_DETAILS
))
1859 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1861 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1868 /* (2) Versioning to force alignment. */
1870 /* Try versioning if:
1871 1) flag_tree_vect_loop_version is TRUE
1872 2) optimize_size is FALSE
1873 3) there is at least one unsupported misaligned data ref with an unknown
1875 4) all misaligned data refs with a known misalignment are supported, and
1876 5) the number of runtime alignment checks is within reason. */
1879 flag_tree_vect_loop_version
1881 && (!loop
->inner
); /* FORNOW */
1885 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1887 stmt
= DR_STMT (dr
);
1888 stmt_info
= vinfo_for_stmt (stmt
);
1890 /* For interleaving, only the alignment of the first access
1892 if (aligned_access_p (dr
)
1893 || (DR_GROUP_FIRST_DR (stmt_info
)
1894 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1897 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1899 if (!supportable_dr_alignment
)
1905 if (known_alignment_for_access_p (dr
)
1906 || VEC_length (tree
,
1907 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1908 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1910 do_versioning
= false;
1914 stmt
= DR_STMT (dr
);
1915 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1916 gcc_assert (vectype
);
1918 /* The rightmost bits of an aligned address must be zeros.
1919 Construct the mask needed for this test. For example,
1920 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1921 mask must be 15 = 0xf. */
1922 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1924 /* FORNOW: use the same mask to test all potentially unaligned
1925 references in the loop. The vectorizer currently supports
1926 a single vector size, see the reference to
1927 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1928 vectorization factor is computed. */
1929 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1930 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1931 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1932 VEC_safe_push (tree
, heap
,
1933 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1938 /* Versioning requires at least one misaligned data reference. */
1939 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1940 do_versioning
= false;
1941 else if (!do_versioning
)
1942 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1947 VEC(tree
,heap
) *may_misalign_stmts
1948 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1951 /* It can now be assumed that the data references in the statements
1952 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1953 of the loop being vectorized. */
1954 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
1956 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1957 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1958 SET_DR_MISALIGNMENT (dr
, 0);
1959 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1960 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1963 if (vect_print_dump_info (REPORT_DETAILS
))
1964 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1966 /* Peeling and versioning can't be done together at this time. */
1967 gcc_assert (! (do_peeling
&& do_versioning
));
1969 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1974 /* This point is reached if neither peeling nor versioning is being done. */
1975 gcc_assert (! (do_peeling
|| do_versioning
));
1977 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1982 /* Function vect_analyze_data_refs_alignment
1984 Analyze the alignment of the data-references in the loop.
1985 Return FALSE if a data reference is found that cannot be vectorized. */
1988 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1990 if (vect_print_dump_info (REPORT_DETAILS
))
1991 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1993 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1995 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1997 "not vectorized: can't calculate alignment for data ref.");
2005 /* Function vect_analyze_data_ref_access.
2007 Analyze the access pattern of the data-reference DR. For now, a data access
2008 has to be consecutive to be considered vectorizable. */
2011 vect_analyze_data_ref_access (struct data_reference
*dr
)
2013 tree step
= DR_STEP (dr
);
2014 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2015 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2016 tree stmt
= DR_STMT (dr
);
2017 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2018 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2019 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2020 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2021 HOST_WIDE_INT stride
;
2023 /* Don't allow invariant accesses. */
2027 if (nested_in_vect_loop_p (loop
, stmt
))
2029 /* For the rest of the analysis we use the outer-loop step. */
2030 step
= STMT_VINFO_DR_STEP (stmt_info
);
2031 dr_step
= TREE_INT_CST_LOW (step
);
2035 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2036 fprintf (vect_dump
, "zero step in outer loop.");
2037 if (DR_IS_READ (dr
))
2044 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2045 interleaving group (including gaps). */
2046 stride
= dr_step
/ type_size
;
2049 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
2051 /* Mark that it is not interleaving. */
2052 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
2056 if (nested_in_vect_loop_p (loop
, stmt
))
2058 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2059 fprintf (vect_dump
, "strided access in outer loop.");
2063 /* Not consecutive access is possible only if it is a part of interleaving. */
2064 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
2066 /* Check if it this DR is a part of interleaving, and is a single
2067 element of the group that is accessed in the loop. */
2069 /* Gaps are supported only for loads. STEP must be a multiple of the type
2070 size. The size of the group must be a power of 2. */
2072 && (dr_step
% type_size
) == 0
2074 && exact_log2 (stride
) != -1)
2076 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
2077 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2078 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2080 fprintf (vect_dump
, "Detected single element interleaving %d ",
2081 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
2082 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2083 fprintf (vect_dump
, " step ");
2084 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2088 if (vect_print_dump_info (REPORT_DETAILS
))
2089 fprintf (vect_dump
, "not consecutive access");
2093 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
2095 /* First stmt in the interleaving chain. Check the chain. */
2096 tree next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
2097 struct data_reference
*data_ref
= dr
;
2098 unsigned int count
= 1;
2100 tree prev_init
= DR_INIT (data_ref
);
2102 HOST_WIDE_INT diff
, count_in_bytes
;
2106 /* Skip same data-refs. In case that two or more stmts share data-ref
2107 (supported only for loads), we vectorize only the first stmt, and
2108 the rest get their vectorized loads from the first one. */
2109 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2110 DR_INIT (STMT_VINFO_DATA_REF (
2111 vinfo_for_stmt (next
)))))
2113 if (!DR_IS_READ (data_ref
))
2115 if (vect_print_dump_info (REPORT_DETAILS
))
2116 fprintf (vect_dump
, "Two store stmts share the same dr.");
2120 /* Check that there is no load-store dependencies for this loads
2121 to prevent a case of load-store-load to the same location. */
2122 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2123 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2125 if (vect_print_dump_info (REPORT_DETAILS
))
2127 "READ_WRITE dependence in interleaving.");
2131 /* For load use the same data-ref load. */
2132 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2135 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2140 /* Check that all the accesses have the same STEP. */
2141 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2142 if (tree_int_cst_compare (step
, next_step
))
2144 if (vect_print_dump_info (REPORT_DETAILS
))
2145 fprintf (vect_dump
, "not consecutive access in interleaving");
2149 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2150 /* Check that the distance between two accesses is equal to the type
2151 size. Otherwise, we have gaps. */
2152 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2153 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2154 if (!DR_IS_READ (data_ref
) && diff
!= 1)
2156 if (vect_print_dump_info (REPORT_DETAILS
))
2157 fprintf (vect_dump
, "interleaved store with gaps");
2160 /* Store the gap from the previous member of the group. If there is no
2161 gap in the access, DR_GROUP_GAP is always 1. */
2162 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2164 prev_init
= DR_INIT (data_ref
);
2165 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2166 /* Count the number of data-refs in the chain. */
2170 /* COUNT is the number of accesses found, we multiply it by the size of
2171 the type to get COUNT_IN_BYTES. */
2172 count_in_bytes
= type_size
* count
;
2174 /* Check that the size of the interleaving is not greater than STEP. */
2175 if (dr_step
< count_in_bytes
)
2177 if (vect_print_dump_info (REPORT_DETAILS
))
2179 fprintf (vect_dump
, "interleaving size is greater than step for ");
2180 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2185 /* Check that the size of the interleaving is equal to STEP for stores,
2186 i.e., that there are no gaps. */
2187 if (!DR_IS_READ (dr
) && dr_step
!= count_in_bytes
)
2189 if (vect_print_dump_info (REPORT_DETAILS
))
2190 fprintf (vect_dump
, "interleaved store with gaps");
2194 /* Check that STEP is a multiple of type size. */
2195 if ((dr_step
% type_size
) != 0)
2197 if (vect_print_dump_info (REPORT_DETAILS
))
2199 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2200 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2201 fprintf (vect_dump
, " size ");
2202 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2208 /* FORNOW: we handle only interleaving that is a power of 2. */
2209 if (exact_log2 (stride
) == -1)
2211 if (vect_print_dump_info (REPORT_DETAILS
))
2212 fprintf (vect_dump
, "interleaving is not a power of 2");
2215 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2221 /* Function vect_analyze_data_ref_accesses.
2223 Analyze the access pattern of all the data references in the loop.
2225 FORNOW: the only access pattern that is considered vectorizable is a
2226 simple step 1 (consecutive) access.
2228 FORNOW: handle only arrays and pointer accesses. */
2231 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
2234 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2235 struct data_reference
*dr
;
2237 if (vect_print_dump_info (REPORT_DETAILS
))
2238 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
2240 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2241 if (!vect_analyze_data_ref_access (dr
))
2243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2244 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2252 /* Function vect_analyze_data_refs.
2254 Find all the data references in the loop.
2256 The general structure of the analysis of data refs in the vectorizer is as
2258 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
2259 find and analyze all data-refs in the loop and their dependences.
2260 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2261 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2262 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2267 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
2269 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2271 VEC (data_reference_p
, heap
) *datarefs
;
2272 struct data_reference
*dr
;
2275 if (vect_print_dump_info (REPORT_DETAILS
))
2276 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
2278 compute_data_dependences_for_loop (loop
, true,
2279 &LOOP_VINFO_DATAREFS (loop_vinfo
),
2280 &LOOP_VINFO_DDRS (loop_vinfo
));
2282 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2283 from stmt_vec_info struct to DR and vectype. */
2284 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2286 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2289 stmt_vec_info stmt_info
;
2291 tree base
, offset
, init
;
2293 if (!dr
|| !DR_REF (dr
))
2295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2296 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
2300 stmt
= DR_STMT (dr
);
2301 stmt_info
= vinfo_for_stmt (stmt
);
2303 /* Check that analysis of the data-ref succeeded. */
2304 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2309 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
2310 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2315 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2317 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2318 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
2323 if (!DR_SYMBOL_TAG (dr
))
2325 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2327 fprintf (vect_dump
, "not vectorized: no memory tag for ");
2328 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2333 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2334 offset
= unshare_expr (DR_OFFSET (dr
));
2335 init
= unshare_expr (DR_INIT (dr
));
2337 /* Update DR field in stmt_vec_info struct. */
2338 bb
= bb_for_stmt (stmt
);
2340 /* If the dataref is in an inner-loop of the loop that is considered for
2341 for vectorization, we also want to analyze the access relative to
2342 the outer-loop (DR contains information only relative to the
2343 inner-most enclosing loop). We do that by building a reference to the
2344 first location accessed by the inner-loop, and analyze it relative to
2346 if (nested_in_vect_loop_p (loop
, stmt
))
2348 tree outer_step
, outer_base
, outer_init
;
2349 HOST_WIDE_INT pbitsize
, pbitpos
;
2351 enum machine_mode pmode
;
2352 int punsignedp
, pvolatilep
;
2353 affine_iv base_iv
, offset_iv
;
2356 /* Build a reference to the first location accessed by the
2357 inner-loop: *(BASE+INIT). (The first location is actually
2358 BASE+INIT+OFFSET, but we add OFFSET separately later. */
2359 tree inner_base
= build_fold_indirect_ref
2360 (fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
, init
));
2362 if (vect_print_dump_info (REPORT_DETAILS
))
2364 fprintf (dump_file
, "analyze in outer-loop: ");
2365 print_generic_expr (dump_file
, inner_base
, TDF_SLIM
);
2368 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
2369 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
2370 gcc_assert (outer_base
!= NULL_TREE
);
2372 if (pbitpos
% BITS_PER_UNIT
!= 0)
2374 if (vect_print_dump_info (REPORT_DETAILS
))
2375 fprintf (dump_file
, "failed: bit offset alignment.\n");
2379 outer_base
= build_fold_addr_expr (outer_base
);
2380 if (!simple_iv (loop
, stmt
, outer_base
, &base_iv
, false))
2382 if (vect_print_dump_info (REPORT_DETAILS
))
2383 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
2390 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, poffset
);
2397 offset_iv
.base
= ssize_int (0);
2398 offset_iv
.step
= ssize_int (0);
2400 else if (!simple_iv (loop
, stmt
, poffset
, &offset_iv
, false))
2402 if (vect_print_dump_info (REPORT_DETAILS
))
2403 fprintf (dump_file
, "evolution of offset is not affine.\n");
2407 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
2408 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
2409 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2410 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
2411 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2413 outer_step
= size_binop (PLUS_EXPR
,
2414 fold_convert (ssizetype
, base_iv
.step
),
2415 fold_convert (ssizetype
, offset_iv
.step
));
2417 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
2418 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2419 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
2420 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
2421 STMT_VINFO_DR_OFFSET (stmt_info
) =
2422 fold_convert (ssizetype
, offset_iv
.base
);
2423 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
2424 size_int (highest_pow2_factor (offset_iv
.base
));
2426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2428 fprintf (dump_file
, "\touter base_address: ");
2429 print_generic_expr (dump_file
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
2430 fprintf (dump_file
, "\n\touter offset from base address: ");
2431 print_generic_expr (dump_file
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
2432 fprintf (dump_file
, "\n\touter constant offset from base address: ");
2433 print_generic_expr (dump_file
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
2434 fprintf (dump_file
, "\n\touter step: ");
2435 print_generic_expr (dump_file
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
2436 fprintf (dump_file
, "\n\touter aligned to: ");
2437 print_generic_expr (dump_file
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
2441 if (STMT_VINFO_DATA_REF (stmt_info
))
2443 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2446 "not vectorized: more than one data ref in stmt: ");
2447 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2451 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
2453 /* Set vectype for STMT. */
2454 scalar_type
= TREE_TYPE (DR_REF (dr
));
2455 STMT_VINFO_VECTYPE (stmt_info
) =
2456 get_vectype_for_scalar_type (scalar_type
);
2457 if (!STMT_VINFO_VECTYPE (stmt_info
))
2459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2462 "not vectorized: no vectype for stmt: ");
2463 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2464 fprintf (vect_dump
, " scalar_type: ");
2465 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
2475 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2477 /* Function vect_mark_relevant.
2479 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2482 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
2483 enum vect_relevant relevant
, bool live_p
)
2485 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2486 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
2487 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
2489 if (vect_print_dump_info (REPORT_DETAILS
))
2490 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
2492 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2496 /* This is the last stmt in a sequence that was detected as a
2497 pattern that can potentially be vectorized. Don't mark the stmt
2498 as relevant/live because it's not going to be vectorized.
2499 Instead mark the pattern-stmt that replaces it. */
2501 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2503 if (vect_print_dump_info (REPORT_DETAILS
))
2504 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
2505 stmt_info
= vinfo_for_stmt (pattern_stmt
);
2506 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
2507 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
2508 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
2509 stmt
= pattern_stmt
;
2512 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
2513 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
2514 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
2516 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
2517 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
2519 if (vect_print_dump_info (REPORT_DETAILS
))
2520 fprintf (vect_dump
, "already marked relevant/live.");
2524 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
2528 /* Function vect_stmt_relevant_p.
2530 Return true if STMT in loop that is represented by LOOP_VINFO is
2531 "relevant for vectorization".
2533 A stmt is considered "relevant for vectorization" if:
2534 - it has uses outside the loop.
2535 - it has vdefs (it alters memory).
2536 - control stmts in the loop (except for the exit condition).
2538 CHECKME: what other side effects would the vectorizer allow? */
2541 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
2542 enum vect_relevant
*relevant
, bool *live_p
)
2544 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2545 ssa_op_iter op_iter
;
2546 imm_use_iterator imm_iter
;
2547 use_operand_p use_p
;
2548 def_operand_p def_p
;
2550 *relevant
= vect_unused_in_loop
;
2553 /* cond stmt other than loop exit cond. */
2554 if (is_ctrl_stmt (stmt
)
2555 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
)) != loop_exit_ctrl_vec_info_type
)
2556 *relevant
= vect_used_in_loop
;
2558 /* changing memory. */
2559 if (TREE_CODE (stmt
) != PHI_NODE
)
2560 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
2562 if (vect_print_dump_info (REPORT_DETAILS
))
2563 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
2564 *relevant
= vect_used_in_loop
;
2567 /* uses outside the loop. */
2568 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2570 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
2572 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
2573 if (!flow_bb_inside_loop_p (loop
, bb
))
2575 if (vect_print_dump_info (REPORT_DETAILS
))
2576 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
2578 /* We expect all such uses to be in the loop exit phis
2579 (because of loop closed form) */
2580 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
2581 gcc_assert (bb
== single_exit (loop
)->dest
);
2588 return (*live_p
|| *relevant
);
2593 Function process_use.
2596 - a USE in STMT in a loop represented by LOOP_VINFO
2597 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2598 that defined USE. This is dont by calling mark_relevant and passing it
2599 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2602 Generally, LIVE_P and RELEVANT are used to define the liveness and
2603 relevance info of the DEF_STMT of this USE:
2604 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2605 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2607 - case 1: If USE is used only for address computations (e.g. array indexing),
2608 which does not need to be directly vectorized, then the liveness/relevance
2609 of the respective DEF_STMT is left unchanged.
2610 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2611 skip DEF_STMT cause it had already been processed.
2612 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
2613 be modified accordingly.
2615 Return true if everything is as expected. Return false otherwise. */
2618 process_use (tree stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
2619 enum vect_relevant relevant
, VEC(tree
,heap
) **worklist
)
2621 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2622 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2623 stmt_vec_info dstmt_vinfo
;
2624 basic_block bb
, def_bb
;
2626 enum vect_def_type dt
;
2628 /* case 1: we are only interested in uses that need to be vectorized. Uses
2629 that are used for address computation are not considered relevant. */
2630 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
2633 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2635 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2636 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
2640 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
2643 def_bb
= bb_for_stmt (def_stmt
);
2644 if (!flow_bb_inside_loop_p (loop
, def_bb
))
2646 if (vect_print_dump_info (REPORT_DETAILS
))
2647 fprintf (vect_dump
, "def_stmt is out of loop.");
2651 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
2652 DEF_STMT must have already been processed, because this should be the
2653 only way that STMT, which is a reduction-phi, was put in the worklist,
2654 as there should be no other uses for DEF_STMT in the loop. So we just
2655 check that everything is as expected, and we are done. */
2656 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
2657 bb
= bb_for_stmt (stmt
);
2658 if (TREE_CODE (stmt
) == PHI_NODE
2659 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2660 && TREE_CODE (def_stmt
) != PHI_NODE
2661 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
2662 && bb
->loop_father
== def_bb
->loop_father
)
2664 if (vect_print_dump_info (REPORT_DETAILS
))
2665 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
2666 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
2667 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
2668 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
2669 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
2670 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_loop
);
2674 /* case 3a: outer-loop stmt defining an inner-loop stmt:
2675 outer-loop-header-bb:
2681 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
2683 if (vect_print_dump_info (REPORT_DETAILS
))
2684 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
2687 case vect_unused_in_loop
:
2688 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
2689 vect_used_by_reduction
: vect_unused_in_loop
;
2691 case vect_used_in_outer_by_reduction
:
2692 relevant
= vect_used_by_reduction
;
2694 case vect_used_in_outer
:
2695 relevant
= vect_used_in_loop
;
2697 case vect_used_by_reduction
:
2698 case vect_used_in_loop
:
2706 /* case 3b: inner-loop stmt defining an outer-loop stmt:
2707 outer-loop-header-bb:
2713 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
2715 if (vect_print_dump_info (REPORT_DETAILS
))
2716 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
2719 case vect_unused_in_loop
:
2720 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
2721 vect_used_in_outer_by_reduction
: vect_unused_in_loop
;
2724 case vect_used_in_outer_by_reduction
:
2725 case vect_used_in_outer
:
2728 case vect_used_by_reduction
:
2729 relevant
= vect_used_in_outer_by_reduction
;
2732 case vect_used_in_loop
:
2733 relevant
= vect_used_in_outer
;
2741 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
2746 /* Function vect_mark_stmts_to_be_vectorized.
2748 Not all stmts in the loop need to be vectorized. For example:
2757 Stmt 1 and 3 do not need to be vectorized, because loop control and
2758 addressing of vectorized data-refs are handled differently.
2760 This pass detects such stmts. */
2763 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
2765 VEC(tree
,heap
) *worklist
;
2766 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2767 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2768 unsigned int nbbs
= loop
->num_nodes
;
2769 block_stmt_iterator si
;
2773 stmt_vec_info stmt_vinfo
;
2777 enum vect_relevant relevant
;
2779 if (vect_print_dump_info (REPORT_DETAILS
))
2780 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
2782 worklist
= VEC_alloc (tree
, heap
, 64);
2784 /* 1. Init worklist. */
2785 for (i
= 0; i
< nbbs
; i
++)
2788 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2790 if (vect_print_dump_info (REPORT_DETAILS
))
2792 fprintf (vect_dump
, "init: phi relevant? ");
2793 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2796 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
2797 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
2799 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2801 stmt
= bsi_stmt (si
);
2802 if (vect_print_dump_info (REPORT_DETAILS
))
2804 fprintf (vect_dump
, "init: stmt relevant? ");
2805 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2808 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
2809 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
2813 /* 2. Process_worklist */
2814 while (VEC_length (tree
, worklist
) > 0)
2816 use_operand_p use_p
;
2819 stmt
= VEC_pop (tree
, worklist
);
2820 if (vect_print_dump_info (REPORT_DETAILS
))
2822 fprintf (vect_dump
, "worklist: examine stmt: ");
2823 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2826 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2827 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2828 liveness and relevance properties of STMT. */
2829 ann
= stmt_ann (stmt
);
2830 stmt_vinfo
= vinfo_for_stmt (stmt
);
2831 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
2832 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
2834 /* Generally, the liveness and relevance properties of STMT are
2835 propagated as is to the DEF_STMTs of its USEs:
2836 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2837 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2839 One exception is when STMT has been identified as defining a reduction
2840 variable; in this case we set the liveness/relevance as follows:
2842 relevant = vect_used_by_reduction
2843 This is because we distinguish between two kinds of relevant stmts -
2844 those that are used by a reduction computation, and those that are
2845 (also) used by a regular computation. This allows us later on to
2846 identify stmts that are used solely by a reduction, and therefore the
2847 order of the results that they produce does not have to be kept.
2849 Reduction phis are expected to be used by a reduction stmt, or by
2850 in an outer loop; Other reduction stmts are expected to be
2851 in the loop, and possibly used by a stmt in an outer loop.
2852 Here are the expected values of "relevant" for reduction phis/stmts:
2855 vect_unused_in_loop ok
2856 vect_used_in_outer_by_reduction ok ok
2857 vect_used_in_outer ok ok
2858 vect_used_by_reduction ok
2859 vect_used_in_loop */
2861 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
2863 enum vect_relevant tmp_relevant
= relevant
;
2864 switch (tmp_relevant
)
2866 case vect_unused_in_loop
:
2867 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
2868 relevant
= vect_used_by_reduction
;
2871 case vect_used_in_outer_by_reduction
:
2872 case vect_used_in_outer
:
2873 gcc_assert (TREE_CODE (stmt
) != WIDEN_SUM_EXPR
2874 && TREE_CODE (stmt
) != DOT_PROD_EXPR
);
2877 case vect_used_by_reduction
:
2878 if (TREE_CODE (stmt
) == PHI_NODE
)
2881 case vect_used_in_loop
:
2883 if (vect_print_dump_info (REPORT_DETAILS
))
2884 fprintf (vect_dump
, "unsupported use of reduction.");
2885 VEC_free (tree
, heap
, worklist
);
2891 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2893 tree op
= USE_FROM_PTR (use_p
);
2894 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
, &worklist
))
2896 VEC_free (tree
, heap
, worklist
);
2900 } /* while worklist */
2902 VEC_free (tree
, heap
, worklist
);
2907 /* Function vect_can_advance_ivs_p
2909 In case the number of iterations that LOOP iterates is unknown at compile
2910 time, an epilog loop will be generated, and the loop induction variables
2911 (IVs) will be "advanced" to the value they are supposed to take just before
2912 the epilog loop. Here we check that the access function of the loop IVs
2913 and the expression that represents the loop bound are simple enough.
2914 These restrictions will be relaxed in the future. */
2917 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
2919 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2920 basic_block bb
= loop
->header
;
2923 /* Analyze phi functions of the loop header. */
2925 if (vect_print_dump_info (REPORT_DETAILS
))
2926 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
2928 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2930 tree access_fn
= NULL
;
2931 tree evolution_part
;
2933 if (vect_print_dump_info (REPORT_DETAILS
))
2935 fprintf (vect_dump
, "Analyze phi: ");
2936 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2939 /* Skip virtual phi's. The data dependences that are associated with
2940 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2942 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2944 if (vect_print_dump_info (REPORT_DETAILS
))
2945 fprintf (vect_dump
, "virtual phi. skip.");
2949 /* Skip reduction phis. */
2951 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
2953 if (vect_print_dump_info (REPORT_DETAILS
))
2954 fprintf (vect_dump
, "reduc phi. skip.");
2958 /* Analyze the evolution function. */
2960 access_fn
= instantiate_parameters
2961 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
2965 if (vect_print_dump_info (REPORT_DETAILS
))
2966 fprintf (vect_dump
, "No Access function.");
2970 if (vect_print_dump_info (REPORT_DETAILS
))
2972 fprintf (vect_dump
, "Access function of PHI: ");
2973 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
2976 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
2978 if (evolution_part
== NULL_TREE
)
2980 if (vect_print_dump_info (REPORT_DETAILS
))
2981 fprintf (vect_dump
, "No evolution.");
2985 /* FORNOW: We do not transform initial conditions of IVs
2986 which evolution functions are a polynomial of degree >= 2. */
2988 if (tree_is_chrec (evolution_part
))
2996 /* Function vect_get_loop_niters.
2998 Determine how many iterations the loop is executed.
2999 If an expression that represents the number of iterations
3000 can be constructed, place it in NUMBER_OF_ITERATIONS.
3001 Return the loop exit condition. */
3004 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
3008 if (vect_print_dump_info (REPORT_DETAILS
))
3009 fprintf (vect_dump
, "=== get_loop_niters ===");
3011 niters
= number_of_exit_cond_executions (loop
);
3013 if (niters
!= NULL_TREE
3014 && niters
!= chrec_dont_know
)
3016 *number_of_iterations
= niters
;
3018 if (vect_print_dump_info (REPORT_DETAILS
))
3020 fprintf (vect_dump
, "==> get_loop_niters:" );
3021 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
3025 return get_loop_exit_condition (loop
);
3029 /* Function vect_analyze_loop_1.
3031 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3032 for it. The different analyses will record information in the
3033 loop_vec_info struct. This is a subset of the analyses applied in
3034 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3035 that is now considered for (outer-loop) vectorization. */
3037 static loop_vec_info
3038 vect_analyze_loop_1 (struct loop
*loop
)
3040 loop_vec_info loop_vinfo
;
3042 if (vect_print_dump_info (REPORT_DETAILS
))
3043 fprintf (vect_dump
, "===== analyze_loop_nest_1 =====");
3045 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3047 loop_vinfo
= vect_analyze_loop_form (loop
);
3050 if (vect_print_dump_info (REPORT_DETAILS
))
3051 fprintf (vect_dump
, "bad inner-loop form.");
3059 /* Function vect_analyze_loop_form.
3061 Verify that certain CFG restrictions hold, including:
3062 - the loop has a pre-header
3063 - the loop has a single entry and exit
3064 - the loop exit condition is simple enough, and the number of iterations
3065 can be analyzed (a countable loop). */
3067 static loop_vec_info
3068 vect_analyze_loop_form (struct loop
*loop
)
3070 loop_vec_info loop_vinfo
;
3072 tree number_of_iterations
= NULL
;
3073 loop_vec_info inner_loop_vinfo
= NULL
;
3075 if (vect_print_dump_info (REPORT_DETAILS
))
3076 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
3078 /* Different restrictions apply when we are considering an inner-most loop,
3079 vs. an outer (nested) loop.
3080 (FORNOW. May want to relax some of these restrictions in the future). */
3084 /* Inner-most loop. We currently require that the number of BBs is
3085 exactly 2 (the header and latch). Vectorizable inner-most loops
3096 if (loop
->num_nodes
!= 2)
3098 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3099 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
3103 if (empty_block_p (loop
->header
))
3105 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3106 fprintf (vect_dump
, "not vectorized: empty loop.");
3112 struct loop
*innerloop
= loop
->inner
;
3113 edge backedge
, entryedge
;
3115 /* Nested loop. We currently require that the loop is doubly-nested,
3116 contains a single inner loop, and the number of BBs is exactly 5.
3117 Vectorizable outer-loops look like this:
3129 The inner-loop has the properties expected of inner-most loops
3130 as described above. */
3132 if ((loop
->inner
)->inner
|| (loop
->inner
)->next
)
3134 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3135 fprintf (vect_dump
, "not vectorized: multiple nested loops.");
3139 /* Analyze the inner-loop. */
3140 inner_loop_vinfo
= vect_analyze_loop_1 (loop
->inner
);
3141 if (!inner_loop_vinfo
)
3143 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3144 fprintf (vect_dump
, "not vectorized: Bad inner loop.");
3148 if (!expr_invariant_in_loop_p (loop
,
3149 LOOP_VINFO_NITERS (inner_loop_vinfo
)))
3151 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3153 "not vectorized: inner-loop count not invariant.");
3154 destroy_loop_vec_info (inner_loop_vinfo
, true);
3158 if (loop
->num_nodes
!= 5)
3160 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3161 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
3162 destroy_loop_vec_info (inner_loop_vinfo
, true);
3166 gcc_assert (EDGE_COUNT (innerloop
->header
->preds
) == 2);
3167 backedge
= EDGE_PRED (innerloop
->header
, 1);
3168 entryedge
= EDGE_PRED (innerloop
->header
, 0);
3169 if (EDGE_PRED (innerloop
->header
, 0)->src
== innerloop
->latch
)
3171 backedge
= EDGE_PRED (innerloop
->header
, 0);
3172 entryedge
= EDGE_PRED (innerloop
->header
, 1);
3175 if (entryedge
->src
!= loop
->header
3176 || !single_exit (innerloop
)
3177 || single_exit (innerloop
)->dest
!= EDGE_PRED (loop
->latch
, 0)->src
)
3179 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3180 fprintf (vect_dump
, "not vectorized: unsupported outerloop form.");
3181 destroy_loop_vec_info (inner_loop_vinfo
, true);
3185 if (vect_print_dump_info (REPORT_DETAILS
))
3186 fprintf (vect_dump
, "Considering outer-loop vectorization.");
3189 if (!single_exit (loop
)
3190 || EDGE_COUNT (loop
->header
->preds
) != 2)
3192 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3194 if (!single_exit (loop
))
3195 fprintf (vect_dump
, "not vectorized: multiple exits.");
3196 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
3197 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
3199 if (inner_loop_vinfo
)
3200 destroy_loop_vec_info (inner_loop_vinfo
, true);
3204 /* We assume that the loop exit condition is at the end of the loop. i.e,
3205 that the loop is represented as a do-while (with a proper if-guard
3206 before the loop if needed), where the loop header contains all the
3207 executable statements, and the latch is empty. */
3208 if (!empty_block_p (loop
->latch
)
3209 || phi_nodes (loop
->latch
))
3211 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3212 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
3213 if (inner_loop_vinfo
)
3214 destroy_loop_vec_info (inner_loop_vinfo
, true);
3218 /* Make sure there exists a single-predecessor exit bb: */
3219 if (!single_pred_p (single_exit (loop
)->dest
))
3221 edge e
= single_exit (loop
);
3222 if (!(e
->flags
& EDGE_ABNORMAL
))
3224 split_loop_exit_edge (e
);
3225 if (vect_print_dump_info (REPORT_DETAILS
))
3226 fprintf (vect_dump
, "split exit edge.");
3230 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3231 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
3232 if (inner_loop_vinfo
)
3233 destroy_loop_vec_info (inner_loop_vinfo
, true);
3238 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
3241 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3242 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
3243 if (inner_loop_vinfo
)
3244 destroy_loop_vec_info (inner_loop_vinfo
, true);
3248 if (!number_of_iterations
)
3250 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3252 "not vectorized: number of iterations cannot be computed.");
3253 if (inner_loop_vinfo
)
3254 destroy_loop_vec_info (inner_loop_vinfo
, true);
3258 if (chrec_contains_undetermined (number_of_iterations
))
3260 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3261 fprintf (vect_dump
, "Infinite number of iterations.");
3262 if (inner_loop_vinfo
)
3263 destroy_loop_vec_info (inner_loop_vinfo
, true);
3267 if (!NITERS_KNOWN_P (number_of_iterations
))
3269 if (vect_print_dump_info (REPORT_DETAILS
))
3271 fprintf (vect_dump
, "Symbolic number of iterations is ");
3272 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
3275 else if (TREE_INT_CST_LOW (number_of_iterations
) == 0)
3277 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3278 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
3279 if (inner_loop_vinfo
)
3280 destroy_loop_vec_info (inner_loop_vinfo
, false);
3284 loop_vinfo
= new_loop_vec_info (loop
);
3285 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
3287 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond
)) = loop_exit_ctrl_vec_info_type
;
3289 /* CHECKME: May want to keep it around in the future. */
3290 if (inner_loop_vinfo
)
3291 destroy_loop_vec_info (inner_loop_vinfo
, false);
3293 gcc_assert (!loop
->aux
);
3294 loop
->aux
= loop_vinfo
;
3299 /* Function vect_analyze_loop.
3301 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3302 for it. The different analyses will record information in the
3303 loop_vec_info struct. */
3305 vect_analyze_loop (struct loop
*loop
)
3308 loop_vec_info loop_vinfo
;
3310 if (vect_print_dump_info (REPORT_DETAILS
))
3311 fprintf (vect_dump
, "===== analyze_loop_nest =====");
3313 if (loop_outer (loop
)
3314 && loop_vec_info_for_loop (loop_outer (loop
))
3315 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop
))))
3317 if (vect_print_dump_info (REPORT_DETAILS
))
3318 fprintf (vect_dump
, "outer-loop already vectorized.");
3322 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3324 loop_vinfo
= vect_analyze_loop_form (loop
);
3327 if (vect_print_dump_info (REPORT_DETAILS
))
3328 fprintf (vect_dump
, "bad loop form.");
3332 /* Find all data references in the loop (which correspond to vdefs/vuses)
3333 and analyze their evolution in the loop.
3335 FORNOW: Handle only simple, array references, which
3336 alignment can be forced, and aligned pointer-references. */
3338 ok
= vect_analyze_data_refs (loop_vinfo
);
3341 if (vect_print_dump_info (REPORT_DETAILS
))
3342 fprintf (vect_dump
, "bad data references.");
3343 destroy_loop_vec_info (loop_vinfo
, true);
3347 /* Classify all cross-iteration scalar data-flow cycles.
3348 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3350 vect_analyze_scalar_cycles (loop_vinfo
);
3352 vect_pattern_recog (loop_vinfo
);
3354 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3356 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
3359 if (vect_print_dump_info (REPORT_DETAILS
))
3360 fprintf (vect_dump
, "unexpected pattern.");
3361 destroy_loop_vec_info (loop_vinfo
, true);
3365 /* Analyze the alignment of the data-refs in the loop.
3366 Fail if a data reference is found that cannot be vectorized. */
3368 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
3371 if (vect_print_dump_info (REPORT_DETAILS
))
3372 fprintf (vect_dump
, "bad data alignment.");
3373 destroy_loop_vec_info (loop_vinfo
, true);
3377 ok
= vect_determine_vectorization_factor (loop_vinfo
);
3380 if (vect_print_dump_info (REPORT_DETAILS
))
3381 fprintf (vect_dump
, "can't determine vectorization factor.");
3382 destroy_loop_vec_info (loop_vinfo
, true);
3386 /* Analyze data dependences between the data-refs in the loop.
3387 FORNOW: fail at the first data dependence that we encounter. */
3389 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
3392 if (vect_print_dump_info (REPORT_DETAILS
))
3393 fprintf (vect_dump
, "bad data dependence.");
3394 destroy_loop_vec_info (loop_vinfo
, true);
3398 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3399 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3401 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
3404 if (vect_print_dump_info (REPORT_DETAILS
))
3405 fprintf (vect_dump
, "bad data access.");
3406 destroy_loop_vec_info (loop_vinfo
, true);
3410 /* This pass will decide on using loop versioning and/or loop peeling in
3411 order to enhance the alignment of data references in the loop. */
3413 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
3416 if (vect_print_dump_info (REPORT_DETAILS
))
3417 fprintf (vect_dump
, "bad data alignment.");
3418 destroy_loop_vec_info (loop_vinfo
, true);
3422 /* Scan all the operations in the loop and make sure they are
3425 ok
= vect_analyze_operations (loop_vinfo
);
3428 if (vect_print_dump_info (REPORT_DETAILS
))
3429 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
3430 destroy_loop_vec_info (loop_vinfo
, true);
3434 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;