/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
44 /* Main analysis functions. */
45 static bool vect_analyze_data_refs (loop_vec_info
);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
47 static void vect_analyze_scalar_cycles (loop_vec_info
);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
51 static bool vect_compute_data_refs_alignment (loop_vec_info
);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
53 static bool vect_analyze_operations (loop_vec_info
);
54 static bool vect_determine_vectorization_factor (loop_vec_info
);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
58 static tree
vect_get_loop_niters (struct loop
*, tree
*);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation
*, loop_vec_info
);
61 static bool vect_compute_data_ref_alignment (struct data_reference
*);
62 static bool vect_analyze_data_ref_access (struct data_reference
*);
63 static bool vect_can_advance_ivs_p (loop_vec_info
);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference
*, struct data_reference
*, int npeel
);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
95 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
96 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
97 int nbbs
= loop
->num_nodes
;
98 block_stmt_iterator si
;
99 unsigned int vectorization_factor
= 0;
104 stmt_vec_info stmt_info
;
107 if (vect_print_dump_info (REPORT_DETAILS
))
108 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
110 for (i
= 0; i
< nbbs
; i
++)
112 basic_block bb
= bbs
[i
];
114 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
116 stmt_info
= vinfo_for_stmt (phi
);
117 if (vect_print_dump_info (REPORT_DETAILS
))
119 fprintf (vect_dump
, "==> examining phi: ");
120 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
123 gcc_assert (stmt_info
);
125 if (STMT_VINFO_RELEVANT_P (stmt_info
))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info
));
128 scalar_type
= TREE_TYPE (PHI_RESULT (phi
));
130 if (vect_print_dump_info (REPORT_DETAILS
))
132 fprintf (vect_dump
, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
136 vectype
= get_vectype_for_scalar_type (scalar_type
);
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
147 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
149 if (vect_print_dump_info (REPORT_DETAILS
))
151 fprintf (vect_dump
, "vectype: ");
152 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
155 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
156 if (vect_print_dump_info (REPORT_DETAILS
))
157 fprintf (vect_dump
, "nunits = %d", nunits
);
159 if (!vectorization_factor
160 || (nunits
> vectorization_factor
))
161 vectorization_factor
= nunits
;
165 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
167 tree stmt
= bsi_stmt (si
);
168 stmt_info
= vinfo_for_stmt (stmt
);
170 if (vect_print_dump_info (REPORT_DETAILS
))
172 fprintf (vect_dump
, "==> examining statement: ");
173 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
176 gcc_assert (stmt_info
);
178 /* skip stmts which do not need to be vectorized. */
179 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
180 && !STMT_VINFO_LIVE_P (stmt_info
))
182 if (vect_print_dump_info (REPORT_DETAILS
))
183 fprintf (vect_dump
, "skip.");
187 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
189 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
191 fprintf (vect_dump
, "not vectorized: irregular stmt.");
192 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
197 if (!GIMPLE_STMT_P (stmt
)
198 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
202 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
203 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
208 if (STMT_VINFO_VECTYPE (stmt_info
))
210 /* The only case when a vectype had been already set is for stmts
211 that contain a dataref, or for "pattern-stmts" (stmts generated
212 by the vectorizer to represent/replace a certain idiom). */
213 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
214 || is_pattern_stmt_p (stmt_info
));
215 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
221 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info
)
222 && !is_pattern_stmt_p (stmt_info
));
224 /* We generally set the vectype according to the type of the
226 For stmts whose result-type is different than the type of the
227 arguments (e.g. demotion, promotion), vectype will be reset
228 appropriately (later). Note that we have to visit the smallest
229 datatype in this function, because that determines the VF.
230 If the smallest datatype in the loop is present only as the
231 rhs of a promotion operation - we'd miss it here.
232 Such a case, where a variable of this datatype does not appear
233 in the lhs anywhere in the loop, can only occur if it's an
234 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
235 to have been optimized away by invariant motion. However, we
236 cannot rely on invariant motion to always take invariants out
237 of the loop, and so in the case of promotion we also have to
239 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
241 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
242 if (TREE_CODE (operation
) == NOP_EXPR
243 || TREE_CODE (operation
) == CONVERT_EXPR
244 || TREE_CODE (operation
) == WIDEN_MULT_EXPR
245 || TREE_CODE (operation
) == FLOAT_EXPR
)
247 tree rhs_type
= TREE_TYPE (TREE_OPERAND (operation
, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
)) <
249 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
)))
250 scalar_type
= rhs_type
;
253 if (vect_print_dump_info (REPORT_DETAILS
))
255 fprintf (vect_dump
, "get vectype for scalar type: ");
256 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
259 vectype
= get_vectype_for_scalar_type (scalar_type
);
262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
265 "not vectorized: unsupported data-type ");
266 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
270 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
273 if (vect_print_dump_info (REPORT_DETAILS
))
275 fprintf (vect_dump
, "vectype: ");
276 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
279 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
280 if (vect_print_dump_info (REPORT_DETAILS
))
281 fprintf (vect_dump
, "nunits = %d", nunits
);
283 if (!vectorization_factor
284 || (nunits
> vectorization_factor
))
285 vectorization_factor
= nunits
;
290 /* TODO: Analyze cost. Decide if worth while to vectorize. */
291 if (vect_print_dump_info (REPORT_DETAILS
))
292 fprintf (vect_dump
, "vectorization factor = %d", vectorization_factor
);
293 if (vectorization_factor
<= 1)
295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
296 fprintf (vect_dump
, "not vectorized: unsupported data-type");
299 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
305 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
306 the number of created vector stmts depends on the unrolling factor). However,
307 the actual number of vector stmts for every SLP node depends on VF which is
308 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
309 In this function we assume that the inside costs calculated in
310 vect_model_xxx_cost are linear in ncopies. */
313 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
315 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
316 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
317 slp_instance instance
;
319 if (vect_print_dump_info (REPORT_SLP
))
320 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
322 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
323 /* We assume that costs are linear in ncopies. */
324 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
325 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
329 /* Function vect_analyze_operations.
331 Scan the loop stmts and make sure they are all vectorizable. */
334 vect_analyze_operations (loop_vec_info loop_vinfo
)
336 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
337 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
338 int nbbs
= loop
->num_nodes
;
339 block_stmt_iterator si
;
340 unsigned int vectorization_factor
= 0;
344 stmt_vec_info stmt_info
;
345 bool need_to_vectorize
= false;
346 int min_profitable_iters
;
347 int min_scalar_loop_bound
;
349 bool only_slp_in_loop
= true;
351 if (vect_print_dump_info (REPORT_DETAILS
))
352 fprintf (vect_dump
, "=== vect_analyze_operations ===");
354 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
355 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
357 for (i
= 0; i
< nbbs
; i
++)
359 basic_block bb
= bbs
[i
];
361 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
365 stmt_info
= vinfo_for_stmt (phi
);
366 if (vect_print_dump_info (REPORT_DETAILS
))
368 fprintf (vect_dump
, "examining phi: ");
369 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
372 if (! is_loop_header_bb_p (bb
))
374 /* inner-loop loop-closed exit phi in outer-loop vectorization
375 (i.e. a phi in the tail of the outer-loop).
376 FORNOW: we currently don't support the case that these phis
377 are not used in the outerloop, cause this case requires
378 to actually do something here. */
379 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
380 || STMT_VINFO_LIVE_P (stmt_info
))
382 if (vect_print_dump_info (REPORT_DETAILS
))
384 "Unsupported loop-closed phi in outer-loop.");
390 gcc_assert (stmt_info
);
392 if (STMT_VINFO_LIVE_P (stmt_info
))
394 /* FORNOW: not yet supported. */
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
396 fprintf (vect_dump
, "not vectorized: value used after loop.");
400 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_loop
401 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_induction_def
)
403 /* A scalar-dependence cycle that we don't support. */
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
405 fprintf (vect_dump
, "not vectorized: scalar dependence cycle.");
409 if (STMT_VINFO_RELEVANT_P (stmt_info
))
411 need_to_vectorize
= true;
412 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
413 ok
= vectorizable_induction (phi
, NULL
, NULL
);
418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
421 "not vectorized: relevant phi not supported: ");
422 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
428 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
430 tree stmt
= bsi_stmt (si
);
431 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
432 enum vect_def_type relevance
= STMT_VINFO_RELEVANT (stmt_info
);
434 if (vect_print_dump_info (REPORT_DETAILS
))
436 fprintf (vect_dump
, "==> examining statement: ");
437 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
440 gcc_assert (stmt_info
);
442 /* skip stmts which do not need to be vectorized.
443 this is expected to include:
444 - the COND_EXPR which is the loop exit condition
445 - any LABEL_EXPRs in the loop
446 - computations that are used only for array indexing or loop
449 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
450 && !STMT_VINFO_LIVE_P (stmt_info
))
452 if (vect_print_dump_info (REPORT_DETAILS
))
453 fprintf (vect_dump
, "irrelevant.");
457 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
462 case vect_reduction_def
:
463 gcc_assert (relevance
== vect_used_in_outer
464 || relevance
== vect_used_in_outer_by_reduction
465 || relevance
== vect_unused_in_loop
);
468 case vect_induction_def
:
469 case vect_constant_def
:
470 case vect_invariant_def
:
471 case vect_unknown_def_type
:
476 if (STMT_VINFO_RELEVANT_P (stmt_info
))
478 gcc_assert (GIMPLE_STMT_P (stmt
)
479 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
480 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
481 need_to_vectorize
= true;
485 if (STMT_VINFO_RELEVANT_P (stmt_info
)
486 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
487 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
488 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
489 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
490 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
491 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
492 || vectorizable_load (stmt
, NULL
, NULL
, NULL
)
493 || vectorizable_call (stmt
, NULL
, NULL
)
494 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
495 || vectorizable_condition (stmt
, NULL
, NULL
)
496 || vectorizable_reduction (stmt
, NULL
, NULL
));
500 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
502 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
503 fprintf (vect_dump
, "supported: ");
504 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
509 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
510 need extra handling, except for vectorizable reductions. */
511 if (STMT_VINFO_LIVE_P (stmt_info
)
512 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
513 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
517 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
519 fprintf (vect_dump
, "not vectorized: live stmt not ");
520 fprintf (vect_dump
, "supported: ");
521 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
526 if (!PURE_SLP_STMT (stmt_info
))
528 /* STMT needs loop-based vectorization. */
529 only_slp_in_loop
= false;
531 /* Groups of strided accesses whose size is not a power of 2 are
532 not vectorizable yet using loop-vectorization. Therefore, if
533 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
534 both SLPed and loop-based vectorzed), the loop cannot be
536 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
537 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
538 DR_GROUP_FIRST_DR (stmt_info
)))) == -1)
540 if (vect_print_dump_info (REPORT_DETAILS
))
542 fprintf (vect_dump
, "not vectorized: the size of group "
543 "of strided accesses is not a power of 2");
544 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
552 /* All operations in the loop are either irrelevant (deal with loop
553 control, or dead), or only used outside the loop and can be moved
554 out of the loop (e.g. invariants, inductions). The loop can be
555 optimized away by scalar optimizations. We're better off not
556 touching this loop. */
557 if (!need_to_vectorize
)
559 if (vect_print_dump_info (REPORT_DETAILS
))
561 "All the computation can be taken out of the loop.");
562 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
564 "not vectorized: redundant loop. no profit to vectorize.");
568 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
569 vectorization factor of the loop is the unrolling factor required by the
570 SLP instances. If that unrolling factor is 1, we say, that we perform
571 pure SLP on loop - cross iteration parallelism is not exploited. */
572 if (only_slp_in_loop
)
573 vectorization_factor
= LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
);
575 vectorization_factor
= least_common_multiple (vectorization_factor
,
576 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
));
578 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
580 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
581 && vect_print_dump_info (REPORT_DETAILS
))
583 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
584 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
586 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
587 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
589 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
590 fprintf (vect_dump
, "not vectorized: iteration count too small.");
591 if (vect_print_dump_info (REPORT_DETAILS
))
592 fprintf (vect_dump
,"not vectorized: iteration count smaller than "
593 "vectorization factor.");
597 /* Analyze cost. Decide if worth while to vectorize. */
599 /* Once VF is set, SLP costs should be updated since the number of created
600 vector stmts depends on VF. */
601 vect_update_slp_costs_according_to_vf (loop_vinfo
);
603 min_profitable_iters
= vect_estimate_min_profitable_iters (loop_vinfo
);
604 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
) = min_profitable_iters
;
605 if (min_profitable_iters
< 0)
607 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
608 fprintf (vect_dump
, "not vectorized: vectorization not profitable.");
609 if (vect_print_dump_info (REPORT_DETAILS
))
610 fprintf (vect_dump
, "not vectorized: vector version will never be "
615 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
616 * vectorization_factor
) - 1);
618 /* Use the cost model only if it is more conservative than user specified
621 th
= (unsigned) min_scalar_loop_bound
;
622 if (min_profitable_iters
623 && (!min_scalar_loop_bound
624 || min_profitable_iters
> min_scalar_loop_bound
))
625 th
= (unsigned) min_profitable_iters
;
627 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
628 && LOOP_VINFO_INT_NITERS (loop_vinfo
) <= th
)
630 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
631 fprintf (vect_dump
, "not vectorized: vectorization not "
633 if (vect_print_dump_info (REPORT_DETAILS
))
634 fprintf (vect_dump
, "not vectorized: iteration count smaller than "
635 "user specified loop bound parameter or minimum "
636 "profitable iterations (whichever is more conservative).");
640 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
641 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
642 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
644 if (vect_print_dump_info (REPORT_DETAILS
))
645 fprintf (vect_dump
, "epilog loop required.");
646 if (!vect_can_advance_ivs_p (loop_vinfo
))
648 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
650 "not vectorized: can't create epilog loop 1.");
653 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
655 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
657 "not vectorized: can't create epilog loop 2.");
666 /* Function exist_non_indexing_operands_for_use_p
668 USE is one of the uses attached to STMT. Check if USE is
669 used in STMT for anything other than indexing an array. */
672 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
675 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
677 /* USE corresponds to some operand in STMT. If there is no data
678 reference in STMT, then any operand that corresponds to USE
679 is not indexing an array. */
680 if (!STMT_VINFO_DATA_REF (stmt_info
))
683 /* STMT has a data_ref. FORNOW this means that its of one of
687 (This should have been verified in analyze_data_refs).
689 'var' in the second case corresponds to a def, not a use,
690 so USE cannot correspond to any operands that are not used
693 Therefore, all we need to check is if STMT falls into the
694 first case, and whether var corresponds to USE. */
696 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
699 operand
= GIMPLE_STMT_OPERAND (stmt
, 1);
701 if (TREE_CODE (operand
) != SSA_NAME
)
711 /* Function vect_analyze_scalar_cycles_1.
713 Examine the cross iteration def-use cycles of scalar variables
714 in LOOP. LOOP_VINFO represents the loop that is noe being
715 considered for vectorization (can be LOOP, or an outer-loop
719 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo
, struct loop
*loop
)
722 basic_block bb
= loop
->header
;
724 VEC(tree
,heap
) *worklist
= VEC_alloc (tree
, heap
, 64);
726 if (vect_print_dump_info (REPORT_DETAILS
))
727 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
729 /* First - identify all inductions. */
730 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
732 tree access_fn
= NULL
;
733 tree def
= PHI_RESULT (phi
);
734 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
736 if (vect_print_dump_info (REPORT_DETAILS
))
738 fprintf (vect_dump
, "Analyze phi: ");
739 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
742 /* Skip virtual phi's. The data dependences that are associated with
743 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
744 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
747 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
749 /* Analyze the evolution function. */
750 access_fn
= analyze_scalar_evolution (loop
, def
);
751 if (access_fn
&& vect_print_dump_info (REPORT_DETAILS
))
753 fprintf (vect_dump
, "Access function of PHI: ");
754 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
758 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
760 VEC_safe_push (tree
, heap
, worklist
, phi
);
764 if (vect_print_dump_info (REPORT_DETAILS
))
765 fprintf (vect_dump
, "Detected induction.");
766 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
770 /* Second - identify all reductions. */
771 while (VEC_length (tree
, worklist
) > 0)
773 tree phi
= VEC_pop (tree
, worklist
);
774 tree def
= PHI_RESULT (phi
);
775 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
778 if (vect_print_dump_info (REPORT_DETAILS
))
780 fprintf (vect_dump
, "Analyze phi: ");
781 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
784 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
785 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
787 reduc_stmt
= vect_is_simple_reduction (loop_vinfo
, phi
);
790 if (vect_print_dump_info (REPORT_DETAILS
))
791 fprintf (vect_dump
, "Detected reduction.");
792 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
793 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
797 if (vect_print_dump_info (REPORT_DETAILS
))
798 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
801 VEC_free (tree
, heap
, worklist
);
806 /* Function vect_analyze_scalar_cycles.
808 Examine the cross iteration def-use cycles of scalar variables, by
809 analyzing the loop-header PHIs of scalar variables; Classify each
810 cycle as one of the following: invariant, induction, reduction, unknown.
811 We do that for the loop represented by LOOP_VINFO, and also to its
812 inner-loop, if exists.
813 Examples for scalar cycles:
828 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
830 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
832 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
);
834 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
835 Reductions in such inner-loop therefore have different properties than
836 the reductions in the nest that gets vectorized:
837 1. When vectorized, they are executed in the same order as in the original
838 scalar loop, so we can't change the order of computation when
840 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
841 current checks are too strict. */
844 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
->inner
);
848 /* Function vect_insert_into_interleaving_chain.
850 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
853 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
854 struct data_reference
*drb
)
856 tree prev
, next
, next_init
;
857 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
858 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
860 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
861 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
864 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
865 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
868 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
869 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
873 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
876 /* We got to the end of the list. Insert here. */
877 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
878 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL_TREE
;
882 /* Function vect_update_interleaving_chain.
884 For two data-refs DRA and DRB that are a part of a chain interleaved data
885 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
887 There are four possible cases:
888 1. New stmts - both DRA and DRB are not a part of any chain:
891 2. DRB is a part of a chain and DRA is not:
892 no need to update FIRST_DR
893 no need to insert DRB
894 insert DRA according to init
895 3. DRA is a part of a chain and DRB is not:
896 if (init of FIRST_DR > init of DRB)
898 NEXT(FIRST_DR) = previous FIRST_DR
900 insert DRB according to its init
901 4. both DRA and DRB are in some interleaving chains:
902 choose the chain with the smallest init of FIRST_DR
903 insert the nodes of the second chain into the first one. */
906 vect_update_interleaving_chain (struct data_reference
*drb
,
907 struct data_reference
*dra
)
909 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
910 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
911 tree next_init
, init_dra_chain
, init_drb_chain
, first_a
, first_b
;
912 tree node
, prev
, next
, node_init
, first_stmt
;
914 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
915 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
917 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
918 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
919 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
923 /* 2. DRB is a part of a chain and DRA is not. */
924 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
926 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
927 /* Insert DRA into the chain of DRB. */
928 vect_insert_into_interleaving_chain (dra
, drb
);
932 /* 3. DRA is a part of a chain and DRB is not. */
933 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
935 tree old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
936 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
940 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
942 /* DRB's init is smaller than the init of the stmt previously marked
943 as the first stmt of the interleaving chain of DRA. Therefore, we
944 update FIRST_STMT and put DRB in the head of the list. */
945 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
946 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
948 /* Update all the stmts in the list to point to the new FIRST_STMT. */
949 tmp
= old_first_stmt
;
952 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
953 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
958 /* Insert DRB in the list of DRA. */
959 vect_insert_into_interleaving_chain (drb
, dra
);
960 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
965 /* 4. both DRA and DRB are in some interleaving chains. */
966 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
967 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
968 if (first_a
== first_b
)
970 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
971 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
973 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
975 /* Insert the nodes of DRA chain into the DRB chain.
976 After inserting a node, continue from this node of the DRB chain (don't
977 start from the beginning. */
978 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
979 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
980 first_stmt
= first_b
;
984 /* Insert the nodes of DRB chain into the DRA chain.
985 After inserting a node, continue from this node of the DRA chain (don't
986 start from the beginning. */
987 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
988 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
989 first_stmt
= first_a
;
994 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
995 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
998 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
999 if (tree_int_cst_compare (next_init
, node_init
) > 0)
1002 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
1003 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
1008 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
1012 /* We got to the end of the list. Insert here. */
1013 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
1014 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL_TREE
;
1017 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
1018 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
1023 /* Function vect_equal_offsets.
1025 Check if OFFSET1 and OFFSET2 are identical expressions. */
1028 vect_equal_offsets (tree offset1
, tree offset2
)
1032 STRIP_NOPS (offset1
);
1033 STRIP_NOPS (offset2
);
1035 if (offset1
== offset2
)
1038 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1039 || !BINARY_CLASS_P (offset1
)
1040 || !BINARY_CLASS_P (offset2
))
1043 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
1044 TREE_OPERAND (offset2
, 0));
1045 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
1046 TREE_OPERAND (offset2
, 1));
1048 return (res0
&& res1
);
1052 /* Function vect_check_interleaving.
1054 Check if DRA and DRB are a part of interleaving. In case they are, insert
1055 DRA and DRB in an interleaving chain. */
1058 vect_check_interleaving (struct data_reference
*dra
,
1059 struct data_reference
*drb
)
1061 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
1063 /* Check that the data-refs have same first location (except init) and they
1064 are both either store or load (not load and store). */
1065 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
1066 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
1067 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
1068 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
1069 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
1070 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
1071 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
1072 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
1076 1. data-refs are of the same type
1077 2. their steps are equal
1078 3. the step is greater than the difference between data-refs' inits */
1079 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
1080 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
1082 if (type_size_a
!= type_size_b
1083 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
)))
1086 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
1087 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
1088 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
1090 if (init_a
> init_b
)
1092 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1093 and DRB is accessed before DRA. */
1094 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
1096 if ((init_a
- init_b
) > step
)
1099 if (diff_mod_size
== 0)
1101 vect_update_interleaving_chain (drb
, dra
);
1102 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1104 fprintf (vect_dump
, "Detected interleaving ");
1105 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1106 fprintf (vect_dump
, " and ");
1107 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1114 /* If init_b == init_a + the size of the type * k, we have an
1115 interleaving, and DRA is accessed before DRB. */
1116 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
1118 if ((init_b
- init_a
) > step
)
1121 if (diff_mod_size
== 0)
1123 vect_update_interleaving_chain (dra
, drb
);
1124 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1126 fprintf (vect_dump
, "Detected interleaving ");
1127 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1128 fprintf (vect_dump
, " and ");
1129 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1136 /* Check if data references pointed by DR_I and DR_J are same or
1137 belong to same interleaving group. Return FALSE if drs are
1138 different, otherwise return TRUE. */
1141 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
1143 tree stmt_i
= DR_STMT (dr_i
);
1144 tree stmt_j
= DR_STMT (dr_j
);
1146 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
1147 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
1148 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
))
1149 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
1150 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
)))))
1156 /* If address ranges represented by DDR_I and DDR_J are equal,
1157 return TRUE, otherwise return FALSE. */
1160 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
1162 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
1163 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
1164 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
1165 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
1171 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1172 tested at run-time. Return TRUE if DDR was successfully inserted.
1173 Return false if versioning is not supported. */
1176 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
1178 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1180 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
1183 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1185 fprintf (vect_dump
, "mark for run-time aliasing test between ");
1186 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
1187 fprintf (vect_dump
, " and ");
1188 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
1193 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1194 fprintf (vect_dump
, "versioning not supported when optimizing for size.");
1198 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1201 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1202 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
1206 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
1210 /* Function vect_analyze_data_ref_dependence.
1212 Return TRUE if there (might) exist a dependence between a memory-reference
1213 DRA and a memory-reference DRB. When versioning for alias may check a
1214 dependence at run-time, return FALSE. */
1217 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
1218 loop_vec_info loop_vinfo
)
1221 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1222 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1223 struct data_reference
*dra
= DDR_A (ddr
);
1224 struct data_reference
*drb
= DDR_B (ddr
);
1225 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1226 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1227 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1228 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1229 lambda_vector dist_v
;
1230 unsigned int loop_depth
;
1232 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1234 /* Independent data accesses. */
1235 vect_check_interleaving (dra
, drb
);
1239 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
1242 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1244 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1247 "versioning for alias required: can't determine dependence between ");
1248 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1249 fprintf (vect_dump
, " and ");
1250 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1252 /* Add to list of ddrs that need to be tested at run-time. */
1253 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
1256 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1258 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1260 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
1261 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1262 fprintf (vect_dump
, " and ");
1263 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1265 /* Add to list of ddrs that need to be tested at run-time. */
1266 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
1269 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1270 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
1272 int dist
= dist_v
[loop_depth
];
1274 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1275 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1277 /* Same loop iteration. */
1278 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
1280 /* Two references with distance zero have the same alignment. */
1281 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1282 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1283 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1284 fprintf (vect_dump
, "accesses have the same alignment.");
1285 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1287 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1288 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1289 fprintf (vect_dump
, " and ");
1290 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1293 /* For interleaving, mark that there is a read-write dependency if
1294 necessary. We check before that one of the data-refs is store. */
1295 if (DR_IS_READ (dra
))
1296 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
1299 if (DR_IS_READ (drb
))
1300 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
1306 if (abs (dist
) >= vectorization_factor
1307 || (dist
> 0 && DDR_REVERSED_P (ddr
)))
1309 /* Dependence distance does not create dependence, as far as
1310 vectorization is concerned, in this case. If DDR_REVERSED_P the
1311 order of the data-refs in DDR was reversed (to make distance
1312 vector positive), and the actual distance is negative. */
1313 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1314 fprintf (vect_dump
, "dependence distance >= VF or negative.");
1318 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1321 "not vectorized, possible dependence "
1322 "between data-refs ");
1323 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1324 fprintf (vect_dump
, " and ");
1325 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1334 /* Function vect_analyze_data_ref_dependences.
1336 Examine all the data references in the loop, and make sure there do not
1337 exist any data dependences between them. */
1340 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1343 VEC (ddr_p
, heap
) * ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1344 struct data_dependence_relation
*ddr
;
1346 if (vect_print_dump_info (REPORT_DETAILS
))
1347 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1349 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1350 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
1357 /* Function vect_compute_data_ref_alignment
1359 Compute the misalignment of the data reference DR.
1362 1. If during the misalignment computation it is found that the data reference
1363 cannot be vectorized then false is returned.
1364 2. DR_MISALIGNMENT (DR) is defined.
1366 FOR NOW: No analysis is actually performed. Misalignment is calculated
1367 only for trivial cases. TODO. */
1370 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1372 tree stmt
= DR_STMT (dr
);
1373 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1374 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1375 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1376 tree ref
= DR_REF (dr
);
1378 tree base
, base_addr
;
1381 tree aligned_to
, alignment
;
1383 if (vect_print_dump_info (REPORT_DETAILS
))
1384 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1386 /* Initialize misalignment to unknown. */
1387 SET_DR_MISALIGNMENT (dr
, -1);
1389 misalign
= DR_INIT (dr
);
1390 aligned_to
= DR_ALIGNED_TO (dr
);
1391 base_addr
= DR_BASE_ADDRESS (dr
);
1393 /* In case the dataref is in an inner-loop of the loop that is being
1394 vectorized (LOOP), we use the base and misalignment information
1395 relative to the outer-loop (LOOP). This is ok only if the misalignment
1396 stays the same throughout the execution of the inner-loop, which is why
1397 we have to check that the stride of the dataref in the inner-loop evenly
1398 divides by the vector size. */
1399 if (nested_in_vect_loop_p (loop
, stmt
))
1401 tree step
= DR_STEP (dr
);
1402 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1404 if (dr_step
% UNITS_PER_SIMD_WORD
== 0)
1406 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1407 fprintf (vect_dump
, "inner step divides the vector-size.");
1408 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
1409 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
1410 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
1414 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1415 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
1416 misalign
= NULL_TREE
;
1420 base
= build_fold_indirect_ref (base_addr
);
1421 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1422 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1424 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1427 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1429 fprintf (vect_dump
, "Unknown alignment for access: ");
1430 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
1436 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1438 || (TREE_CODE (base_addr
) == SSA_NAME
1439 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1440 TREE_TYPE (base_addr
)))),
1442 base_aligned
= true;
1444 base_aligned
= false;
1448 /* Do not change the alignment of global variables if
1449 flag_section_anchors is enabled. */
1450 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1451 || (TREE_STATIC (base
) && flag_section_anchors
))
1453 if (vect_print_dump_info (REPORT_DETAILS
))
1455 fprintf (vect_dump
, "can't force alignment of ref: ");
1456 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1461 /* Force the alignment of the decl.
1462 NOTE: This is the only change to the code we make during
1463 the analysis phase, before deciding to vectorize the loop. */
1464 if (vect_print_dump_info (REPORT_DETAILS
))
1465 fprintf (vect_dump
, "force alignment");
1466 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1467 DECL_USER_ALIGN (base
) = 1;
1470 /* At this point we assume that the base is aligned. */
1471 gcc_assert (base_aligned
1472 || (TREE_CODE (base
) == VAR_DECL
1473 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1475 /* Modulo alignment. */
1476 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1478 if (!host_integerp (misalign
, 1))
1480 /* Negative or overflowed misalignment value. */
1481 if (vect_print_dump_info (REPORT_DETAILS
))
1482 fprintf (vect_dump
, "unexpected misalign value");
1486 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
1488 if (vect_print_dump_info (REPORT_DETAILS
))
1490 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1491 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1498 /* Function vect_compute_data_refs_alignment
1500 Compute the misalignment of data references in the loop.
1501 Return FALSE if a data reference is found that cannot be vectorized. */
1504 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1506 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1507 struct data_reference
*dr
;
1510 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1511 if (!vect_compute_data_ref_alignment (dr
))
1518 /* Function vect_update_misalignment_for_peel
1520 DR - the data reference whose misalignment is to be adjusted.
1521 DR_PEEL - the data reference whose misalignment is being made
1522 zero in the vector loop by the peel.
1523 NPEEL - the number of iterations in the peel loop if the misalignment
1524 of DR_PEEL is known at compile time. */
1527 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1528 struct data_reference
*dr_peel
, int npeel
)
1531 VEC(dr_p
,heap
) *same_align_drs
;
1532 struct data_reference
*current_dr
;
1533 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1534 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1535 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1536 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1538 /* For interleaved data accesses the step in the loop must be multiplied by
1539 the size of the interleaving group. */
1540 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1541 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1542 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
1543 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1545 /* It can be assumed that the data refs with the same alignment as dr_peel
1546 are aligned in the vector loop. */
1548 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1549 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1551 if (current_dr
!= dr
)
1553 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1554 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1555 SET_DR_MISALIGNMENT (dr
, 0);
1559 if (known_alignment_for_access_p (dr
)
1560 && known_alignment_for_access_p (dr_peel
))
1562 int misal
= DR_MISALIGNMENT (dr
);
1563 misal
+= npeel
* dr_size
;
1564 misal
%= UNITS_PER_SIMD_WORD
;
1565 SET_DR_MISALIGNMENT (dr
, misal
);
1569 if (vect_print_dump_info (REPORT_DETAILS
))
1570 fprintf (vect_dump
, "Setting misalignment to -1.");
1571 SET_DR_MISALIGNMENT (dr
, -1);
1575 /* Function vect_verify_datarefs_alignment
1577 Return TRUE if all data references in the loop can be
1578 handled with respect to alignment. */
1581 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1583 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1584 struct data_reference
*dr
;
1585 enum dr_alignment_support supportable_dr_alignment
;
1588 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1590 tree stmt
= DR_STMT (dr
);
1591 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1593 /* For interleaving, only the alignment of the first access matters. */
1594 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1595 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1598 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1599 if (!supportable_dr_alignment
)
1601 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1603 if (DR_IS_READ (dr
))
1605 "not vectorized: unsupported unaligned load.");
1608 "not vectorized: unsupported unaligned store.");
1612 if (supportable_dr_alignment
!= dr_aligned
1613 && vect_print_dump_info (REPORT_ALIGNMENT
))
1614 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1620 /* Function vector_alignment_reachable_p
1622 Return true if vector alignment for DR is reachable by peeling
1623 a few loop iterations. Return false otherwise. */
1626 vector_alignment_reachable_p (struct data_reference
*dr
)
1628 tree stmt
= DR_STMT (dr
);
1629 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1630 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1632 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1634 /* For interleaved access we peel only if number of iterations in
1635 the prolog loop ({VF - misalignment}), is a multiple of the
1636 number of the interleaved accesses. */
1637 int elem_size
, mis_in_elements
;
1638 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1640 /* FORNOW: handle only known alignment. */
1641 if (!known_alignment_for_access_p (dr
))
1644 elem_size
= UNITS_PER_SIMD_WORD
/ nelements
;
1645 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1647 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1651 /* If misalignment is known at the compile time then allow peeling
1652 only if natural alignment is reachable through peeling. */
1653 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1655 HOST_WIDE_INT elmsize
=
1656 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1657 if (vect_print_dump_info (REPORT_DETAILS
))
1659 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1660 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1662 if (DR_MISALIGNMENT (dr
) % elmsize
)
1664 if (vect_print_dump_info (REPORT_DETAILS
))
1665 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1670 if (!known_alignment_for_access_p (dr
))
1672 tree type
= (TREE_TYPE (DR_REF (dr
)));
1673 tree ba
= DR_BASE_OBJECT (dr
);
1674 bool is_packed
= false;
1677 is_packed
= contains_packed_reference (ba
);
1679 if (vect_print_dump_info (REPORT_DETAILS
))
1680 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1681 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1690 /* Function vect_enhance_data_refs_alignment
1692 This pass will use loop versioning and loop peeling in order to enhance
1693 the alignment of data references in the loop.
1695 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1696 original loop is to be vectorized; Any other loops that are created by
1697 the transformations performed in this pass - are not supposed to be
1698 vectorized. This restriction will be relaxed.
1700 This pass will require a cost model to guide it whether to apply peeling
1701 or versioning or a combination of the two. For example, the scheme that
1702 intel uses when given a loop with several memory accesses, is as follows:
1703 choose one memory access ('p') which alignment you want to force by doing
1704 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1705 other accesses are not necessarily aligned, or (2) use loop versioning to
1706 generate one loop in which all accesses are aligned, and another loop in
1707 which only 'p' is necessarily aligned.
1709 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1710 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1711 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1713 Devising a cost model is the most critical aspect of this work. It will
1714 guide us on which access to peel for, whether to use loop versioning, how
1715 many versions to create, etc. The cost model will probably consist of
1716 generic considerations as well as target specific considerations (on
1717 powerpc for example, misaligned stores are more painful than misaligned
1720 Here are the general steps involved in alignment enhancements:
1722 -- original loop, before alignment analysis:
1723 for (i=0; i<N; i++){
1724 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1725 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1728 -- After vect_compute_data_refs_alignment:
1729 for (i=0; i<N; i++){
1730 x = q[i]; # DR_MISALIGNMENT(q) = 3
1731 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1734 -- Possibility 1: we do loop versioning:
1736 for (i=0; i<N; i++){ # loop 1A
1737 x = q[i]; # DR_MISALIGNMENT(q) = 3
1738 p[i] = y; # DR_MISALIGNMENT(p) = 0
1742 for (i=0; i<N; i++){ # loop 1B
1743 x = q[i]; # DR_MISALIGNMENT(q) = 3
1744 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1748 -- Possibility 2: we do loop peeling:
1749 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1753 for (i = 3; i < N; i++){ # loop 2A
1754 x = q[i]; # DR_MISALIGNMENT(q) = 0
1755 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1758 -- Possibility 3: combination of loop peeling and versioning:
1759 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1764 for (i = 3; i<N; i++){ # loop 3A
1765 x = q[i]; # DR_MISALIGNMENT(q) = 0
1766 p[i] = y; # DR_MISALIGNMENT(p) = 0
1770 for (i = 3; i<N; i++){ # loop 3B
1771 x = q[i]; # DR_MISALIGNMENT(q) = 0
1772 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1776 These loops are later passed to loop_transform to be vectorized. The
1777 vectorizer will use the alignment information to guide the transformation
1778 (whether to generate regular loads/stores, or with special handling for
1782 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1784 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1785 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1786 enum dr_alignment_support supportable_dr_alignment
;
1787 struct data_reference
*dr0
= NULL
;
1788 struct data_reference
*dr
;
1790 bool do_peeling
= false;
1791 bool do_versioning
= false;
1794 stmt_vec_info stmt_info
;
1795 int vect_versioning_for_alias_required
;
1797 if (vect_print_dump_info (REPORT_DETAILS
))
1798 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1800 /* While cost model enhancements are expected in the future, the high level
1801 view of the code at this time is as follows:
1803 A) If there is a misaligned write then see if peeling to align this write
1804 can make all data references satisfy vect_supportable_dr_alignment.
1805 If so, update data structures as needed and return true. Note that
1806 at this time vect_supportable_dr_alignment is known to return false
1807 for a misaligned write.
1809 B) If peeling wasn't possible and there is a data reference with an
1810 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1811 then see if loop versioning checks can be used to make all data
1812 references satisfy vect_supportable_dr_alignment. If so, update
1813 data structures as needed and return true.
1815 C) If neither peeling nor versioning were successful then return false if
1816 any data reference does not satisfy vect_supportable_dr_alignment.
1818 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1820 Note, Possibility 3 above (which is peeling and versioning together) is not
1821 being done at this time. */
1823 /* (1) Peeling to force alignment. */
1825 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1827 + How many accesses will become aligned due to the peeling
1828 - How many accesses will become unaligned due to the peeling,
1829 and the cost of misaligned accesses.
1830 - The cost of peeling (the extra runtime checks, the increase
1833 The scheme we use FORNOW: peel to force the alignment of the first
1834 misaligned store in the loop.
1835 Rationale: misaligned stores are not yet supported.
1837 TODO: Use a cost model. */
1839 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1841 stmt
= DR_STMT (dr
);
1842 stmt_info
= vinfo_for_stmt (stmt
);
1844 /* For interleaving, only the alignment of the first access
1846 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1847 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1850 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1852 do_peeling
= vector_alignment_reachable_p (dr
);
1855 if (!do_peeling
&& vect_print_dump_info (REPORT_DETAILS
))
1856 fprintf (vect_dump
, "vector alignment may not be reachable");
1861 vect_versioning_for_alias_required
=
1862 (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)) > 0);
1864 /* Temporarily, if versioning for alias is required, we disable peeling
1865 until we support peeling and versioning. Often peeling for alignment
1866 will require peeling for loop-bound, which in turn requires that we
1867 know how to adjust the loop ivs after the loop. */
1868 if (vect_versioning_for_alias_required
1869 || !vect_can_advance_ivs_p (loop_vinfo
)
1870 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1877 tree stmt
= DR_STMT (dr0
);
1878 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1879 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1880 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1882 if (known_alignment_for_access_p (dr0
))
1884 /* Since it's known at compile time, compute the number of iterations
1885 in the peeled loop (the peeling factor) for use in updating
1886 DR_MISALIGNMENT values. The peeling factor is the vectorization
1887 factor minus the misalignment as an element count. */
1888 mis
= DR_MISALIGNMENT (dr0
);
1889 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1890 npeel
= nelements
- mis
;
1892 /* For interleaved data access every iteration accesses all the
1893 members of the group, therefore we divide the number of iterations
1894 by the group size. */
1895 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1896 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1897 npeel
/= DR_GROUP_SIZE (stmt_info
);
1899 if (vect_print_dump_info (REPORT_DETAILS
))
1900 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1903 /* Ensure that all data refs can be vectorized after the peel. */
1904 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1906 int save_misalignment
;
1911 stmt
= DR_STMT (dr
);
1912 stmt_info
= vinfo_for_stmt (stmt
);
1913 /* For interleaving, only the alignment of the first access
1915 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1916 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1919 save_misalignment
= DR_MISALIGNMENT (dr
);
1920 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1921 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1922 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1924 if (!supportable_dr_alignment
)
1933 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1934 If the misalignment of DR_i is identical to that of dr0 then set
1935 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1936 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1937 by the peeling factor times the element size of DR_i (MOD the
1938 vectorization factor times the size). Otherwise, the
1939 misalignment of DR_i must be set to unknown. */
1940 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1942 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1944 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1945 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1946 SET_DR_MISALIGNMENT (dr0
, 0);
1947 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1948 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1950 if (vect_print_dump_info (REPORT_DETAILS
))
1951 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1953 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1960 /* (2) Versioning to force alignment. */
1962 /* Try versioning if:
1963 1) flag_tree_vect_loop_version is TRUE
1964 2) optimize_size is FALSE
1965 3) there is at least one unsupported misaligned data ref with an unknown
1967 4) all misaligned data refs with a known misalignment are supported, and
1968 5) the number of runtime alignment checks is within reason. */
1971 flag_tree_vect_loop_version
1973 && (!loop
->inner
); /* FORNOW */
1977 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1979 stmt
= DR_STMT (dr
);
1980 stmt_info
= vinfo_for_stmt (stmt
);
1982 /* For interleaving, only the alignment of the first access
1984 if (aligned_access_p (dr
)
1985 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1986 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1989 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1991 if (!supportable_dr_alignment
)
1997 if (known_alignment_for_access_p (dr
)
1998 || VEC_length (tree
,
1999 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
2000 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
2002 do_versioning
= false;
2006 stmt
= DR_STMT (dr
);
2007 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2008 gcc_assert (vectype
);
2010 /* The rightmost bits of an aligned address must be zeros.
2011 Construct the mask needed for this test. For example,
2012 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2013 mask must be 15 = 0xf. */
2014 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
2016 /* FORNOW: use the same mask to test all potentially unaligned
2017 references in the loop. The vectorizer currently supports
2018 a single vector size, see the reference to
2019 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2020 vectorization factor is computed. */
2021 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
2022 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
2023 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
2024 VEC_safe_push (tree
, heap
,
2025 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
2030 /* Versioning requires at least one misaligned data reference. */
2031 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
2032 do_versioning
= false;
2033 else if (!do_versioning
)
2034 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
2039 VEC(tree
,heap
) *may_misalign_stmts
2040 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
2043 /* It can now be assumed that the data references in the statements
2044 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2045 of the loop being vectorized. */
2046 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
2048 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2049 dr
= STMT_VINFO_DATA_REF (stmt_info
);
2050 SET_DR_MISALIGNMENT (dr
, 0);
2051 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2052 fprintf (vect_dump
, "Alignment of access forced using versioning.");
2055 if (vect_print_dump_info (REPORT_DETAILS
))
2056 fprintf (vect_dump
, "Versioning for alignment will be applied.");
2058 /* Peeling and versioning can't be done together at this time. */
2059 gcc_assert (! (do_peeling
&& do_versioning
));
2061 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2066 /* This point is reached if neither peeling nor versioning is being done. */
2067 gcc_assert (! (do_peeling
|| do_versioning
));
2069 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2074 /* Function vect_analyze_data_refs_alignment
2076 Analyze the alignment of the data-references in the loop.
2077 Return FALSE if a data reference is found that cannot be vectorized. */
2080 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
2082 if (vect_print_dump_info (REPORT_DETAILS
))
2083 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
2085 if (!vect_compute_data_refs_alignment (loop_vinfo
))
2087 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2089 "not vectorized: can't calculate alignment for data ref.");
2097 /* Analyze groups of strided accesses: check that DR belongs to a group of
2098 strided accesses of legal size, step, etc. Detect gaps, single element
2099 interleaving, and other special cases. Set strided access info.
2100 Collect groups of strided stores for further use in SLP analysis. */
2103 vect_analyze_group_access (struct data_reference
*dr
)
2105 tree step
= DR_STEP (dr
);
2106 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2107 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2108 tree stmt
= DR_STMT (dr
);
2109 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2110 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2111 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2112 HOST_WIDE_INT stride
;
2113 bool slp_impossible
= false;
2115 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2116 interleaving group (including gaps). */
2117 stride
= dr_step
/ type_size
;
2119 /* Not consecutive access is possible only if it is a part of interleaving. */
2120 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
2122 /* Check if it this DR is a part of interleaving, and is a single
2123 element of the group that is accessed in the loop. */
2125 /* Gaps are supported only for loads. STEP must be a multiple of the type
2126 size. The size of the group must be a power of 2. */
2128 && (dr_step
% type_size
) == 0
2130 && exact_log2 (stride
) != -1)
2132 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
2133 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2134 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2136 fprintf (vect_dump
, "Detected single element interleaving %d ",
2137 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
2138 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2139 fprintf (vect_dump
, " step ");
2140 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2144 if (vect_print_dump_info (REPORT_DETAILS
))
2145 fprintf (vect_dump
, "not consecutive access");
2149 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
2151 /* First stmt in the interleaving chain. Check the chain. */
2152 tree next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
2153 struct data_reference
*data_ref
= dr
;
2154 unsigned int count
= 1;
2156 tree prev_init
= DR_INIT (data_ref
);
2158 HOST_WIDE_INT diff
, count_in_bytes
;
2162 /* Skip same data-refs. In case that two or more stmts share data-ref
2163 (supported only for loads), we vectorize only the first stmt, and
2164 the rest get their vectorized loads from the first one. */
2165 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2166 DR_INIT (STMT_VINFO_DATA_REF (
2167 vinfo_for_stmt (next
)))))
2169 if (!DR_IS_READ (data_ref
))
2171 if (vect_print_dump_info (REPORT_DETAILS
))
2172 fprintf (vect_dump
, "Two store stmts share the same dr.");
2176 /* Check that there is no load-store dependencies for this loads
2177 to prevent a case of load-store-load to the same location. */
2178 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2179 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2181 if (vect_print_dump_info (REPORT_DETAILS
))
2183 "READ_WRITE dependence in interleaving.");
2187 /* For load use the same data-ref load. */
2188 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2191 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2196 /* Check that all the accesses have the same STEP. */
2197 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2198 if (tree_int_cst_compare (step
, next_step
))
2200 if (vect_print_dump_info (REPORT_DETAILS
))
2201 fprintf (vect_dump
, "not consecutive access in interleaving");
2205 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2206 /* Check that the distance between two accesses is equal to the type
2207 size. Otherwise, we have gaps. */
2208 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2209 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2212 /* FORNOW: SLP of accesses with gaps is not supported. */
2213 slp_impossible
= true;
2214 if (!DR_IS_READ (data_ref
))
2216 if (vect_print_dump_info (REPORT_DETAILS
))
2217 fprintf (vect_dump
, "interleaved store with gaps");
2222 /* Store the gap from the previous member of the group. If there is no
2223 gap in the access, DR_GROUP_GAP is always 1. */
2224 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2226 prev_init
= DR_INIT (data_ref
);
2227 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2228 /* Count the number of data-refs in the chain. */
2232 /* COUNT is the number of accesses found, we multiply it by the size of
2233 the type to get COUNT_IN_BYTES. */
2234 count_in_bytes
= type_size
* count
;
2236 /* Check that the size of the interleaving is not greater than STEP. */
2237 if (dr_step
< count_in_bytes
)
2239 if (vect_print_dump_info (REPORT_DETAILS
))
2241 fprintf (vect_dump
, "interleaving size is greater than step for ");
2242 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2247 /* Check that the size of the interleaving is equal to STEP for stores,
2248 i.e., that there are no gaps. */
2249 if (!DR_IS_READ (dr
) && dr_step
!= count_in_bytes
)
2251 if (vect_print_dump_info (REPORT_DETAILS
))
2252 fprintf (vect_dump
, "interleaved store with gaps");
2256 /* Check that STEP is a multiple of type size. */
2257 if ((dr_step
% type_size
) != 0)
2259 if (vect_print_dump_info (REPORT_DETAILS
))
2261 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2262 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2263 fprintf (vect_dump
, " size ");
2264 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2270 /* FORNOW: we handle only interleaving that is a power of 2.
2271 We don't fail here if it may be still possible to vectorize the
2272 group using SLP. If not, the size of the group will be checked in
2273 vect_analyze_operations, and the vectorization will fail. */
2274 if (exact_log2 (stride
) == -1)
2276 if (vect_print_dump_info (REPORT_DETAILS
))
2277 fprintf (vect_dump
, "interleaving is not a power of 2");
2282 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2283 if (vect_print_dump_info (REPORT_DETAILS
))
2284 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
2286 /* SLP: create an SLP data structure for every interleaving group of
2287 stores for further analysis in vect_analyse_slp. */
2288 if (!DR_IS_READ (dr
) && !slp_impossible
)
2289 VEC_safe_push (tree
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
), stmt
);
2296 /* Analyze the access pattern of the data-reference DR.
2297 In case of non-consecutive accesses call vect_analyze_group_access() to
2298 analyze groups of strided accesses. */
2301 vect_analyze_data_ref_access (struct data_reference
*dr
)
2303 tree step
= DR_STEP (dr
);
2304 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2305 tree stmt
= DR_STMT (dr
);
2306 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2307 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2308 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2309 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2313 if (vect_print_dump_info (REPORT_DETAILS
))
2314 fprintf (vect_dump
, "bad data-ref access");
2318 /* Don't allow invariant accesses. */
2322 if (nested_in_vect_loop_p (loop
, stmt
))
2324 /* Interleaved accesses are not yet supported within outer-loop
2325 vectorization for references in the inner-loop. */
2326 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
2328 /* For the rest of the analysis we use the outer-loop step. */
2329 step
= STMT_VINFO_DR_STEP (stmt_info
);
2330 dr_step
= TREE_INT_CST_LOW (step
);
2334 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2335 fprintf (vect_dump
, "zero step in outer loop.");
2336 if (DR_IS_READ (dr
))
2344 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
2346 /* Mark that it is not interleaving. */
2347 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
2351 if (nested_in_vect_loop_p (loop
, stmt
))
2353 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2354 fprintf (vect_dump
, "strided access in outer loop.");
2358 /* Not consecutive access - check if it's a part of interleaving group. */
2359 return vect_analyze_group_access (dr
);
2363 /* Function vect_analyze_data_ref_accesses.
2365 Analyze the access pattern of all the data references in the loop.
2367 FORNOW: the only access pattern that is considered vectorizable is a
2368 simple step 1 (consecutive) access.
2370 FORNOW: handle only arrays and pointer accesses. */
2373 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
2376 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2377 struct data_reference
*dr
;
2379 if (vect_print_dump_info (REPORT_DETAILS
))
2380 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
2382 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2383 if (!vect_analyze_data_ref_access (dr
))
2385 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2386 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2393 /* Function vect_prune_runtime_alias_test_list.
2395 Prune a list of ddrs to be tested at run-time by versioning for alias.
2396 Return FALSE if resulting list of ddrs is longer then allowed by
2397 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2400 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2402 VEC (ddr_p
, heap
) * ddrs
=
2403 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2406 if (vect_print_dump_info (REPORT_DETAILS
))
2407 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
2409 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
2414 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
2417 for (j
= 0; j
< i
; j
++)
2419 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
2421 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2423 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2425 fprintf (vect_dump
, "found equal ranges ");
2426 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
2427 fprintf (vect_dump
, ", ");
2428 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
2429 fprintf (vect_dump
, " and ");
2430 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
2431 fprintf (vect_dump
, ", ");
2432 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
2441 VEC_ordered_remove (ddr_p
, ddrs
, i
);
2447 if (VEC_length (ddr_p
, ddrs
) >
2448 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2450 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2453 "disable versioning for alias - max number of generated "
2454 "checks exceeded.");
2457 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
2465 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2468 vect_free_slp_tree (slp_tree node
)
2473 if (SLP_TREE_LEFT (node
))
2474 vect_free_slp_tree (SLP_TREE_LEFT (node
));
2476 if (SLP_TREE_RIGHT (node
))
2477 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
2479 VEC_free (tree
, heap
, SLP_TREE_SCALAR_STMTS (node
));
2481 if (SLP_TREE_VEC_STMTS (node
))
2482 VEC_free (tree
, heap
, SLP_TREE_VEC_STMTS (node
));
2488 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2489 of a legal type and that they match the defs of the first stmt of the SLP
2490 group (stored in FIRST_STMT_...). */
2493 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, slp_tree slp_node
,
2494 tree rhs
, VEC (tree
, heap
) **def_stmts0
,
2495 VEC (tree
, heap
) **def_stmts1
,
2496 enum vect_def_type
*first_stmt_dt0
,
2497 enum vect_def_type
*first_stmt_dt1
,
2498 tree
*first_stmt_def0_type
,
2499 tree
*first_stmt_def1_type
,
2500 tree
*first_stmt_const_oprnd
,
2501 int ncopies_for_cost
)
2504 enum operation_type op_type
= TREE_OPERAND_LENGTH (rhs
);
2505 unsigned int i
, number_of_oprnds
= op_type
;
2507 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2508 stmt_vec_info stmt_info
=
2509 vinfo_for_stmt (VEC_index (tree
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
2513 number_of_oprnds
= 1;
2515 gcc_assert (op_type
== unary_op
|| op_type
== binary_op
);
2517 for (i
= 0; i
< number_of_oprnds
; i
++)
2520 oprnd
= TREE_OPERAND (rhs
, i
);
2524 if (!vect_is_simple_use (oprnd
, loop_vinfo
, &def_stmt
, &def
, &dt
[i
])
2525 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
2527 if (vect_print_dump_info (REPORT_SLP
))
2529 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
2530 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
2536 if (!*first_stmt_dt0
)
2538 /* op0 of the first stmt of the group - store its info. */
2539 *first_stmt_dt0
= dt
[i
];
2541 *first_stmt_def0_type
= TREE_TYPE (def
);
2543 *first_stmt_const_oprnd
= oprnd
;
2545 /* Analyze costs (for the first stmt of the group only). */
2547 /* Not memory operation (we don't call this functions for loads). */
2548 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
2551 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
2556 if (!*first_stmt_dt1
&& i
== 1)
2558 /* op1 of the first stmt of the group - store its info. */
2559 *first_stmt_dt1
= dt
[i
];
2561 *first_stmt_def1_type
= TREE_TYPE (def
);
2564 /* We assume that the stmt contains only one constant
2565 operand. We fail otherwise, to be on the safe side. */
2566 if (*first_stmt_const_oprnd
)
2568 if (vect_print_dump_info (REPORT_SLP
))
2569 fprintf (vect_dump
, "Build SLP failed: two constant "
2573 *first_stmt_const_oprnd
= oprnd
;
2578 /* Not first stmt of the group, check that the def-stmt/s match
2579 the def-stmt/s of the first stmt. */
2581 && (*first_stmt_dt0
!= dt
[i
]
2582 || (*first_stmt_def0_type
&& def
2583 && *first_stmt_def0_type
!= TREE_TYPE (def
))))
2585 && (*first_stmt_dt1
!= dt
[i
]
2586 || (*first_stmt_def1_type
&& def
2587 && *first_stmt_def1_type
!= TREE_TYPE (def
))))
2589 && TREE_TYPE (*first_stmt_const_oprnd
)
2590 != TREE_TYPE (oprnd
)))
2592 if (vect_print_dump_info (REPORT_SLP
))
2593 fprintf (vect_dump
, "Build SLP failed: different types ");
2600 /* Check the types of the definitions. */
2603 case vect_constant_def
:
2604 case vect_invariant_def
:
2609 VEC_safe_push (tree
, heap
, *def_stmts0
, def_stmt
);
2611 VEC_safe_push (tree
, heap
, *def_stmts1
, def_stmt
);
2615 /* FORNOW: Not supported. */
2616 if (vect_print_dump_info (REPORT_SLP
))
2618 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
2619 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
2630 /* Recursively build an SLP tree starting from NODE.
2631 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2632 permutation or are of unsupported types of operation. Otherwise, return
2634 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2635 in the case of multiple types for now. */
2638 vect_build_slp_tree (loop_vec_info loop_vinfo
, slp_tree
*node
,
2639 unsigned int group_size
, bool *slp_impossible
,
2640 int *inside_cost
, int *outside_cost
,
2641 int ncopies_for_cost
)
2643 VEC (tree
, heap
) *def_stmts0
= VEC_alloc (tree
, heap
, group_size
);
2644 VEC (tree
, heap
) *def_stmts1
= VEC_alloc (tree
, heap
, group_size
);
2646 VEC (tree
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
2647 tree stmt
= VEC_index (tree
, stmts
, 0);
2648 enum vect_def_type first_stmt_dt0
= 0, first_stmt_dt1
= 0;
2649 enum tree_code first_stmt_code
= 0;
2650 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
2651 tree lhs
, rhs
, prev_stmt
= NULL_TREE
;
2652 bool stop_recursion
= false, need_same_oprnds
= false;
2653 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
2654 unsigned int vectorization_factor
= 0, ncopies
;
2657 enum machine_mode optab_op2_mode
;
2658 enum machine_mode vec_mode
;
2659 tree first_stmt_const_oprnd
= NULL_TREE
;
2660 struct data_reference
*first_dr
;
2662 /* For every stmt in NODE find its def stmt/s. */
2663 for (i
= 0; VEC_iterate (tree
, stmts
, i
, stmt
); i
++)
2665 if (vect_print_dump_info (REPORT_SLP
))
2667 fprintf (vect_dump
, "Build SLP for ");
2668 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2671 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2673 if (vect_print_dump_info (REPORT_SLP
))
2675 fprintf (vect_dump
, "Build SLP failed: not MODIFY_STMT ");
2676 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2682 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
2683 vectype
= get_vectype_for_scalar_type (scalar_type
);
2686 if (vect_print_dump_info (REPORT_SLP
))
2688 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
2689 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
2694 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
2695 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2696 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
2700 if (vect_print_dump_info (REPORT_SLP
))
2701 fprintf (vect_dump
, "SLP failed - multiple types ");
2703 *slp_impossible
= true;
2707 lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
2708 rhs
= GIMPLE_STMT_OPERAND (stmt
, 1);
2710 /* Check the operation. */
2713 first_stmt_code
= TREE_CODE (rhs
);
2715 /* Shift arguments should be equal in all the packed stmts for a
2716 vector shift with scalar shift operand. */
2717 if (TREE_CODE (rhs
) == LSHIFT_EXPR
|| TREE_CODE (rhs
) == RSHIFT_EXPR
)
2719 vec_mode
= TYPE_MODE (vectype
);
2720 optab
= optab_for_tree_code (TREE_CODE (rhs
), vectype
);
2723 if (vect_print_dump_info (REPORT_SLP
))
2724 fprintf (vect_dump
, "Build SLP failed: no optab.");
2727 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
2728 if (icode
== CODE_FOR_nothing
)
2730 if (vect_print_dump_info (REPORT_SLP
))
2732 "Build SLP failed: op not supported by target.");
2735 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2736 if (!VECTOR_MODE_P (optab_op2_mode
))
2738 need_same_oprnds
= true;
2739 first_op1
= TREE_OPERAND (rhs
, 1);
2745 if (first_stmt_code
!= TREE_CODE (rhs
))
2747 if (vect_print_dump_info (REPORT_SLP
))
2750 "Build SLP failed: different operation in stmt ");
2751 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2757 if (need_same_oprnds
2758 && !operand_equal_p (first_op1
, TREE_OPERAND (rhs
, 1), 0))
2760 if (vect_print_dump_info (REPORT_SLP
))
2763 "Build SLP failed: different shift arguments in ");
2764 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2771 /* Strided store or load. */
2772 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
2774 if (REFERENCE_CLASS_P (lhs
))
2777 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, rhs
,
2778 &def_stmts0
, &def_stmts1
,
2781 &first_stmt_def0_type
,
2782 &first_stmt_def1_type
,
2783 &first_stmt_const_oprnd
,
2792 /* First stmt of the SLP group should be the first load of
2793 the interleaving loop if data permutation is not
2795 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
)
2797 /* FORNOW: data permutations are not supported. */
2798 if (vect_print_dump_info (REPORT_SLP
))
2800 fprintf (vect_dump
, "Build SLP failed: strided "
2801 " loads need permutation ");
2802 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2808 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2809 if (vect_supportable_dr_alignment (first_dr
)
2810 == dr_unaligned_unsupported
)
2812 if (vect_print_dump_info (REPORT_SLP
))
2814 fprintf (vect_dump
, "Build SLP failed: unsupported "
2815 " unaligned load ");
2816 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2822 /* Analyze costs (for the first stmt in the group). */
2823 vect_model_load_cost (vinfo_for_stmt (stmt
),
2824 ncopies_for_cost
, *node
);
2828 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt
)) != stmt
)
2830 /* FORNOW: data permutations are not supported. */
2831 if (vect_print_dump_info (REPORT_SLP
))
2833 fprintf (vect_dump
, "Build SLP failed: strided "
2834 " loads need permutation ");
2835 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2843 /* We stop the tree when we reach a group of loads. */
2844 stop_recursion
= true;
2847 } /* Strided access. */
2850 if (REFERENCE_CLASS_P (rhs
))
2852 /* Not strided load. */
2853 if (vect_print_dump_info (REPORT_SLP
))
2855 fprintf (vect_dump
, "Build SLP failed: not strided load ");
2856 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2859 /* FORNOW: Not strided loads are not supported. */
2863 /* Not memory operation. */
2864 if (!BINARY_CLASS_P (rhs
) && !UNARY_CLASS_P (rhs
))
2866 if (vect_print_dump_info (REPORT_SLP
))
2868 fprintf (vect_dump
, "Build SLP failed: operation");
2869 fprintf (vect_dump
, " unsupported ");
2870 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2876 /* Find the def-stmts. */
2877 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, rhs
, &def_stmts0
,
2878 &def_stmts1
, &first_stmt_dt0
,
2880 &first_stmt_def0_type
,
2881 &first_stmt_def1_type
,
2882 &first_stmt_const_oprnd
,
2888 /* Add the costs of the node to the overall instance costs. */
2889 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
2890 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
2892 /* Strided loads were reached - stop the recursion. */
2896 /* Create SLP_TREE nodes for the definition node/s. */
2897 if (first_stmt_dt0
== vect_loop_def
)
2899 slp_tree left_node
= XNEW (struct _slp_tree
);
2900 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
2901 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
2902 SLP_TREE_LEFT (left_node
) = NULL
;
2903 SLP_TREE_RIGHT (left_node
) = NULL
;
2904 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
2905 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
2906 if (!vect_build_slp_tree (loop_vinfo
, &left_node
, group_size
,
2907 slp_impossible
, inside_cost
, outside_cost
,
2911 SLP_TREE_LEFT (*node
) = left_node
;
2914 if (first_stmt_dt1
== vect_loop_def
)
2916 slp_tree right_node
= XNEW (struct _slp_tree
);
2917 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
2918 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
2919 SLP_TREE_LEFT (right_node
) = NULL
;
2920 SLP_TREE_RIGHT (right_node
) = NULL
;
2921 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
2922 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
2923 if (!vect_build_slp_tree (loop_vinfo
, &right_node
, group_size
,
2924 slp_impossible
, inside_cost
, outside_cost
,
2928 SLP_TREE_RIGHT (*node
) = right_node
;
2936 vect_print_slp_tree (slp_tree node
)
2944 fprintf (vect_dump
, "node ");
2945 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2947 fprintf (vect_dump
, "\n\tstmt %d ", i
);
2948 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2950 fprintf (vect_dump
, "\n");
2952 vect_print_slp_tree (SLP_TREE_LEFT (node
));
2953 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
2957 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2958 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2959 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2960 stmts in NODE are to be marked. */
2963 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
2971 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2972 if (j
< 0 || i
== j
)
2973 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
2975 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
2976 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
2980 /* Analyze an SLP instance starting from a group of strided stores. Call
2981 vect_build_slp_tree to build a tree of packed stmts if possible.
2982 Return FALSE if it's impossible to SLP any stmt in the loop. */
2985 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, tree stmt
)
2987 slp_instance new_instance
;
2988 slp_tree node
= XNEW (struct _slp_tree
);
2989 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
2990 unsigned int unrolling_factor
= 1, nunits
;
2991 tree vectype
, scalar_type
, next
;
2992 unsigned int vectorization_factor
= 0, ncopies
;
2993 bool slp_impossible
= false;
2994 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
;
2996 /* FORNOW: multiple types are not supported. */
2997 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))));
2998 vectype
= get_vectype_for_scalar_type (scalar_type
);
3001 if (vect_print_dump_info (REPORT_SLP
))
3003 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
3004 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
3009 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3010 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3011 ncopies
= vectorization_factor
/ nunits
;
3014 if (vect_print_dump_info (REPORT_SLP
))
3015 fprintf (vect_dump
, "SLP failed - multiple types ");
3020 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3021 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (tree
, heap
, group_size
);
3023 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3026 VEC_safe_push (tree
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
3027 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
3030 SLP_TREE_VEC_STMTS (node
) = NULL
;
3031 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
3032 SLP_TREE_LEFT (node
) = NULL
;
3033 SLP_TREE_RIGHT (node
) = NULL
;
3034 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
3035 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
3037 /* Calculate the unrolling factor. */
3038 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
3040 /* Calculate the number of vector stmts to create based on the unrolling
3041 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3042 GROUP_SIZE / NUNITS otherwise. */
3043 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
3045 /* Build the tree for the SLP instance. */
3046 if (vect_build_slp_tree (loop_vinfo
, &node
, group_size
, &slp_impossible
,
3047 &inside_cost
, &outside_cost
, ncopies_for_cost
))
3049 /* Create a new SLP instance. */
3050 new_instance
= XNEW (struct _slp_instance
);
3051 SLP_INSTANCE_TREE (new_instance
) = node
;
3052 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
3053 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
3054 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
3055 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
3056 VEC_safe_push (slp_instance
, heap
, LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
3058 if (vect_print_dump_info (REPORT_SLP
))
3059 vect_print_slp_tree (node
);
3064 /* Failed to SLP. */
3065 /* Free the allocated memory. */
3066 vect_free_slp_tree (node
);
3071 /* SLP failed for this instance, but it is still possible to SLP other stmts
3077 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3078 trees of packed scalar stmts if SLP is possible. */
3081 vect_analyze_slp (loop_vec_info loop_vinfo
)
3084 VEC (tree
, heap
) *strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
3087 if (vect_print_dump_info (REPORT_SLP
))
3088 fprintf (vect_dump
, "=== vect_analyze_slp ===");
3090 for (i
= 0; VEC_iterate (tree
, strided_stores
, i
, store
); i
++)
3091 if (!vect_analyze_slp_instance (loop_vinfo
, store
))
3093 /* SLP failed. No instance can be SLPed in the loop. */
3094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3095 fprintf (vect_dump
, "SLP failed.");
3104 /* For each possible SLP instance decide whether to SLP it and calculate overall
3105 unrolling factor needed to SLP the loop. */
3108 vect_make_slp_decision (loop_vec_info loop_vinfo
)
3110 unsigned int i
, unrolling_factor
= 1;
3111 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3112 slp_instance instance
;
3113 int decided_to_slp
= 0;
3115 if (vect_print_dump_info (REPORT_SLP
))
3116 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
3118 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3120 /* FORNOW: SLP if you can. */
3121 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
3122 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
3124 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3125 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3126 loop-based vectorization. Such stmts will be marked as HYBRID. */
3127 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3131 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
3133 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
3134 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
3135 decided_to_slp
, unrolling_factor
);
3139 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3140 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3143 vect_detect_hybrid_slp_stmts (slp_tree node
)
3147 imm_use_iterator imm_iter
;
3153 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
3154 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
3155 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
3156 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, GIMPLE_STMT_OPERAND (stmt
, 0))
3157 if (vinfo_for_stmt (use_stmt
)
3158 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt
)))
3159 vect_mark_slp_stmts (node
, hybrid
, i
);
3161 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
3162 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
3166 /* Find stmts that must be both vectorized and SLPed. */
3169 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
3172 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3173 slp_instance instance
;
3175 if (vect_print_dump_info (REPORT_SLP
))
3176 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
3178 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3179 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
3183 /* Function vect_analyze_data_refs.
3185 Find all the data references in the loop.
3187 The general structure of the analysis of data refs in the vectorizer is as
3189 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3190 find and analyze all data-refs in the loop and their dependences.
3191 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3192 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3193 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3198 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
3200 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3202 VEC (data_reference_p
, heap
) *datarefs
;
3203 struct data_reference
*dr
;
3206 if (vect_print_dump_info (REPORT_DETAILS
))
3207 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
3209 compute_data_dependences_for_loop (loop
, true,
3210 &LOOP_VINFO_DATAREFS (loop_vinfo
),
3211 &LOOP_VINFO_DDRS (loop_vinfo
));
3213 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3214 from stmt_vec_info struct to DR and vectype. */
3215 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
3217 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
3220 stmt_vec_info stmt_info
;
3222 tree base
, offset
, init
;
3224 if (!dr
|| !DR_REF (dr
))
3226 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3227 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
3231 stmt
= DR_STMT (dr
);
3232 stmt_info
= vinfo_for_stmt (stmt
);
3234 /* Check that analysis of the data-ref succeeded. */
3235 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
3238 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3240 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
3241 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3246 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
3248 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3249 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
3254 if (!DR_SYMBOL_TAG (dr
))
3256 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3258 fprintf (vect_dump
, "not vectorized: no memory tag for ");
3259 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
3264 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3265 offset
= unshare_expr (DR_OFFSET (dr
));
3266 init
= unshare_expr (DR_INIT (dr
));
3268 /* Update DR field in stmt_vec_info struct. */
3269 bb
= bb_for_stmt (stmt
);
3271 /* If the dataref is in an inner-loop of the loop that is considered for
3272 for vectorization, we also want to analyze the access relative to
3273 the outer-loop (DR contains information only relative to the
3274 inner-most enclosing loop). We do that by building a reference to the
3275 first location accessed by the inner-loop, and analyze it relative to
3277 if (nested_in_vect_loop_p (loop
, stmt
))
3279 tree outer_step
, outer_base
, outer_init
;
3280 HOST_WIDE_INT pbitsize
, pbitpos
;
3282 enum machine_mode pmode
;
3283 int punsignedp
, pvolatilep
;
3284 affine_iv base_iv
, offset_iv
;
3287 /* Build a reference to the first location accessed by the
3288 inner-loop: *(BASE+INIT). (The first location is actually
3289 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3290 tree inner_base
= build_fold_indirect_ref
3291 (fold_build2 (POINTER_PLUS_EXPR
,
3292 TREE_TYPE (base
), base
,
3293 fold_convert (sizetype
, init
)));
3295 if (vect_print_dump_info (REPORT_DETAILS
))
3297 fprintf (dump_file
, "analyze in outer-loop: ");
3298 print_generic_expr (dump_file
, inner_base
, TDF_SLIM
);
3301 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3302 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3303 gcc_assert (outer_base
!= NULL_TREE
);
3305 if (pbitpos
% BITS_PER_UNIT
!= 0)
3307 if (vect_print_dump_info (REPORT_DETAILS
))
3308 fprintf (dump_file
, "failed: bit offset alignment.\n");
3312 outer_base
= build_fold_addr_expr (outer_base
);
3313 if (!simple_iv (loop
, stmt
, outer_base
, &base_iv
, false))
3315 if (vect_print_dump_info (REPORT_DETAILS
))
3316 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
3323 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, poffset
);
3330 offset_iv
.base
= ssize_int (0);
3331 offset_iv
.step
= ssize_int (0);
3333 else if (!simple_iv (loop
, stmt
, poffset
, &offset_iv
, false))
3335 if (vect_print_dump_info (REPORT_DETAILS
))
3336 fprintf (dump_file
, "evolution of offset is not affine.\n");
3340 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3341 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3342 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3343 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3344 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3346 outer_step
= size_binop (PLUS_EXPR
,
3347 fold_convert (ssizetype
, base_iv
.step
),
3348 fold_convert (ssizetype
, offset_iv
.step
));
3350 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3351 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3352 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3353 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3354 STMT_VINFO_DR_OFFSET (stmt_info
) =
3355 fold_convert (ssizetype
, offset_iv
.base
);
3356 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3357 size_int (highest_pow2_factor (offset_iv
.base
));
3359 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3361 fprintf (dump_file
, "\touter base_address: ");
3362 print_generic_expr (dump_file
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
3363 fprintf (dump_file
, "\n\touter offset from base address: ");
3364 print_generic_expr (dump_file
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
3365 fprintf (dump_file
, "\n\touter constant offset from base address: ");
3366 print_generic_expr (dump_file
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
3367 fprintf (dump_file
, "\n\touter step: ");
3368 print_generic_expr (dump_file
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
3369 fprintf (dump_file
, "\n\touter aligned to: ");
3370 print_generic_expr (dump_file
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
3374 if (STMT_VINFO_DATA_REF (stmt_info
))
3376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3379 "not vectorized: more than one data ref in stmt: ");
3380 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3384 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3386 /* Set vectype for STMT. */
3387 scalar_type
= TREE_TYPE (DR_REF (dr
));
3388 STMT_VINFO_VECTYPE (stmt_info
) =
3389 get_vectype_for_scalar_type (scalar_type
);
3390 if (!STMT_VINFO_VECTYPE (stmt_info
))
3392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3395 "not vectorized: no vectype for stmt: ");
3396 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3397 fprintf (vect_dump
, " scalar_type: ");
3398 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
3408 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
/* NOTE(review): the text below is extraction-damaged -- each original
   source line is split across several physical lines and some lines
   (return type, braces, early "return") are missing, as the gaps in the
   embedded line numbers show.  Restore from a pristine
   tree-vect-analyze.c before attempting to compile.  */
3410 /* Function vect_mark_relevant.
3412 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3415 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
3416 enum vect_relevant relevant
, bool live_p
)
3418 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3419 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3420 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3422 if (vect_print_dump_info (REPORT_DETAILS
))
3423 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
/* If STMT is the last stmt of a recognized pattern, redirect all
   bookkeeping to the pattern-stmt that will actually be vectorized.  */
3425 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
3429 /* This is the last stmt in a sequence that was detected as a
3430 pattern that can potentially be vectorized. Don't mark the stmt
3431 as relevant/live because it's not going to be vectorized.
3432 Instead mark the pattern-stmt that replaces it. */
3434 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3436 if (vect_print_dump_info (REPORT_DETAILS
))
3437 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
3438 stmt_info
= vinfo_for_stmt (pattern_stmt
);
3439 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
3440 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3441 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3442 stmt
= pattern_stmt
;
/* Merge the new flags into STMT's info: liveness is OR-ed in, and
   relevance is only ever raised, never lowered.  */
3445 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
3446 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
3447 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If neither flag actually changed, STMT was already on record -- do not
   re-queue it (the early "return" belonging to this test is among the
   missing lines; confirm against the original).  */
3449 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
3450 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
3452 if (vect_print_dump_info (REPORT_DETAILS
))
3453 fprintf (vect_dump
, "already marked relevant/live.");
3457 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
3461 /* Function vect_stmt_relevant_p.
3463 Return true if STMT in loop that is represented by LOOP_VINFO is
3464 "relevant for vectorization".
3466 A stmt is considered "relevant for vectorization" if:
3467 - it has uses outside the loop.
3468 - it has vdefs (it alters memory).
3469 - control stmts in the loop (except for the exit condition).
3471 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines and several lines are missing (e.g. the
   "*live_p = false" initialization and the line that sets *live_p when
   an out-of-loop use is found); confirm against a pristine copy.  */
3474 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
3475 enum vect_relevant
*relevant
, bool *live_p
)
3477 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3478 ssa_op_iter op_iter
;
3479 imm_use_iterator imm_iter
;
3480 use_operand_p use_p
;
3481 def_operand_p def_p
;
/* Start pessimistic: assume STMT is not relevant until proven so.  */
3483 *relevant
= vect_unused_in_loop
;
3486 /* cond stmt other than loop exit cond. */
3487 if (is_ctrl_stmt (stmt
)
3488 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
)) != loop_exit_ctrl_vec_info_type
)
3489 *relevant
= vect_used_in_loop
;
3491 /* changing memory. */
3492 if (TREE_CODE (stmt
) != PHI_NODE
)
3493 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
3495 if (vect_print_dump_info (REPORT_DETAILS
))
3496 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
3497 *relevant
= vect_used_in_loop
;
3500 /* uses outside the loop. */
3501 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3503 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
3505 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
3506 if (!flow_bb_inside_loop_p (loop
, bb
))
3508 if (vect_print_dump_info (REPORT_DETAILS
))
3509 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
3511 /* We expect all such uses to be in the loop exit phis
3512 (because of loop closed form) */
3513 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
3514 gcc_assert (bb
== single_exit (loop
)->dest
);
/* Relevant iff STMT is live (used after the loop) or used inside it.  */
3521 return (*live_p
|| *relevant
);
3526 Function process_use.
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines; the opening comment marker of this header, the
   function return type, braces and "return true/false" statements are
   among the missing lines.  Confirm against a pristine copy.  */
3529 - a USE in STMT in a loop represented by LOOP_VINFO
3530 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3531 that defined USE. This is dont by calling mark_relevant and passing it
3532 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3535 Generally, LIVE_P and RELEVANT are used to define the liveness and
3536 relevance info of the DEF_STMT of this USE:
3537 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3538 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3540 - case 1: If USE is used only for address computations (e.g. array indexing),
3541 which does not need to be directly vectorized, then the liveness/relevance
3542 of the respective DEF_STMT is left unchanged.
3543 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3544 skip DEF_STMT cause it had already been processed.
3545 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3546 be modified accordingly.
3548 Return true if everything is as expected. Return false otherwise. */
3551 process_use (tree stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
3552 enum vect_relevant relevant
, VEC(tree
,heap
) **worklist
)
3554 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3555 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3556 stmt_vec_info dstmt_vinfo
;
3557 basic_block bb
, def_bb
;
3559 enum vect_def_type dt
;
3561 /* case 1: we are only interested in uses that need to be vectorized. Uses
3562 that are used for address computation are not considered relevant. */
3563 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
3566 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3569 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
3573 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
3576 def_bb
= bb_for_stmt (def_stmt
);
3577 if (!flow_bb_inside_loop_p (loop
, def_bb
))
3579 if (vect_print_dump_info (REPORT_DETAILS
))
3580 fprintf (vect_dump
, "def_stmt is out of loop.");
3584 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3585 DEF_STMT must have already been processed, because this should be the
3586 only way that STMT, which is a reduction-phi, was put in the worklist,
3587 as there should be no other uses for DEF_STMT in the loop. So we just
3588 check that everything is as expected, and we are done. */
3589 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
3590 bb
= bb_for_stmt (stmt
);
3591 if (TREE_CODE (stmt
) == PHI_NODE
3592 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
3593 && TREE_CODE (def_stmt
) != PHI_NODE
3594 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
3595 && bb
->loop_father
== def_bb
->loop_father
)
3597 if (vect_print_dump_info (REPORT_DETAILS
))
3598 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
3599 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
3600 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
3601 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
)
;
3602 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
3603 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_loop
);
3607 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3608 outer-loop-header-bb:
3614 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
3616 if (vect_print_dump_info (REPORT_DETAILS
))
3617 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
/* NOTE(review): the enclosing "switch (relevant)" header, its braces,
   the "break"s and the default case of this and the following case-label
   group are among the missing lines -- the bare "case" labels below
   belong to two switch statements that translate the caller's relevance
   across the inner/outer loop boundary.  */
3620 case vect_unused_in_loop
:
3621 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3622 vect_used_by_reduction
: vect_unused_in_loop
;
3624 case vect_used_in_outer_by_reduction
:
3625 relevant
= vect_used_by_reduction
;
3627 case vect_used_in_outer
:
3628 relevant
= vect_used_in_loop
;
3630 case vect_used_by_reduction
:
3631 case vect_used_in_loop
:
3639 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3640 outer-loop-header-bb:
3646 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
3648 if (vect_print_dump_info (REPORT_DETAILS
))
3649 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
3652 case vect_unused_in_loop
:
3653 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3654 vect_used_in_outer_by_reduction
: vect_unused_in_loop
;
3657 case vect_used_in_outer_by_reduction
:
3658 case vect_used_in_outer
:
3661 case vect_used_by_reduction
:
3662 relevant
= vect_used_in_outer_by_reduction
;
3665 case vect_used_in_loop
:
3666 relevant
= vect_used_in_outer
;
/* Finally propagate the (possibly adjusted) flags to DEF_STMT and queue
   it for processing.  */
3674 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
3679 /* Function vect_mark_stmts_to_be_vectorized.
3681 Not all stmts in the loop need to be vectorized. For example:
3690 Stmt 1 and 3 do not need to be vectorized, because loop control and
3691 addressing of vectorized data-refs are handled differently.
3693 This pass detects such stmts. */
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines and several declarations (e.g. "bb", "phi",
   "stmt", "i", "live_p", "ann", "iter") plus braces and "return"
   statements are among the missing lines; confirm against a pristine
   tree-vect-analyze.c.  */
3696 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
3698 VEC(tree
,heap
) *worklist
;
3699 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3700 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3701 unsigned int nbbs
= loop
->num_nodes
;
3702 block_stmt_iterator si
;
3706 stmt_vec_info stmt_vinfo
;
3710 enum vect_relevant relevant
;
3712 if (vect_print_dump_info (REPORT_DETAILS
))
3713 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
3715 worklist
= VEC_alloc (tree
, heap
, 64);
3717 /* 1. Init worklist. */
/* Seed the worklist with every phi/stmt that is relevant on its own
   (control flow, memory side effects, uses after the loop).  */
3718 for (i
= 0; i
< nbbs
; i
++)
3721 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
3723 if (vect_print_dump_info (REPORT_DETAILS
))
3725 fprintf (vect_dump
, "init: phi relevant? ");
3726 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
3729 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
3730 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
3732 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3734 stmt
= bsi_stmt (si
);
3735 if (vect_print_dump_info (REPORT_DETAILS
))
3737 fprintf (vect_dump
, "init: stmt relevant? ");
3738 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3741 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
3742 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
3746 /* 2. Process_worklist */
3747 while (VEC_length (tree
, worklist
) > 0)
3749 use_operand_p use_p
;
3752 stmt
= VEC_pop (tree
, worklist
);
3753 if (vect_print_dump_info (REPORT_DETAILS
))
3755 fprintf (vect_dump
, "worklist: examine stmt: ");
3756 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3759 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3760 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3761 liveness and relevance properties of STMT. */
/* "ann" is assigned here but its declaration is not visible in this
   damaged text (presumably a stmt_ann_t local); it also appears unused
   below -- verify against the original.  */
3762 ann
= stmt_ann (stmt
);
3763 stmt_vinfo
= vinfo_for_stmt (stmt
);
3764 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
3765 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
3767 /* Generally, the liveness and relevance properties of STMT are
3768 propagated as is to the DEF_STMTs of its USEs:
3769 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3770 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3772 One exception is when STMT has been identified as defining a reduction
3773 variable; in this case we set the liveness/relevance as follows:
3775 relevant = vect_used_by_reduction
3776 This is because we distinguish between two kinds of relevant stmts -
3777 those that are used by a reduction computation, and those that are
3778 (also) used by a regular computation. This allows us later on to
3779 identify stmts that are used solely by a reduction, and therefore the
3780 order of the results that they produce does not have to be kept.
3782 Reduction phis are expected to be used by a reduction stmt, or by
3783 in an outer loop; Other reduction stmts are expected to be
3784 in the loop, and possibly used by a stmt in an outer loop.
3785 Here are the expected values of "relevant" for reduction phis/stmts:
3788 vect_unused_in_loop ok
3789 vect_used_in_outer_by_reduction ok ok
3790 vect_used_in_outer ok ok
3791 vect_used_by_reduction ok
3792 vect_used_in_loop */
3794 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
3796 enum vect_relevant tmp_relevant
= relevant
;
3797 switch (tmp_relevant
)
3799 case vect_unused_in_loop
:
3800 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
3801 relevant
= vect_used_by_reduction
;
3804 case vect_used_in_outer_by_reduction
:
3805 case vect_used_in_outer
:
3806 gcc_assert (TREE_CODE (stmt
) != WIDEN_SUM_EXPR
3807 && TREE_CODE (stmt
) != DOT_PROD_EXPR
);
3810 case vect_used_by_reduction
:
3811 if (TREE_CODE (stmt
) == PHI_NODE
)
/* NOTE(review): the "break"s of this switch and the fall-through into
   the failure path below are among the missing lines -- the
   vect_used_in_loop case (and unexpected phi uses) bail out as an
   unsupported use of a reduction.  */
3814 case vect_used_in_loop
:
3816 if (vect_print_dump_info (REPORT_DETAILS
))
3817 fprintf (vect_dump
, "unsupported use of reduction.");
3818 VEC_free (tree
, heap
, worklist
);
3824 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3826 tree op
= USE_FROM_PTR (use_p
);
3827 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
, &worklist
))
3829 VEC_free (tree
, heap
, worklist
);
3833 } /* while worklist */
3835 VEC_free (tree
, heap
, worklist
);
3840 /* Function vect_can_advance_ivs_p
3842 In case the number of iterations that LOOP iterates is unknown at compile
3843 time, an epilog loop will be generated, and the loop induction variables
3844 (IVs) will be "advanced" to the value they are supposed to take just before
3845 the epilog loop. Here we check that the access function of the loop IVs
3846 and the expression that represents the loop bound are simple enough.
3847 These restrictions will be relaxed in the future. */
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines; the "phi" declaration, braces, "continue" and
   "return" statements are among the missing lines.  Confirm against a
   pristine copy before compiling.  */
3850 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
3852 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3853 basic_block bb
= loop
->header
;
3856 /* Analyze phi functions of the loop header. */
3858 if (vect_print_dump_info (REPORT_DETAILS
))
3859 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
3861 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
3863 tree access_fn
= NULL
;
3864 tree evolution_part
;
3866 if (vect_print_dump_info (REPORT_DETAILS
))
3868 fprintf (vect_dump
, "Analyze phi: ");
3869 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
3872 /* Skip virtual phi's. The data dependences that are associated with
3873 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3875 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
3877 if (vect_print_dump_info (REPORT_DETAILS
))
3878 fprintf (vect_dump
, "virtual phi. skip.");
3882 /* Skip reduction phis. */
3884 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
3886 if (vect_print_dump_info (REPORT_DETAILS
))
3887 fprintf (vect_dump
, "reduc phi. skip.");
3891 /* Analyze the evolution function. */
3893 access_fn
= instantiate_parameters
3894 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)))
;
/* NOTE(review): the "if (!access_fn)" guard this dump presumably
   belongs to is among the missing lines -- confirm.  */
3898 if (vect_print_dump_info (REPORT_DETAILS
))
3899 fprintf (vect_dump
, "No Access function.");
3903 if (vect_print_dump_info (REPORT_DETAILS
))
3905 fprintf (vect_dump
, "Access function of PHI: ");
3906 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
3909 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
3911 if (evolution_part
== NULL_TREE
)
3913 if (vect_print_dump_info (REPORT_DETAILS
))
3914 fprintf (vect_dump
, "No evolution.");
3918 /* FORNOW: We do not transform initial conditions of IVs
3919 which evolution functions are a polynomial of degree >= 2. */
3921 if (tree_is_chrec (evolution_part
))
3929 /* Function vect_get_loop_niters.
3931 Determine how many iterations the loop is executed.
3932 If an expression that represents the number of iterations
3933 can be constructed, place it in NUMBER_OF_ITERATIONS.
3934 Return the loop exit condition. */
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines; the "tree niters;" declaration and braces are
   among the missing lines.  Confirm against a pristine copy.  */
3937 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
3941 if (vect_print_dump_info (REPORT_DETAILS
))
3942 fprintf (vect_dump
, "=== get_loop_niters ===");
3944 niters
= number_of_exit_cond_executions (loop
);
/* Only publish the count when scalar evolution could actually compute
   it; otherwise *number_of_iterations is left untouched.  */
3946 if (niters
!= NULL_TREE
3947 && niters
!= chrec_dont_know
)
3949 *number_of_iterations
= niters
;
3951 if (vect_print_dump_info (REPORT_DETAILS
))
3953 fprintf (vect_dump
, "==> get_loop_niters:" );
3954 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
/* The exit condition is returned regardless of whether the count was
   computable.  */
3958 return get_loop_exit_condition (loop
);
3962 /* Function vect_analyze_loop_1.
3964 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3965 for it. The different analyses will record information in the
3966 loop_vec_info struct. This is a subset of the analyses applied in
3967 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3968 that is now considered for (outer-loop) vectorization. */
/* NOTE(review): extraction-damaged text -- the "if (!loop_vinfo)" guard
   this failure dump belongs to, the "return" statements and the closing
   brace are among the missing lines.  Confirm against a pristine copy.  */
3970 static loop_vec_info
3971 vect_analyze_loop_1 (struct loop
*loop
)
3973 loop_vec_info loop_vinfo
;
3975 if (vect_print_dump_info (REPORT_DETAILS
))
3976 fprintf (vect_dump
, "===== analyze_loop_nest_1 =====");
3978 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3980 loop_vinfo
= vect_analyze_loop_form (loop
);
3983 if (vect_print_dump_info (REPORT_DETAILS
))
3984 fprintf (vect_dump
, "bad inner-loop form.");
3992 /* Function vect_analyze_loop_form.
3994 Verify that certain CFG restrictions hold, including:
3995 - the loop has a pre-header
3996 - the loop has a single entry and exit
3997 - the loop exit condition is simple enough, and the number of iterations
3998 can be analyzed (a countable loop). */
/* NOTE(review): extraction-damaged text -- original lines are split
   across physical lines; the return type, "loop_cond" declaration, the
   if/else structure around the inner-most vs. nested-loop cases, braces,
   "return NULL" statements and the final "return loop_vinfo;" are among
   the missing lines.  Confirm against a pristine tree-vect-analyze.c.  */
4001 vect_analyze_loop_form (struct loop
*loop
)
4003 loop_vec_info loop_vinfo
;
4005 tree number_of_iterations
= NULL
;
4006 loop_vec_info inner_loop_vinfo
= NULL
;
4008 if (vect_print_dump_info (REPORT_DETAILS
))
4009 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
4011 /* Different restrictions apply when we are considering an inner-most loop,
4012 vs. an outer (nested) loop.
4013 (FORNOW. May want to relax some of these restrictions in the future). */
4017 /* Inner-most loop. We currently require that the number of BBs is
4018 exactly 2 (the header and latch). Vectorizable inner-most loops
4029 if (loop
->num_nodes
!= 2)
4031 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4032 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
4036 if (empty_block_p (loop
->header
))
4038 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4039 fprintf (vect_dump
, "not vectorized: empty loop.");
4045 struct loop
*innerloop
= loop
->inner
;
4046 edge backedge
, entryedge
;
4048 /* Nested loop. We currently require that the loop is doubly-nested,
4049 contains a single inner loop, and the number of BBs is exactly 5.
4050 Vectorizable outer-loops look like this:
4062 The inner-loop has the properties expected of inner-most loops
4063 as described above. */
4065 if ((loop
->inner
)->inner
|| (loop
->inner
)->next
)
4067 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4068 fprintf (vect_dump
, "not vectorized: multiple nested loops.");
4072 /* Analyze the inner-loop. */
4073 inner_loop_vinfo
= vect_analyze_loop_1 (loop
->inner
);
4074 if (!inner_loop_vinfo
)
4076 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4077 fprintf (vect_dump
, "not vectorized: Bad inner loop.");
/* The outer loop's trip count of the inner loop must not vary across
   outer iterations, or outer-loop vectorization is not possible.  */
4081 if (!expr_invariant_in_loop_p (loop
,
4082 LOOP_VINFO_NITERS (inner_loop_vinfo
)))
4084 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4086 "not vectorized: inner-loop count not invariant.");
4087 destroy_loop_vec_info (inner_loop_vinfo
, true);
4091 if (loop
->num_nodes
!= 5)
4093 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4094 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
4095 destroy_loop_vec_info (inner_loop_vinfo
, true);
/* Identify which of the inner-loop header's two predecessor edges is
   the latch back-edge; ENTRYEDGE must be the edge coming from the
   outer-loop header.  */
4099 gcc_assert (EDGE_COUNT (innerloop
->header
->preds
) == 2);
4100 backedge
= EDGE_PRED (innerloop
->header
, 1);
4101 entryedge
= EDGE_PRED (innerloop
->header
, 0);
4102 if (EDGE_PRED (innerloop
->header
, 0)->src
== innerloop
->latch
)
4104 backedge
= EDGE_PRED (innerloop
->header
, 0);
4105 entryedge
= EDGE_PRED (innerloop
->header
, 1);
4108 if (entryedge
->src
!= loop
->header
4109 || !single_exit (innerloop
)
4110 || single_exit (innerloop
)->dest
!= EDGE_PRED (loop
->latch
, 0)->src
)
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4113 fprintf (vect_dump
, "not vectorized: unsupported outerloop form.");
4114 destroy_loop_vec_info (inner_loop_vinfo
, true);
4118 if (vect_print_dump_info (REPORT_DETAILS
))
4119 fprintf (vect_dump
, "Considering outer-loop vectorization.");
/* Checks below apply to both the inner-most and the nested case.  */
4122 if (!single_exit (loop
)
4123 || EDGE_COUNT (loop
->header
->preds
) != 2)
4125 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4127 if (!single_exit (loop
))
4128 fprintf (vect_dump
, "not vectorized: multiple exits.");
4129 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
4130 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
4132 if (inner_loop_vinfo
)
4133 destroy_loop_vec_info (inner_loop_vinfo
, true);
4137 /* We assume that the loop exit condition is at the end of the loop. i.e,
4138 that the loop is represented as a do-while (with a proper if-guard
4139 before the loop if needed), where the loop header contains all the
4140 executable statements, and the latch is empty. */
4141 if (!empty_block_p (loop
->latch
)
4142 || phi_nodes (loop
->latch
))
4144 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4145 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
4146 if (inner_loop_vinfo
)
4147 destroy_loop_vec_info (inner_loop_vinfo
, true);
4151 /* Make sure there exists a single-predecessor exit bb: */
4152 if (!single_pred_p (single_exit (loop
)->dest
))
4154 edge e
= single_exit (loop
);
4155 if (!(e
->flags
& EDGE_ABNORMAL
))
4157 split_loop_exit_edge (e
);
4158 if (vect_print_dump_info (REPORT_DETAILS
))
4159 fprintf (vect_dump
, "split exit edge.");
4163 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4164 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
4165 if (inner_loop_vinfo
)
4166 destroy_loop_vec_info (inner_loop_vinfo
, true);
/* Ask for the exit condition and iteration count.  The failure check on
   "loop_cond" ("complicated exit condition" below) is among the missing
   lines -- confirm against the original.  */
4171 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
4174 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4175 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
4176 if (inner_loop_vinfo
)
4177 destroy_loop_vec_info (inner_loop_vinfo
, true);
4181 if (!number_of_iterations
)
4183 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4185 "not vectorized: number of iterations cannot be computed.");
4186 if (inner_loop_vinfo
)
4187 destroy_loop_vec_info (inner_loop_vinfo
, true);
4191 if (chrec_contains_undetermined (number_of_iterations
))
4193 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4194 fprintf (vect_dump
, "Infinite number of iterations.");
4195 if (inner_loop_vinfo
)
4196 destroy_loop_vec_info (inner_loop_vinfo
, true);
4200 if (!NITERS_KNOWN_P (number_of_iterations
))
4202 if (vect_print_dump_info (REPORT_DETAILS
))
4204 fprintf (vect_dump
, "Symbolic number of iterations is ");
4205 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
4208 else if (TREE_INT_CST_LOW (number_of_iterations
) == 0)
4210 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
4211 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
4212 if (inner_loop_vinfo
)
4213 destroy_loop_vec_info (inner_loop_vinfo
, false);
/* All checks passed: create the loop_vec_info, record the trip count
   and tag the exit condition so it is not treated as a regular stmt.  */
4217 loop_vinfo
= new_loop_vec_info (loop
);
4218 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
4220 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond
)) = loop_exit_ctrl_vec_info_type
;
4222 /* CHECKME: May want to keep it around it in the future. */
4223 if (inner_loop_vinfo
)
4224 destroy_loop_vec_info (inner_loop_vinfo
, false);
4226 gcc_assert (!loop
->aux
);
4227 loop
->aux
= loop_vinfo
;
4232 /* Function vect_analyze_loop.
4234 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4235 for it. The different analyses will record information in the
4236 loop_vec_info struct. */
4238 vect_analyze_loop (struct loop
*loop
)
4241 loop_vec_info loop_vinfo
;
4243 if (vect_print_dump_info (REPORT_DETAILS
))
4244 fprintf (vect_dump
, "===== analyze_loop_nest =====");
4246 if (loop_outer (loop
)
4247 && loop_vec_info_for_loop (loop_outer (loop
))
4248 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop
))))
4250 if (vect_print_dump_info (REPORT_DETAILS
))
4251 fprintf (vect_dump
, "outer-loop already vectorized.");
4255 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4257 loop_vinfo
= vect_analyze_loop_form (loop
);
4260 if (vect_print_dump_info (REPORT_DETAILS
))
4261 fprintf (vect_dump
, "bad loop form.");
4265 /* Find all data references in the loop (which correspond to vdefs/vuses)
4266 and analyze their evolution in the loop.
4268 FORNOW: Handle only simple, array references, which
4269 alignment can be forced, and aligned pointer-references. */
4271 ok
= vect_analyze_data_refs (loop_vinfo
);
4274 if (vect_print_dump_info (REPORT_DETAILS
))
4275 fprintf (vect_dump
, "bad data references.");
4276 destroy_loop_vec_info (loop_vinfo
, true);
4280 /* Classify all cross-iteration scalar data-flow cycles.
4281 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4283 vect_analyze_scalar_cycles (loop_vinfo
);
4285 vect_pattern_recog (loop_vinfo
);
4287 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4289 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
4292 if (vect_print_dump_info (REPORT_DETAILS
))
4293 fprintf (vect_dump
, "unexpected pattern.");
4294 destroy_loop_vec_info (loop_vinfo
, true);
4298 /* Analyze the alignment of the data-refs in the loop.
4299 Fail if a data reference is found that cannot be vectorized. */
4301 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
4304 if (vect_print_dump_info (REPORT_DETAILS
))
4305 fprintf (vect_dump
, "bad data alignment.");
4306 destroy_loop_vec_info (loop_vinfo
, true);
4310 ok
= vect_determine_vectorization_factor (loop_vinfo
);
4313 if (vect_print_dump_info (REPORT_DETAILS
))
4314 fprintf (vect_dump
, "can't determine vectorization factor.");
4315 destroy_loop_vec_info (loop_vinfo
, true);
4319 /* Analyze data dependences between the data-refs in the loop.
4320 FORNOW: fail at the first data dependence that we encounter. */
4322 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
4325 if (vect_print_dump_info (REPORT_DETAILS
))
4326 fprintf (vect_dump
, "bad data dependence.");
4327 destroy_loop_vec_info (loop_vinfo
, true);
4331 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4332 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4334 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
4337 if (vect_print_dump_info (REPORT_DETAILS
))
4338 fprintf (vect_dump
, "bad data access.");
4339 destroy_loop_vec_info (loop_vinfo
, true);
4343 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4344 It is important to call pruning after vect_analyze_data_ref_accesses,
4345 since we use grouping information gathered by interleaving analysis. */
4346 ok
= vect_prune_runtime_alias_test_list (loop_vinfo
);
4349 if (vect_print_dump_info (REPORT_DETAILS
))
4350 fprintf (vect_dump
, "too long list of versioning for alias "
4352 destroy_loop_vec_info (loop_vinfo
, true);
4356 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4357 ok
= vect_analyze_slp (loop_vinfo
);
4360 /* Decide which possible SLP instances to SLP. */
4361 vect_make_slp_decision (loop_vinfo
);
4363 /* Find stmts that need to be both vectorized and SLPed. */
4364 vect_detect_hybrid_slp (loop_vinfo
);
4367 /* This pass will decide on using loop versioning and/or loop peeling in
4368 order to enhance the alignment of data references in the loop. */
4370 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
4373 if (vect_print_dump_info (REPORT_DETAILS
))
4374 fprintf (vect_dump
, "bad data alignment.");
4375 destroy_loop_vec_info (loop_vinfo
, true);
4379 /* Scan all the operations in the loop and make sure they are
4382 ok
= vect_analyze_operations (loop_vinfo
);
4385 if (vect_print_dump_info (REPORT_DETAILS
))
4386 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
4387 destroy_loop_vec_info (loop_vinfo
, true);
4391 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;