1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
44 /* Main analysis functions. */
45 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
46 static bool vect_analyze_data_refs (loop_vec_info
);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
48 static void vect_analyze_scalar_cycles (loop_vec_info
);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
52 static bool vect_compute_data_refs_alignment (loop_vec_info
);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
54 static bool vect_analyze_operations (loop_vec_info
);
55 static bool vect_determine_vectorization_factor (loop_vec_info
);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
59 static tree
vect_get_loop_niters (struct loop
*, tree
*);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation
*, loop_vec_info
);
62 static bool vect_compute_data_ref_alignment (struct data_reference
*);
63 static bool vect_analyze_data_ref_access (struct data_reference
*);
64 static bool vect_can_advance_ivs_p (loop_vec_info
);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference
*, struct data_reference
*, int npeel
);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4byte elements,
73 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
96 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
97 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
98 int nbbs
= loop
->num_nodes
;
99 block_stmt_iterator si
;
100 unsigned int vectorization_factor
= 0;
105 stmt_vec_info stmt_info
;
108 if (vect_print_dump_info (REPORT_DETAILS
))
109 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
111 for (i
= 0; i
< nbbs
; i
++)
113 basic_block bb
= bbs
[i
];
115 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
117 stmt_info
= vinfo_for_stmt (phi
);
118 if (vect_print_dump_info (REPORT_DETAILS
))
120 fprintf (vect_dump
, "==> examining phi: ");
121 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
124 gcc_assert (stmt_info
);
126 if (STMT_VINFO_RELEVANT_P (stmt_info
))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info
));
129 scalar_type
= TREE_TYPE (PHI_RESULT (phi
));
131 if (vect_print_dump_info (REPORT_DETAILS
))
133 fprintf (vect_dump
, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
137 vectype
= get_vectype_for_scalar_type (scalar_type
);
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
148 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
150 if (vect_print_dump_info (REPORT_DETAILS
))
152 fprintf (vect_dump
, "vectype: ");
153 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
156 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
157 if (vect_print_dump_info (REPORT_DETAILS
))
158 fprintf (vect_dump
, "nunits = %d", nunits
);
160 if (!vectorization_factor
161 || (nunits
> vectorization_factor
))
162 vectorization_factor
= nunits
;
166 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
168 tree stmt
= bsi_stmt (si
);
169 stmt_info
= vinfo_for_stmt (stmt
);
171 if (vect_print_dump_info (REPORT_DETAILS
))
173 fprintf (vect_dump
, "==> examining statement: ");
174 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
177 gcc_assert (stmt_info
);
179 /* skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
181 && !STMT_VINFO_LIVE_P (stmt_info
))
183 if (vect_print_dump_info (REPORT_DETAILS
))
184 fprintf (vect_dump
, "skip.");
188 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
192 fprintf (vect_dump
, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
198 if (!GIMPLE_STMT_P (stmt
)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
203 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
209 if (STMT_VINFO_VECTYPE (stmt_info
))
211 /* The only case when a vectype had been already set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
215 || is_pattern_stmt_p (stmt_info
));
216 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info
)
223 && !is_pattern_stmt_p (stmt_info
));
225 /* We generally set the vectype according to the type of the
227 For stmts whose result-type is different than the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
240 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
242 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
243 if (TREE_CODE (operation
) == NOP_EXPR
244 || TREE_CODE (operation
) == CONVERT_EXPR
245 || TREE_CODE (operation
) == WIDEN_MULT_EXPR
)
247 tree rhs_type
= TREE_TYPE (TREE_OPERAND (operation
, 0));
248 if (TYPE_SIZE_UNIT (rhs_type
) < TYPE_SIZE_UNIT (scalar_type
))
249 scalar_type
= TREE_TYPE (TREE_OPERAND (operation
, 0));
252 if (vect_print_dump_info (REPORT_DETAILS
))
254 fprintf (vect_dump
, "get vectype for scalar type: ");
255 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
258 vectype
= get_vectype_for_scalar_type (scalar_type
);
261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
264 "not vectorized: unsupported data-type ");
265 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
269 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
272 if (vect_print_dump_info (REPORT_DETAILS
))
274 fprintf (vect_dump
, "vectype: ");
275 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
278 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
279 if (vect_print_dump_info (REPORT_DETAILS
))
280 fprintf (vect_dump
, "nunits = %d", nunits
);
282 if (!vectorization_factor
283 || (nunits
> vectorization_factor
))
284 vectorization_factor
= nunits
;
289 /* TODO: Analyze cost. Decide if worth while to vectorize. */
290 if (vect_print_dump_info (REPORT_DETAILS
))
291 fprintf (vect_dump
, "vectorization factor = %d", vectorization_factor
);
292 if (vectorization_factor
<= 1)
294 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
295 fprintf (vect_dump
, "not vectorized: unsupported data-type");
298 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
304 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
305 the number of created vector stmts depends on the unrolling factor). However,
306 the actual number of vector stmts for every SLP node depends on VF which is
307 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
308 In this function we assume that the inside costs calculated in
309 vect_model_xxx_cost are linear in ncopies. */
312 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
314 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
315 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
316 slp_instance instance
;
318 if (vect_print_dump_info (REPORT_SLP
))
319 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
321 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
322 /* We assume that costs are linear in ncopies. */
323 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
324 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
328 /* Function vect_analyze_operations.
330 Scan the loop stmts and make sure they are all vectorizable. */
333 vect_analyze_operations (loop_vec_info loop_vinfo
)
335 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
336 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
337 int nbbs
= loop
->num_nodes
;
338 block_stmt_iterator si
;
339 unsigned int vectorization_factor
= 0;
343 stmt_vec_info stmt_info
;
344 bool need_to_vectorize
= false;
345 int min_profitable_iters
;
346 int min_scalar_loop_bound
;
348 bool only_slp_in_loop
= true;
350 if (vect_print_dump_info (REPORT_DETAILS
))
351 fprintf (vect_dump
, "=== vect_analyze_operations ===");
353 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
354 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
356 for (i
= 0; i
< nbbs
; i
++)
358 basic_block bb
= bbs
[i
];
360 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
364 stmt_info
= vinfo_for_stmt (phi
);
365 if (vect_print_dump_info (REPORT_DETAILS
))
367 fprintf (vect_dump
, "examining phi: ");
368 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
371 if (! is_loop_header_bb_p (bb
))
373 /* inner-loop loop-closed exit phi in outer-loop vectorization
374 (i.e. a phi in the tail of the outer-loop).
375 FORNOW: we currently don't support the case that these phis
376 are not used in the outerloop, cause this case requires
377 to actually do something here. */
378 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
379 || STMT_VINFO_LIVE_P (stmt_info
))
381 if (vect_print_dump_info (REPORT_DETAILS
))
383 "Unsupported loop-closed phi in outer-loop.");
389 gcc_assert (stmt_info
);
391 if (STMT_VINFO_LIVE_P (stmt_info
))
393 /* FORNOW: not yet supported. */
394 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
395 fprintf (vect_dump
, "not vectorized: value used after loop.");
399 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_loop
400 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_induction_def
)
402 /* A scalar-dependence cycle that we don't support. */
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
404 fprintf (vect_dump
, "not vectorized: scalar dependence cycle.");
408 if (STMT_VINFO_RELEVANT_P (stmt_info
))
410 need_to_vectorize
= true;
411 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
412 ok
= vectorizable_induction (phi
, NULL
, NULL
);
417 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
420 "not vectorized: relevant phi not supported: ");
421 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
427 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
429 tree stmt
= bsi_stmt (si
);
430 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
431 enum vect_def_type relevance
= STMT_VINFO_RELEVANT (stmt_info
);
433 if (vect_print_dump_info (REPORT_DETAILS
))
435 fprintf (vect_dump
, "==> examining statement: ");
436 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
439 gcc_assert (stmt_info
);
441 /* skip stmts which do not need to be vectorized.
442 this is expected to include:
443 - the COND_EXPR which is the loop exit condition
444 - any LABEL_EXPRs in the loop
445 - computations that are used only for array indexing or loop
448 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
449 && !STMT_VINFO_LIVE_P (stmt_info
))
451 if (vect_print_dump_info (REPORT_DETAILS
))
452 fprintf (vect_dump
, "irrelevant.");
456 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
461 case vect_reduction_def
:
462 gcc_assert (relevance
== vect_used_in_outer
463 || relevance
== vect_used_in_outer_by_reduction
464 || relevance
== vect_unused_in_loop
);
467 case vect_induction_def
:
468 case vect_constant_def
:
469 case vect_invariant_def
:
470 case vect_unknown_def_type
:
475 if (STMT_VINFO_RELEVANT_P (stmt_info
))
477 gcc_assert (GIMPLE_STMT_P (stmt
)
478 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
479 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
480 need_to_vectorize
= true;
483 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
484 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
485 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
486 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
487 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
488 || vectorizable_load (stmt
, NULL
, NULL
, NULL
)
489 || vectorizable_call (stmt
, NULL
, NULL
)
490 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
491 || vectorizable_condition (stmt
, NULL
, NULL
)
492 || vectorizable_reduction (stmt
, NULL
, NULL
));
494 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
495 need extra handling, except for vectorizable reductions. */
496 if (STMT_VINFO_LIVE_P (stmt_info
)
497 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
498 ok
|= vectorizable_live_operation (stmt
, NULL
, NULL
);
502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
504 fprintf (vect_dump
, "not vectorized: stmt not supported: ");
505 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
510 if (!PURE_SLP_STMT (stmt_info
))
512 /* STMT needs loop-based vectorization. */
513 only_slp_in_loop
= false;
515 /* Groups of strided accesses whose size is not a power of 2 are
516 not vectorizable yet using loop-vectorization. Therefore, if
517 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
518 both SLPed and loop-based vectorzed), the loop cannot be
520 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
521 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
522 DR_GROUP_FIRST_DR (stmt_info
)))) == -1)
524 if (vect_print_dump_info (REPORT_DETAILS
))
526 fprintf (vect_dump
, "not vectorized: the size of group "
527 "of strided accesses is not a power of 2");
528 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
536 /* All operations in the loop are either irrelevant (deal with loop
537 control, or dead), or only used outside the loop and can be moved
538 out of the loop (e.g. invariants, inductions). The loop can be
539 optimized away by scalar optimizations. We're better off not
540 touching this loop. */
541 if (!need_to_vectorize
)
543 if (vect_print_dump_info (REPORT_DETAILS
))
545 "All the computation can be taken out of the loop.");
546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
548 "not vectorized: redundant loop. no profit to vectorize.");
552 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
553 vectorization factor of the loop is the unrolling factor required by the
554 SLP instances. If that unrolling factor is 1, we say, that we perform
555 pure SLP on loop - cross iteration parallelism is not exploited. */
556 if (only_slp_in_loop
)
557 vectorization_factor
= LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
);
559 vectorization_factor
= least_common_multiple (vectorization_factor
,
560 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
));
562 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
564 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
565 && vect_print_dump_info (REPORT_DETAILS
))
567 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
568 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
570 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
571 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
573 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
574 fprintf (vect_dump
, "not vectorized: iteration count too small.");
575 if (vect_print_dump_info (REPORT_DETAILS
))
576 fprintf (vect_dump
,"not vectorized: iteration count smaller than "
577 "vectorization factor.");
581 /* Analyze cost. Decide if worth while to vectorize. */
583 /* Once VF is set, SLP costs should be updated since the number of created
584 vector stmts depends on VF. */
585 vect_update_slp_costs_according_to_vf (loop_vinfo
);
587 min_profitable_iters
= vect_estimate_min_profitable_iters (loop_vinfo
);
588 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
) = min_profitable_iters
;
589 if (min_profitable_iters
< 0)
591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
592 fprintf (vect_dump
, "not vectorized: vectorization not profitable.");
593 if (vect_print_dump_info (REPORT_DETAILS
))
594 fprintf (vect_dump
, "not vectorized: vector version will never be "
599 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
600 * vectorization_factor
) - 1);
602 /* Use the cost model only if it is more conservative than user specified
605 th
= (unsigned) min_scalar_loop_bound
;
606 if (min_profitable_iters
607 && (!min_scalar_loop_bound
608 || min_profitable_iters
> min_scalar_loop_bound
))
609 th
= (unsigned) min_profitable_iters
;
611 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
612 && LOOP_VINFO_INT_NITERS (loop_vinfo
) <= th
)
614 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
615 fprintf (vect_dump
, "not vectorized: vectorization not "
617 if (vect_print_dump_info (REPORT_DETAILS
))
618 fprintf (vect_dump
, "not vectorized: iteration count smaller than "
619 "user specified loop bound parameter or minimum "
620 "profitable iterations (whichever is more conservative).");
624 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
625 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
626 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
628 if (vect_print_dump_info (REPORT_DETAILS
))
629 fprintf (vect_dump
, "epilog loop required.");
630 if (!vect_can_advance_ivs_p (loop_vinfo
))
632 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
634 "not vectorized: can't create epilog loop 1.");
637 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
641 "not vectorized: can't create epilog loop 2.");
650 /* Function exist_non_indexing_operands_for_use_p
652 USE is one of the uses attached to STMT. Check if USE is
653 used in STMT for anything other than indexing an array. */
656 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
659 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
661 /* USE corresponds to some operand in STMT. If there is no data
662 reference in STMT, then any operand that corresponds to USE
663 is not indexing an array. */
664 if (!STMT_VINFO_DATA_REF (stmt_info
))
667 /* STMT has a data_ref. FORNOW this means that its of one of
671 (This should have been verified in analyze_data_refs).
673 'var' in the second case corresponds to a def, not a use,
674 so USE cannot correspond to any operands that are not used
677 Therefore, all we need to check is if STMT falls into the
678 first case, and whether var corresponds to USE. */
680 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
683 operand
= GIMPLE_STMT_OPERAND (stmt
, 1);
685 if (TREE_CODE (operand
) != SSA_NAME
)
695 /* Function vect_analyze_scalar_cycles_1.
697 Examine the cross iteration def-use cycles of scalar variables
698 in LOOP. LOOP_VINFO represents the loop that is noe being
699 considered for vectorization (can be LOOP, or an outer-loop
703 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo
, struct loop
*loop
)
706 basic_block bb
= loop
->header
;
708 VEC(tree
,heap
) *worklist
= VEC_alloc (tree
, heap
, 64);
710 if (vect_print_dump_info (REPORT_DETAILS
))
711 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
713 /* First - identify all inductions. */
714 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
716 tree access_fn
= NULL
;
717 tree def
= PHI_RESULT (phi
);
718 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
720 if (vect_print_dump_info (REPORT_DETAILS
))
722 fprintf (vect_dump
, "Analyze phi: ");
723 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
726 /* Skip virtual phi's. The data dependences that are associated with
727 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
728 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
731 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
733 /* Analyze the evolution function. */
734 access_fn
= analyze_scalar_evolution (loop
, def
);
735 if (access_fn
&& vect_print_dump_info (REPORT_DETAILS
))
737 fprintf (vect_dump
, "Access function of PHI: ");
738 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
742 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
744 VEC_safe_push (tree
, heap
, worklist
, phi
);
748 if (vect_print_dump_info (REPORT_DETAILS
))
749 fprintf (vect_dump
, "Detected induction.");
750 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
754 /* Second - identify all reductions. */
755 while (VEC_length (tree
, worklist
) > 0)
757 tree phi
= VEC_pop (tree
, worklist
);
758 tree def
= PHI_RESULT (phi
);
759 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
762 if (vect_print_dump_info (REPORT_DETAILS
))
764 fprintf (vect_dump
, "Analyze phi: ");
765 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
768 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
769 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
771 reduc_stmt
= vect_is_simple_reduction (loop_vinfo
, phi
);
774 if (vect_print_dump_info (REPORT_DETAILS
))
775 fprintf (vect_dump
, "Detected reduction.");
776 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
777 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
781 if (vect_print_dump_info (REPORT_DETAILS
))
782 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
785 VEC_free (tree
, heap
, worklist
);
790 /* Function vect_analyze_scalar_cycles.
792 Examine the cross iteration def-use cycles of scalar variables, by
793 analyzing the loop-header PHIs of scalar variables; Classify each
794 cycle as one of the following: invariant, induction, reduction, unknown.
795 We do that for the loop represented by LOOP_VINFO, and also to its
796 inner-loop, if exists.
797 Examples for scalar cycles:
812 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
814 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
816 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
);
818 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
819 Reductions in such inner-loop therefore have different properties than
820 the reductions in the nest that gets vectorized:
821 1. When vectorized, they are executed in the same order as in the original
822 scalar loop, so we can't change the order of computation when
824 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
825 current checks are too strict. */
828 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
->inner
);
832 /* Function vect_insert_into_interleaving_chain.
834 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
837 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
838 struct data_reference
*drb
)
840 tree prev
, next
, next_init
;
841 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
842 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
844 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
845 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
848 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
849 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
852 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
853 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
857 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
860 /* We got to the end of the list. Insert here. */
861 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
862 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL_TREE
;
866 /* Function vect_update_interleaving_chain.
868 For two data-refs DRA and DRB that are a part of a chain interleaved data
869 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
871 There are four possible cases:
872 1. New stmts - both DRA and DRB are not a part of any chain:
875 2. DRB is a part of a chain and DRA is not:
876 no need to update FIRST_DR
877 no need to insert DRB
878 insert DRA according to init
879 3. DRA is a part of a chain and DRB is not:
880 if (init of FIRST_DR > init of DRB)
882 NEXT(FIRST_DR) = previous FIRST_DR
884 insert DRB according to its init
885 4. both DRA and DRB are in some interleaving chains:
886 choose the chain with the smallest init of FIRST_DR
887 insert the nodes of the second chain into the first one. */
890 vect_update_interleaving_chain (struct data_reference
*drb
,
891 struct data_reference
*dra
)
893 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
894 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
895 tree next_init
, init_dra_chain
, init_drb_chain
, first_a
, first_b
;
896 tree node
, prev
, next
, node_init
, first_stmt
;
898 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
899 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
901 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
902 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
903 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
907 /* 2. DRB is a part of a chain and DRA is not. */
908 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
910 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
911 /* Insert DRA into the chain of DRB. */
912 vect_insert_into_interleaving_chain (dra
, drb
);
916 /* 3. DRA is a part of a chain and DRB is not. */
917 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
919 tree old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
920 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
924 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
926 /* DRB's init is smaller than the init of the stmt previously marked
927 as the first stmt of the interleaving chain of DRA. Therefore, we
928 update FIRST_STMT and put DRB in the head of the list. */
929 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
930 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
932 /* Update all the stmts in the list to point to the new FIRST_STMT. */
933 tmp
= old_first_stmt
;
936 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
937 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
942 /* Insert DRB in the list of DRA. */
943 vect_insert_into_interleaving_chain (drb
, dra
);
944 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
949 /* 4. both DRA and DRB are in some interleaving chains. */
950 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
951 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
952 if (first_a
== first_b
)
954 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
955 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
957 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
959 /* Insert the nodes of DRA chain into the DRB chain.
960 After inserting a node, continue from this node of the DRB chain (don't
961 start from the beginning. */
962 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
963 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
964 first_stmt
= first_b
;
968 /* Insert the nodes of DRB chain into the DRA chain.
969 After inserting a node, continue from this node of the DRA chain (don't
970 start from the beginning. */
971 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
972 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
973 first_stmt
= first_a
;
978 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
979 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
982 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
983 if (tree_int_cst_compare (next_init
, node_init
) > 0)
986 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
992 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
996 /* We got to the end of the list. Insert here. */
997 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL_TREE
;
1001 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
1002 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
1007 /* Function vect_equal_offsets.
1009 Check if OFFSET1 and OFFSET2 are identical expressions. */
1012 vect_equal_offsets (tree offset1
, tree offset2
)
1016 STRIP_NOPS (offset1
);
1017 STRIP_NOPS (offset2
);
1019 if (offset1
== offset2
)
1022 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1023 || !BINARY_CLASS_P (offset1
)
1024 || !BINARY_CLASS_P (offset2
))
1027 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
1028 TREE_OPERAND (offset2
, 0));
1029 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
1030 TREE_OPERAND (offset2
, 1));
1032 return (res0
&& res1
);
1036 /* Function vect_check_interleaving.
1038 Check if DRA and DRB are a part of interleaving. In case they are, insert
1039 DRA and DRB in an interleaving chain. */
1042 vect_check_interleaving (struct data_reference
*dra
,
1043 struct data_reference
*drb
)
1045 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
1047 /* Check that the data-refs have same first location (except init) and they
1048 are both either store or load (not load and store). */
1049 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
1050 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
1051 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
1052 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
1053 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
1054 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
1055 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
1056 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
1060 1. data-refs are of the same type
1061 2. their steps are equal
1062 3. the step is greater than the difference between data-refs' inits */
1063 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
1064 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
1066 if (type_size_a
!= type_size_b
1067 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
)))
1070 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
1071 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
1072 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
1074 if (init_a
> init_b
)
1076 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1077 and DRB is accessed before DRA. */
1078 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
1080 if ((init_a
- init_b
) > step
)
1083 if (diff_mod_size
== 0)
1085 vect_update_interleaving_chain (drb
, dra
);
1086 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1088 fprintf (vect_dump
, "Detected interleaving ");
1089 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1090 fprintf (vect_dump
, " and ");
1091 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1098 /* If init_b == init_a + the size of the type * k, we have an
1099 interleaving, and DRA is accessed before DRB. */
1100 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
1102 if ((init_b
- init_a
) > step
)
1105 if (diff_mod_size
== 0)
1107 vect_update_interleaving_chain (dra
, drb
);
1108 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1110 fprintf (vect_dump
, "Detected interleaving ");
1111 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1112 fprintf (vect_dump
, " and ");
1113 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1121 /* Function vect_analyze_data_ref_dependence.
1123 Return TRUE if there (might) exist a dependence between a memory-reference
1124 DRA and a memory-reference DRB. */
1127 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
1128 loop_vec_info loop_vinfo
)
1131 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1132 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1133 struct data_reference
*dra
= DDR_A (ddr
);
1134 struct data_reference
*drb
= DDR_B (ddr
);
1135 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1136 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1137 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1138 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1139 lambda_vector dist_v
;
1140 unsigned int loop_depth
;
1142 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1144 /* Independent data accesses. */
1145 vect_check_interleaving (dra
, drb
);
1149 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
1152 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1154 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1157 "versioning for alias required: can't determine dependence between ");
1158 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1159 fprintf (vect_dump
, " and ");
1160 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1165 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1167 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1169 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
1170 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1171 fprintf (vect_dump
, " and ");
1172 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1177 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1178 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
1180 int dist
= dist_v
[loop_depth
];
1182 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1183 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1185 /* Same loop iteration. */
1186 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
1188 /* Two references with distance zero have the same alignment. */
1189 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1190 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1191 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1192 fprintf (vect_dump
, "accesses have the same alignment.");
1193 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1195 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1196 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1197 fprintf (vect_dump
, " and ");
1198 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1201 /* For interleaving, mark that there is a read-write dependency if
1202 necessary. We check before that one of the data-refs is store. */
1203 if (DR_IS_READ (dra
))
1204 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
1207 if (DR_IS_READ (drb
))
1208 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
1214 if (abs (dist
) >= vectorization_factor
)
1216 /* Dependence distance does not create dependence, as far as vectorization
1217 is concerned, in this case. */
1218 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1219 fprintf (vect_dump
, "dependence distance >= VF.");
1223 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1226 "versioning for alias required: possible dependence "
1227 "between data-refs ");
1228 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1229 fprintf (vect_dump
, " and ");
1230 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1239 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1242 vect_is_duplicate_ddr (VEC (ddr_p
, heap
) * may_alias_ddrs
, ddr_p ddr_new
)
1247 for (i
= 0; VEC_iterate (ddr_p
, may_alias_ddrs
, i
, ddr
); i
++)
1249 tree dref_A_i
, dref_B_i
, dref_A_j
, dref_B_j
;
1251 dref_A_i
= DR_REF (DDR_A (ddr
));
1252 dref_B_i
= DR_REF (DDR_B (ddr
));
1253 dref_A_j
= DR_REF (DDR_A (ddr_new
));
1254 dref_B_j
= DR_REF (DDR_B (ddr_new
));
1256 if ((operand_equal_p (dref_A_i
, dref_A_j
, 0)
1257 && operand_equal_p (dref_B_i
, dref_B_j
, 0))
1258 || (operand_equal_p (dref_A_i
, dref_B_j
, 0)
1259 && operand_equal_p (dref_B_i
, dref_A_j
, 0)))
1261 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1263 fprintf (vect_dump
, "found same pair of data references ");
1264 print_generic_expr (vect_dump
, dref_A_i
, TDF_SLIM
);
1265 fprintf (vect_dump
, " and ");
1266 print_generic_expr (vect_dump
, dref_B_i
, TDF_SLIM
);
1274 /* Save DDR in LOOP_VINFO list of ddrs that may alias and need to be
1275 tested at run-time. Returns false if number of run-time checks
1276 inserted by vectorizer is greater than maximum defined by
1277 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1279 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
1281 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1283 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1285 fprintf (vect_dump
, "mark for run-time aliasing test between ");
1286 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
1287 fprintf (vect_dump
, " and ");
1288 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
1291 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1294 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1295 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
1299 /* Do not add to the list duplicate ddrs. */
1300 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
))
1303 if (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
))
1304 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
1306 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1309 "disable versioning for alias - max number of generated "
1310 "checks exceeded.");
1313 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
1317 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
1321 /* Function vect_analyze_data_ref_dependences.
1323 Examine all the data references in the loop, and make sure there do not
1324 exist any data dependences between them. */
1327 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1330 VEC (ddr_p
, heap
) * ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1331 struct data_dependence_relation
*ddr
;
1333 if (vect_print_dump_info (REPORT_DETAILS
))
1334 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1336 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1337 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
1339 /* Add to list of ddrs that need to be tested at run-time. */
1340 if (!vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
))
1348 /* Function vect_compute_data_ref_alignment
1350 Compute the misalignment of the data reference DR.
1353 1. If during the misalignment computation it is found that the data reference
1354 cannot be vectorized then false is returned.
1355 2. DR_MISALIGNMENT (DR) is defined.
1357 FOR NOW: No analysis is actually performed. Misalignment is calculated
1358 only for trivial cases. TODO. */
1361 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1363 tree stmt
= DR_STMT (dr
);
1364 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1365 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1366 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1367 tree ref
= DR_REF (dr
);
1369 tree base
, base_addr
;
1372 tree aligned_to
, alignment
;
1374 if (vect_print_dump_info (REPORT_DETAILS
))
1375 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1377 /* Initialize misalignment to unknown. */
1378 SET_DR_MISALIGNMENT (dr
, -1);
1380 misalign
= DR_INIT (dr
);
1381 aligned_to
= DR_ALIGNED_TO (dr
);
1382 base_addr
= DR_BASE_ADDRESS (dr
);
1384 /* In case the dataref is in an inner-loop of the loop that is being
1385 vectorized (LOOP), we use the base and misalignment information
1386 relative to the outer-loop (LOOP). This is ok only if the misalignment
1387 stays the same throughout the execution of the inner-loop, which is why
1388 we have to check that the stride of the dataref in the inner-loop evenly
1389 divides by the vector size. */
1390 if (nested_in_vect_loop_p (loop
, stmt
))
1392 tree step
= DR_STEP (dr
);
1393 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1395 if (dr_step
% UNITS_PER_SIMD_WORD
== 0)
1397 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1398 fprintf (vect_dump
, "inner step divides the vector-size.");
1399 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
1400 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
1401 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
1405 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1406 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
1407 misalign
= NULL_TREE
;
1411 base
= build_fold_indirect_ref (base_addr
);
1412 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1413 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1415 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1418 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1420 fprintf (vect_dump
, "Unknown alignment for access: ");
1421 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
1427 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1429 || (TREE_CODE (base_addr
) == SSA_NAME
1430 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1431 TREE_TYPE (base_addr
)))),
1433 base_aligned
= true;
1435 base_aligned
= false;
1439 /* Do not change the alignment of global variables if
1440 flag_section_anchors is enabled. */
1441 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1442 || (TREE_STATIC (base
) && flag_section_anchors
))
1444 if (vect_print_dump_info (REPORT_DETAILS
))
1446 fprintf (vect_dump
, "can't force alignment of ref: ");
1447 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1452 /* Force the alignment of the decl.
1453 NOTE: This is the only change to the code we make during
1454 the analysis phase, before deciding to vectorize the loop. */
1455 if (vect_print_dump_info (REPORT_DETAILS
))
1456 fprintf (vect_dump
, "force alignment");
1457 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1458 DECL_USER_ALIGN (base
) = 1;
1461 /* At this point we assume that the base is aligned. */
1462 gcc_assert (base_aligned
1463 || (TREE_CODE (base
) == VAR_DECL
1464 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1466 /* Modulo alignment. */
1467 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1469 if (!host_integerp (misalign
, 1))
1471 /* Negative or overflowed misalignment value. */
1472 if (vect_print_dump_info (REPORT_DETAILS
))
1473 fprintf (vect_dump
, "unexpected misalign value");
1477 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
1479 if (vect_print_dump_info (REPORT_DETAILS
))
1481 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1482 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1489 /* Function vect_compute_data_refs_alignment
1491 Compute the misalignment of data references in the loop.
1492 Return FALSE if a data reference is found that cannot be vectorized. */
1495 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1497 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1498 struct data_reference
*dr
;
1501 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1502 if (!vect_compute_data_ref_alignment (dr
))
1509 /* Function vect_update_misalignment_for_peel
1511 DR - the data reference whose misalignment is to be adjusted.
1512 DR_PEEL - the data reference whose misalignment is being made
1513 zero in the vector loop by the peel.
1514 NPEEL - the number of iterations in the peel loop if the misalignment
1515 of DR_PEEL is known at compile time. */
1518 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1519 struct data_reference
*dr_peel
, int npeel
)
1522 VEC(dr_p
,heap
) *same_align_drs
;
1523 struct data_reference
*current_dr
;
1524 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1525 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1526 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1527 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1529 /* For interleaved data accesses the step in the loop must be multiplied by
1530 the size of the interleaving group. */
1531 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1532 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1533 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
1534 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1536 /* It can be assumed that the data refs with the same alignment as dr_peel
1537 are aligned in the vector loop. */
1539 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1540 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1542 if (current_dr
!= dr
)
1544 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1545 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1546 SET_DR_MISALIGNMENT (dr
, 0);
1550 if (known_alignment_for_access_p (dr
)
1551 && known_alignment_for_access_p (dr_peel
))
1553 int misal
= DR_MISALIGNMENT (dr
);
1554 misal
+= npeel
* dr_size
;
1555 misal
%= UNITS_PER_SIMD_WORD
;
1556 SET_DR_MISALIGNMENT (dr
, misal
);
1560 if (vect_print_dump_info (REPORT_DETAILS
))
1561 fprintf (vect_dump
, "Setting misalignment to -1.");
1562 SET_DR_MISALIGNMENT (dr
, -1);
1566 /* Function vect_verify_datarefs_alignment
1568 Return TRUE if all data references in the loop can be
1569 handled with respect to alignment. */
1572 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1574 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1575 struct data_reference
*dr
;
1576 enum dr_alignment_support supportable_dr_alignment
;
1579 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1581 tree stmt
= DR_STMT (dr
);
1582 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1584 /* For interleaving, only the alignment of the first access matters. */
1585 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1586 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1589 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1590 if (!supportable_dr_alignment
)
1592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1594 if (DR_IS_READ (dr
))
1596 "not vectorized: unsupported unaligned load.");
1599 "not vectorized: unsupported unaligned store.");
1603 if (supportable_dr_alignment
!= dr_aligned
1604 && vect_print_dump_info (REPORT_ALIGNMENT
))
1605 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1611 /* Function vector_alignment_reachable_p
1613 Return true if vector alignment for DR is reachable by peeling
1614 a few loop iterations. Return false otherwise. */
1617 vector_alignment_reachable_p (struct data_reference
*dr
)
1619 tree stmt
= DR_STMT (dr
);
1620 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1621 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1623 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1625 /* For interleaved access we peel only if number of iterations in
1626 the prolog loop ({VF - misalignment}), is a multiple of the
1627 number of the interleaved accesses. */
1628 int elem_size
, mis_in_elements
;
1629 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1631 /* FORNOW: handle only known alignment. */
1632 if (!known_alignment_for_access_p (dr
))
1635 elem_size
= UNITS_PER_SIMD_WORD
/ nelements
;
1636 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1638 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1642 /* If misalignment is known at the compile time then allow peeling
1643 only if natural alignment is reachable through peeling. */
1644 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1646 HOST_WIDE_INT elmsize
=
1647 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1648 if (vect_print_dump_info (REPORT_DETAILS
))
1650 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1651 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1653 if (DR_MISALIGNMENT (dr
) % elmsize
)
1655 if (vect_print_dump_info (REPORT_DETAILS
))
1656 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1661 if (!known_alignment_for_access_p (dr
))
1663 tree type
= (TREE_TYPE (DR_REF (dr
)));
1664 tree ba
= DR_BASE_OBJECT (dr
);
1665 bool is_packed
= false;
1668 is_packed
= contains_packed_reference (ba
);
1670 if (vect_print_dump_info (REPORT_DETAILS
))
1671 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1672 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1681 /* Function vect_enhance_data_refs_alignment
1683 This pass will use loop versioning and loop peeling in order to enhance
1684 the alignment of data references in the loop.
1686 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1687 original loop is to be vectorized; Any other loops that are created by
1688 the transformations performed in this pass - are not supposed to be
1689 vectorized. This restriction will be relaxed.
1691 This pass will require a cost model to guide it whether to apply peeling
1692 or versioning or a combination of the two. For example, the scheme that
1693 intel uses when given a loop with several memory accesses, is as follows:
1694 choose one memory access ('p') which alignment you want to force by doing
1695 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1696 other accesses are not necessarily aligned, or (2) use loop versioning to
1697 generate one loop in which all accesses are aligned, and another loop in
1698 which only 'p' is necessarily aligned.
1700 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1701 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1702 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1704 Devising a cost model is the most critical aspect of this work. It will
1705 guide us on which access to peel for, whether to use loop versioning, how
1706 many versions to create, etc. The cost model will probably consist of
1707 generic considerations as well as target specific considerations (on
1708 powerpc for example, misaligned stores are more painful than misaligned
1711 Here are the general steps involved in alignment enhancements:
1713 -- original loop, before alignment analysis:
1714 for (i=0; i<N; i++){
1715 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1716 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1719 -- After vect_compute_data_refs_alignment:
1720 for (i=0; i<N; i++){
1721 x = q[i]; # DR_MISALIGNMENT(q) = 3
1722 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1725 -- Possibility 1: we do loop versioning:
1727 for (i=0; i<N; i++){ # loop 1A
1728 x = q[i]; # DR_MISALIGNMENT(q) = 3
1729 p[i] = y; # DR_MISALIGNMENT(p) = 0
1733 for (i=0; i<N; i++){ # loop 1B
1734 x = q[i]; # DR_MISALIGNMENT(q) = 3
1735 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1739 -- Possibility 2: we do loop peeling:
1740 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1744 for (i = 3; i < N; i++){ # loop 2A
1745 x = q[i]; # DR_MISALIGNMENT(q) = 0
1746 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1749 -- Possibility 3: combination of loop peeling and versioning:
1750 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1755 for (i = 3; i<N; i++){ # loop 3A
1756 x = q[i]; # DR_MISALIGNMENT(q) = 0
1757 p[i] = y; # DR_MISALIGNMENT(p) = 0
1761 for (i = 3; i<N; i++){ # loop 3B
1762 x = q[i]; # DR_MISALIGNMENT(q) = 0
1763 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1767 These loops are later passed to loop_transform to be vectorized. The
1768 vectorizer will use the alignment information to guide the transformation
1769 (whether to generate regular loads/stores, or with special handling for
1773 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1775 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1776 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1777 enum dr_alignment_support supportable_dr_alignment
;
1778 struct data_reference
*dr0
= NULL
;
1779 struct data_reference
*dr
;
1781 bool do_peeling
= false;
1782 bool do_versioning
= false;
1785 stmt_vec_info stmt_info
;
1786 int vect_versioning_for_alias_required
;
1788 if (vect_print_dump_info (REPORT_DETAILS
))
1789 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1791 /* While cost model enhancements are expected in the future, the high level
1792 view of the code at this time is as follows:
1794 A) If there is a misaligned write then see if peeling to align this write
1795 can make all data references satisfy vect_supportable_dr_alignment.
1796 If so, update data structures as needed and return true. Note that
1797 at this time vect_supportable_dr_alignment is known to return false
1798 for a misaligned write.
1800 B) If peeling wasn't possible and there is a data reference with an
1801 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1802 then see if loop versioning checks can be used to make all data
1803 references satisfy vect_supportable_dr_alignment. If so, update
1804 data structures as needed and return true.
1806 C) If neither peeling nor versioning were successful then return false if
1807 any data reference does not satisfy vect_supportable_dr_alignment.
1809 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1811 Note, Possibility 3 above (which is peeling and versioning together) is not
1812 being done at this time. */
1814 /* (1) Peeling to force alignment. */
1816 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1818 + How many accesses will become aligned due to the peeling
1819 - How many accesses will become unaligned due to the peeling,
1820 and the cost of misaligned accesses.
1821 - The cost of peeling (the extra runtime checks, the increase
1824 The scheme we use FORNOW: peel to force the alignment of the first
1825 misaligned store in the loop.
1826 Rationale: misaligned stores are not yet supported.
1828 TODO: Use a cost model. */
1830 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1832 stmt
= DR_STMT (dr
);
1833 stmt_info
= vinfo_for_stmt (stmt
);
1835 /* For interleaving, only the alignment of the first access
1837 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1838 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1841 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1843 do_peeling
= vector_alignment_reachable_p (dr
);
1846 if (!do_peeling
&& vect_print_dump_info (REPORT_DETAILS
))
1847 fprintf (vect_dump
, "vector alignment may not be reachable");
1852 vect_versioning_for_alias_required
=
1853 (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)) > 0);
1855 /* Temporarily, if versioning for alias is required, we disable peeling
1856 until we support peeling and versioning. Often peeling for alignment
1857 will require peeling for loop-bound, which in turn requires that we
1858 know how to adjust the loop ivs after the loop. */
1859 if (vect_versioning_for_alias_required
1860 || !vect_can_advance_ivs_p (loop_vinfo
)
1861 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1868 tree stmt
= DR_STMT (dr0
);
1869 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1870 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1871 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1873 if (known_alignment_for_access_p (dr0
))
1875 /* Since it's known at compile time, compute the number of iterations
1876 in the peeled loop (the peeling factor) for use in updating
1877 DR_MISALIGNMENT values. The peeling factor is the vectorization
1878 factor minus the misalignment as an element count. */
1879 mis
= DR_MISALIGNMENT (dr0
);
1880 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1881 npeel
= nelements
- mis
;
1883 /* For interleaved data access every iteration accesses all the
1884 members of the group, therefore we divide the number of iterations
1885 by the group size. */
1886 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1887 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1888 npeel
/= DR_GROUP_SIZE (stmt_info
);
1890 if (vect_print_dump_info (REPORT_DETAILS
))
1891 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1894 /* Ensure that all data refs can be vectorized after the peel. */
1895 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1897 int save_misalignment
;
1902 stmt
= DR_STMT (dr
);
1903 stmt_info
= vinfo_for_stmt (stmt
);
1904 /* For interleaving, only the alignment of the first access
1906 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1907 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1910 save_misalignment
= DR_MISALIGNMENT (dr
);
1911 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1912 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1913 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1915 if (!supportable_dr_alignment
)
1924 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1925 If the misalignment of DR_i is identical to that of dr0 then set
1926 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1927 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1928 by the peeling factor times the element size of DR_i (MOD the
1929 vectorization factor times the size). Otherwise, the
1930 misalignment of DR_i must be set to unknown. */
1931 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1933 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1935 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1936 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1937 SET_DR_MISALIGNMENT (dr0
, 0);
1938 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1939 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1941 if (vect_print_dump_info (REPORT_DETAILS
))
1942 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1944 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1951 /* (2) Versioning to force alignment. */
1953 /* Try versioning if:
1954 1) flag_tree_vect_loop_version is TRUE
1955 2) optimize_size is FALSE
1956 3) there is at least one unsupported misaligned data ref with an unknown
1958 4) all misaligned data refs with a known misalignment are supported, and
1959 5) the number of runtime alignment checks is within reason. */
1962 flag_tree_vect_loop_version
1964 && (!loop
->inner
); /* FORNOW */
1968 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1970 stmt
= DR_STMT (dr
);
1971 stmt_info
= vinfo_for_stmt (stmt
);
1973 /* For interleaving, only the alignment of the first access
1975 if (aligned_access_p (dr
)
1976 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1977 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1980 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1982 if (!supportable_dr_alignment
)
1988 if (known_alignment_for_access_p (dr
)
1989 || VEC_length (tree
,
1990 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1991 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1993 do_versioning
= false;
1997 stmt
= DR_STMT (dr
);
1998 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1999 gcc_assert (vectype
);
2001 /* The rightmost bits of an aligned address must be zeros.
2002 Construct the mask needed for this test. For example,
2003 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2004 mask must be 15 = 0xf. */
2005 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
2007 /* FORNOW: use the same mask to test all potentially unaligned
2008 references in the loop. The vectorizer currently supports
2009 a single vector size, see the reference to
2010 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2011 vectorization factor is computed. */
2012 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
2013 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
2014 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
2015 VEC_safe_push (tree
, heap
,
2016 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
2021 /* Versioning requires at least one misaligned data reference. */
2022 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
2023 do_versioning
= false;
2024 else if (!do_versioning
)
2025 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
2030 VEC(tree
,heap
) *may_misalign_stmts
2031 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
2034 /* It can now be assumed that the data references in the statements
2035 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2036 of the loop being vectorized. */
2037 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
2039 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2040 dr
= STMT_VINFO_DATA_REF (stmt_info
);
2041 SET_DR_MISALIGNMENT (dr
, 0);
2042 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2043 fprintf (vect_dump
, "Alignment of access forced using versioning.");
2046 if (vect_print_dump_info (REPORT_DETAILS
))
2047 fprintf (vect_dump
, "Versioning for alignment will be applied.");
2049 /* Peeling and versioning can't be done together at this time. */
2050 gcc_assert (! (do_peeling
&& do_versioning
));
2052 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2057 /* This point is reached if neither peeling nor versioning is being done. */
2058 gcc_assert (! (do_peeling
|| do_versioning
));
2060 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2065 /* Function vect_analyze_data_refs_alignment
2067 Analyze the alignment of the data-references in the loop.
2068 Return FALSE if a data reference is found that cannot be vectorized. */
2071 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
2073 if (vect_print_dump_info (REPORT_DETAILS
))
2074 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
2076 if (!vect_compute_data_refs_alignment (loop_vinfo
))
2078 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2080 "not vectorized: can't calculate alignment for data ref.");
2088 /* Analyze groups of strided accesses: check that DR belongs to a group of
2089 strided accesses of legal size, step, etc. Detect gaps, single element
2090 interleaving, and other special cases. Set strided access info.
2091 Collect groups of strided stores for further use in SLP analysis. */
/* NOTE(review): lossy extraction -- braces, return statements and parts of
   conditions (e.g. the head of the if at original line 2119) are missing
   from this view.  Code tokens are kept verbatim; only comments added.  */
2094 vect_analyze_group_access (struct data_reference
*dr
)
/* Gather the data-ref's step, element type/size and owning stmt/loop info.  */
2096 tree step
= DR_STEP (dr
);
2097 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2098 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2099 tree stmt
= DR_STMT (dr
);
2100 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2101 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2102 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2103 HOST_WIDE_INT stride
;
2104 bool slp_impossible
= false;
2106 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2107 interleaving group (including gaps). */
2108 stride
= dr_step
/ type_size
;
2110 /* Not consecutive access is possible only if it is a part of interleaving. */
2111 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
2113 /* Check if it this DR is a part of interleaving, and is a single
2114 element of the group that is accessed in the loop. */
2116 /* Gaps are supported only for loads. STEP must be a multiple of the type
2117 size. The size of the group must be a power of 2. */
2119 && (dr_step
% type_size
) == 0
2121 && exact_log2 (stride
) != -1)
/* Single-element interleaving: record STMT as its own group leader with
   group size STRIDE.  */
2123 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
2124 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2125 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2127 fprintf (vect_dump
, "Detected single element interleaving %d ",
2128 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
2129 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2130 fprintf (vect_dump
, " step ");
2131 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2135 if (vect_print_dump_info (REPORT_DETAILS
))
2136 fprintf (vect_dump
, "not consecutive access");
/* If STMT is the leader of an interleaving chain, walk the chain and
   validate every member.  */
2140 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
2142 /* First stmt in the interleaving chain. Check the chain. */
2143 tree next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
2144 struct data_reference
*data_ref
= dr
;
2145 unsigned int count
= 1;
2147 tree prev_init
= DR_INIT (data_ref
);
2149 HOST_WIDE_INT diff
, count_in_bytes
;
2153 /* Skip same data-refs. In case that two or more stmts share data-ref
2154 (supported only for loads), we vectorize only the first stmt, and
2155 the rest get their vectorized loads from the first one. */
2156 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2157 DR_INIT (STMT_VINFO_DATA_REF (
2158 vinfo_for_stmt (next
)))))
2160 if (!DR_IS_READ (data_ref
))
2162 if (vect_print_dump_info (REPORT_DETAILS
))
2163 fprintf (vect_dump
, "Two store stmts share the same dr.");
2167 /* Check that there is no load-store dependencies for this loads
2168 to prevent a case of load-store-load to the same location. */
2169 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2170 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2172 if (vect_print_dump_info (REPORT_DETAILS
))
2174 "READ_WRITE dependence in interleaving.");
2178 /* For load use the same data-ref load. */
2179 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2182 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2187 /* Check that all the accesses have the same STEP. */
2188 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2189 if (tree_int_cst_compare (step
, next_step
))
2191 if (vect_print_dump_info (REPORT_DETAILS
))
2192 fprintf (vect_dump
, "not consecutive access in interleaving");
2196 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2197 /* Check that the distance between two accesses is equal to the type
2198 size. Otherwise, we have gaps. */
2199 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2200 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2203 /* FORNOW: SLP of accesses with gaps is not supported. */
2204 slp_impossible
= true;
2205 if (!DR_IS_READ (data_ref
))
2207 if (vect_print_dump_info (REPORT_DETAILS
))
2208 fprintf (vect_dump
, "interleaved store with gaps");
2213 /* Store the gap from the previous member of the group. If there is no
2214 gap in the access, DR_GROUP_GAP is always 1. */
2215 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2217 prev_init
= DR_INIT (data_ref
);
2218 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2219 /* Count the number of data-refs in the chain. */
2223 /* COUNT is the number of accesses found, we multiply it by the size of
2224 the type to get COUNT_IN_BYTES. */
2225 count_in_bytes
= type_size
* count
;
2227 /* Check that the size of the interleaving is not greater than STEP. */
2228 if (dr_step
< count_in_bytes
)
2230 if (vect_print_dump_info (REPORT_DETAILS
))
2232 fprintf (vect_dump
, "interleaving size is greater than step for ");
2233 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2238 /* Check that the size of the interleaving is equal to STEP for stores,
2239 i.e., that there are no gaps. */
2240 if (!DR_IS_READ (dr
) && dr_step
!= count_in_bytes
)
2242 if (vect_print_dump_info (REPORT_DETAILS
))
2243 fprintf (vect_dump
, "interleaved store with gaps");
2247 /* Check that STEP is a multiple of type size. */
2248 if ((dr_step
% type_size
) != 0)
2250 if (vect_print_dump_info (REPORT_DETAILS
))
2252 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2253 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2254 fprintf (vect_dump
, " size ");
2255 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2261 /* FORNOW: we handle only interleaving that is a power of 2.
2262 We don't fail here if it may be still possible to vectorize the
2263 group using SLP. If not, the size of the group will be checked in
2264 vect_analyze_operations, and the vectorization will fail. */
2265 if (exact_log2 (stride
) == -1)
2267 if (vect_print_dump_info (REPORT_DETAILS
))
2268 fprintf (vect_dump
, "interleaving is not a power of 2");
/* The chain checked out: record the final group size on the leader.  */
2273 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2274 if (vect_print_dump_info (REPORT_DETAILS
))
2275 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
2277 /* SLP: create an SLP data structure for every interleaving group of
2278 stores for further analysis in vect_analyse_slp. */
2279 if (!DR_IS_READ (dr
) && !slp_impossible
)
2280 VEC_safe_push (tree
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
), stmt
);
2287 /* Analyze the access pattern of the data-reference DR.
2288 In case of non-consecutive accesse call vect_analyze_group_access() to
2289 analyze groups of strided accesses. */
/* NOTE(review): lossy extraction -- braces, return statements and several
   conditions are missing from this view.  Code tokens kept verbatim.  */
2292 vect_analyze_data_ref_access (struct data_reference
*dr
)
2294 tree step
= DR_STEP (dr
);
2295 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2296 tree stmt
= DR_STMT (dr
);
2297 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2298 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2299 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2300 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
/* The guarding condition for this diagnostic is not visible here;
   presumably it fires when STEP is absent/non-constant -- TODO confirm.  */
2304 if (vect_print_dump_info (REPORT_DETAILS
))
2305 fprintf (vect_dump
, "bad data-ref access");
2309 /* Don't allow invariant accesses. */
2313 if (nested_in_vect_loop_p (loop
, stmt
))
2315 /* For the rest of the analysis we use the outer-loop step. */
2316 step
= STMT_VINFO_DR_STEP (stmt_info
);
2317 dr_step
= TREE_INT_CST_LOW (step
);
2321 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2322 fprintf (vect_dump
, "zero step in outer loop.");
2323 if (DR_IS_READ (dr
))
/* Consecutive access: STEP equals the scalar type size, so this is not
   an interleaved access.  */
2331 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
2333 /* Mark that it is not interleaving. */
2334 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
2338 if (nested_in_vect_loop_p (loop
, stmt
))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2341 fprintf (vect_dump
, "strided access in outer loop.");
2345 /* Not consecutive access - check if it's a part of interleaving group. */
2346 return vect_analyze_group_access (dr
);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
/* NOTE(review): lossy extraction -- return type, braces and the return
   statements are missing from this view.  Code tokens kept verbatim.  */
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
2363 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2364 struct data_reference
*dr
;
2366 if (vect_print_dump_info (REPORT_DETAILS
))
2367 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
/* Analyze every data-ref collected for the loop; one bad access pattern
   makes the whole loop unvectorizable.  */
2369 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2370 if (!vect_analyze_data_ref_access (dr
))
2372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2373 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2381 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): lossy extraction -- braces and the final free of NODE
   itself (if any) are missing from this view.  Code tokens verbatim.  */
2384 vect_free_slp_tree (slp_tree node
)
/* Post-order traversal: free both children before this node's vectors.  */
2389 if (SLP_TREE_LEFT (node
))
2390 vect_free_slp_tree (SLP_TREE_LEFT (node
));
2392 if (SLP_TREE_RIGHT (node
))
2393 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
2395 VEC_free (tree
, heap
, SLP_TREE_SCALAR_STMTS (node
));
/* The vectorized-stmts vector exists only after transformation started.  */
2397 if (SLP_TREE_VEC_STMTS (node
))
2398 VEC_free (tree
, heap
, SLP_TREE_VEC_STMTS (node
));
2404 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2405 of a legal type and that they match the defs of the first stmt of the SLP
2406 group (stored in FIRST_STMT_...). */
/* NOTE(review): lossy extraction -- braces, return statements, the switch
   head for the def-type dispatch and several condition lines are missing
   from this view.  Code tokens kept verbatim; only comments added.  */
2409 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, slp_tree slp_node
,
2410 tree rhs
, VEC (tree
, heap
) **def_stmts0
,
2411 VEC (tree
, heap
) **def_stmts1
,
2412 enum vect_def_type
*first_stmt_dt0
,
2413 enum vect_def_type
*first_stmt_dt1
,
2414 tree
*first_stmt_def0_type
,
2415 tree
*first_stmt_def1_type
,
2416 tree
*first_stmt_const_oprnd
,
2417 int ncopies_for_cost
)
2420 enum operation_type op_type
= TREE_OPERAND_LENGTH (rhs
);
2421 unsigned int i
, number_of_oprnds
= op_type
;
2423 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2424 stmt_vec_info stmt_info
=
2425 vinfo_for_stmt (VEC_index (tree
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
/* The guard selecting one operand is not visible; presumably this branch
   handles a single-operand RHS -- TODO confirm against full source.  */
2429 number_of_oprnds
= 1;
2431 gcc_assert (op_type
== unary_op
|| op_type
== binary_op
);
/* Check each operand of RHS in turn.  */
2433 for (i
= 0; i
< number_of_oprnds
; i
++)
2436 oprnd
= TREE_OPERAND (rhs
, i
);
2440 if (!vect_is_simple_use (oprnd
, loop_vinfo
, &def_stmt
, &def
, &dt
[i
])
2441 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
2443 if (vect_print_dump_info (REPORT_SLP
))
2445 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
2446 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
/* First stmt of the group: remember its operand-0 def info as the
   reference that all later group members must match.  */
2452 if (!*first_stmt_dt0
)
2454 /* op0 of the first stmt of the group - store its info. */
2455 *first_stmt_dt0
= dt
[i
];
2457 *first_stmt_def0_type
= TREE_TYPE (def
);
2459 *first_stmt_const_oprnd
= oprnd
;
2461 /* Analyze costs (for the first stmt of the group only). */
2463 /* Not memory operation (we don't call this functions for loads). */
2464 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
2467 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
2472 if (!*first_stmt_dt1
&& i
== 1)
2474 /* op1 of the first stmt of the group - store its info. */
2475 *first_stmt_dt1
= dt
[i
];
2477 *first_stmt_def1_type
= TREE_TYPE (def
);
2480 /* We assume that the stmt contains only one constant
2481 operand. We fail otherwise, to be on the safe side. */
2482 if (*first_stmt_const_oprnd
)
2484 if (vect_print_dump_info (REPORT_SLP
))
2485 fprintf (vect_dump
, "Build SLP failed: two constant "
2489 *first_stmt_const_oprnd
= oprnd
;
2494 /* Not first stmt of the group, check that the def-stmt/s match
2495 the def-stmt/s of the first stmt. */
2497 && (*first_stmt_dt0
!= dt
[i
]
2498 || (*first_stmt_def0_type
&& def
2499 && *first_stmt_def0_type
!= TREE_TYPE (def
))))
2501 && (*first_stmt_dt1
!= dt
[i
]
2502 || (*first_stmt_def1_type
&& def
2503 && *first_stmt_def1_type
!= TREE_TYPE (def
))))
2505 && TREE_TYPE (*first_stmt_const_oprnd
)
2506 != TREE_TYPE (oprnd
)))
2508 if (vect_print_dump_info (REPORT_SLP
))
2509 fprintf (vect_dump
, "Build SLP failed: different types ");
2516 /* Check the types of the definitions. */
/* Constants and invariants need no def-stmt collection; loop defs are
   pushed onto the per-operand worklists below.  */
2519 case vect_constant_def
:
2520 case vect_invariant_def
:
2525 VEC_safe_push (tree
, heap
, *def_stmts0
, def_stmt
);
2527 VEC_safe_push (tree
, heap
, *def_stmts1
, def_stmt
);
2531 /* FORNOW: Not supported. */
2532 if (vect_print_dump_info (REPORT_SLP
))
2534 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
2535 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
2546 /* Recursively build an SLP tree starting from NODE.
2547 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2548 permutation or are of unsupported types of operation. Otherwise, return
2550 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2551 in the case of multiple types for now. */
/* NOTE(review): lossy extraction -- braces, return statements, several
   condition heads (e.g. the multiple-types check, the optab NULL check)
   and some argument lines of the two vect_get_and_check_slp_defs calls
   are missing from this view.  Code tokens kept verbatim.  */
2554 vect_build_slp_tree (loop_vec_info loop_vinfo
, slp_tree
*node
,
2555 unsigned int group_size
, bool *slp_impossible
,
2556 int *inside_cost
, int *outside_cost
,
2557 int ncopies_for_cost
)
2559 VEC (tree
, heap
) *def_stmts0
= VEC_alloc (tree
, heap
, group_size
);
2560 VEC (tree
, heap
) *def_stmts1
= VEC_alloc (tree
, heap
, group_size
);
2562 VEC (tree
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
2563 tree stmt
= VEC_index (tree
, stmts
, 0);
2564 enum vect_def_type first_stmt_dt0
= 0, first_stmt_dt1
= 0;
2565 enum tree_code first_stmt_code
= 0;
2566 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
2567 tree lhs
, rhs
, prev_stmt
= NULL_TREE
;
2568 bool stop_recursion
= false, need_same_oprnds
= false;
2569 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
2570 unsigned int vectorization_factor
= 0, ncopies
;
2573 enum machine_mode optab_op2_mode
;
2574 enum machine_mode vec_mode
;
2575 tree first_stmt_const_oprnd
= NULL_TREE
;
2576 struct data_reference
*first_dr
;
2578 /* For every stmt in NODE find its def stmt/s. */
2579 for (i
= 0; VEC_iterate (tree
, stmts
, i
, stmt
); i
++)
2581 if (vect_print_dump_info (REPORT_SLP
))
2583 fprintf (vect_dump
, "Build SLP for ");
2584 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
/* Only plain GIMPLE_MODIFY_STMT assignments can be packed.  */
2587 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2589 if (vect_print_dump_info (REPORT_SLP
))
2591 fprintf (vect_dump
, "Build SLP failed: not MODIFY_STMT ");
2592 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2598 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
2599 vectype
= get_vectype_for_scalar_type (scalar_type
);
2600 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
2601 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2602 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
/* Multiple vector copies per scalar stmt are not supported by SLP here;
   the guarding condition (ncopies != 1, presumably) is not visible.  */
2606 if (vect_print_dump_info (REPORT_SLP
))
2607 fprintf (vect_dump
, "SLP failed - multiple types ");
2609 *slp_impossible
= true;
2613 lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
2614 rhs
= GIMPLE_STMT_OPERAND (stmt
, 1);
2616 /* Check the operation. */
/* First stmt fixes the tree code all group members must share.  */
2619 first_stmt_code
= TREE_CODE (rhs
);
2621 /* Shift arguments should be equal in all the packed stmts for a
2622 vector shift with scalar shift operand. */
2623 if (TREE_CODE (rhs
) == LSHIFT_EXPR
|| TREE_CODE (rhs
) == RSHIFT_EXPR
)
2625 vec_mode
= TYPE_MODE (vectype
);
2626 optab
= optab_for_tree_code (TREE_CODE (rhs
), vectype
);
2629 if (vect_print_dump_info (REPORT_SLP
))
2630 fprintf (vect_dump
, "Build SLP failed: no optab.");
2633 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
2634 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
/* A scalar (non-vector) shift operand forces all group members to use
   the identical shift amount.  */
2635 if (!VECTOR_MODE_P (optab_op2_mode
))
2637 need_same_oprnds
= true;
2638 first_op1
= TREE_OPERAND (rhs
, 1);
/* Subsequent stmts: operation must match the first stmt's.  */
2644 if (first_stmt_code
!= TREE_CODE (rhs
))
2646 if (vect_print_dump_info (REPORT_SLP
))
2649 "Build SLP failed: different operation in stmt ");
2650 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2656 if (need_same_oprnds
2657 && !operand_equal_p (first_op1
, TREE_OPERAND (rhs
, 1), 0))
2659 if (vect_print_dump_info (REPORT_SLP
))
2662 "Build SLP failed: different shift arguments in ");
2663 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2670 /* Strided store or load. */
2671 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
/* A reference on the LHS means this is a strided store: recurse into
   the RHS defs.  */
2673 if (REFERENCE_CLASS_P (lhs
))
2676 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, rhs
,
2677 &def_stmts0
, &def_stmts1
,
2680 &first_stmt_def0_type
,
2681 &first_stmt_def1_type
,
2682 &first_stmt_const_oprnd
,
2691 /* First stmt of the SLP group should be the first load of
2692 the interleaving loop if data permutation is not
2694 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
)
2696 /* FORNOW: data permutations are not supported. */
2697 if (vect_print_dump_info (REPORT_SLP
))
2699 fprintf (vect_dump
, "Build SLP failed: strided "
2700 " loads need permutation ");
2701 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
/* Unaligned strided loads without target support cannot be SLPed.  */
2707 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2708 if (vect_supportable_dr_alignment (first_dr
)
2709 == dr_unaligned_unsupported
)
2711 if (vect_print_dump_info (REPORT_SLP
))
2713 fprintf (vect_dump
, "Build SLP failed: unsupported "
2714 " unaligned load ");
2715 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2721 /* Analyze costs (for the first stmt in the group). */
2722 vect_model_load_cost (vinfo_for_stmt (stmt
),
2723 ncopies_for_cost
, *node
);
/* Non-leading loads must follow the interleaving chain order.  */
2727 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt
)) != stmt
)
2729 /* FORNOW: data permutations are not supported. */
2730 if (vect_print_dump_info (REPORT_SLP
))
2732 fprintf (vect_dump
, "Build SLP failed: strided "
2733 " loads need permutation ");
2734 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2742 /* We stop the tree when we reach a group of loads. */
2743 stop_recursion
= true;
2746 } /* Strided access. */
2749 if (REFERENCE_CLASS_P (rhs
))
2751 /* Not strided load. */
2752 if (vect_print_dump_info (REPORT_SLP
))
2754 fprintf (vect_dump
, "Build SLP failed: not strided load ");
2755 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2758 /* FORNOW: Not strided loads are not supported. */
2762 /* Not memory operation. */
2763 if (!BINARY_CLASS_P (rhs
) && !UNARY_CLASS_P (rhs
))
2765 if (vect_print_dump_info (REPORT_SLP
))
2767 fprintf (vect_dump
, "Build SLP failed: operation");
2768 fprintf (vect_dump
, " unsupported ");
2769 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2775 /* Find the def-stmts. */
2776 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, rhs
, &def_stmts0
,
2777 &def_stmts1
, &first_stmt_dt0
,
2779 &first_stmt_def0_type
,
2780 &first_stmt_def1_type
,
2781 &first_stmt_const_oprnd
,
2787 /* Add the costs of the node to the overall instance costs. */
2788 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
2789 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
2791 /* Strided loads were reached - stop the recursion. */
2795 /* Create SLP_TREE nodes for the definition node/s. */
2796 if (first_stmt_dt0
== vect_loop_def
)
2798 slp_tree left_node
= XNEW (struct _slp_tree
);
2799 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
2800 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
2801 SLP_TREE_LEFT (left_node
) = NULL
;
2802 SLP_TREE_RIGHT (left_node
) = NULL
;
2803 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
2804 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
2805 if (!vect_build_slp_tree (loop_vinfo
, &left_node
, group_size
,
2806 slp_impossible
, inside_cost
, outside_cost
,
2810 SLP_TREE_LEFT (*node
) = left_node
;
/* Mirror of the operand-0 case above for a loop-defined operand 1.  */
2813 if (first_stmt_dt1
== vect_loop_def
)
2815 slp_tree right_node
= XNEW (struct _slp_tree
);
2816 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
2817 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
2818 SLP_TREE_LEFT (right_node
) = NULL
;
2819 SLP_TREE_RIGHT (right_node
) = NULL
;
2820 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
2821 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
2822 if (!vect_build_slp_tree (loop_vinfo
, &right_node
, group_size
,
2823 slp_impossible
, inside_cost
, outside_cost
,
2827 SLP_TREE_RIGHT (*node
) = right_node
;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: each node's scalar stmts,
   then recurse into left/right children.  (NOTE(review): lossy extraction;
   the return type and the NULL-node early return are not visible.)  */
2835 vect_print_slp_tree (slp_tree node
)
2843 fprintf (vect_dump
, "node ");
2844 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2846 fprintf (vect_dump
, "\n\tstmt %d ", i
);
2847 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2849 fprintf (vect_dump
, "\n");
2851 vect_print_slp_tree (SLP_TREE_LEFT (node
));
2852 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
2856 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2857 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2858 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2859 stmts in NODE are to be marked. */
/* NOTE(review): lossy extraction -- return type, braces and the NULL-node
   guard are missing from this view.  Code tokens kept verbatim.  */
2862 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
/* J < 0 means "mark everything"; otherwise only the stmt at index J.  */
2870 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2871 if (j
< 0 || i
== j
)
2872 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
2874 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
2875 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
2879 /* Analyze an SLP instance starting from a group of strided stores. Call
2880 vect_build_slp_tree to build a tree of packed stmts if possible.
2881 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* NOTE(review): lossy extraction -- braces, return statements, the
   multiple-types guard and the loop head collecting the store chain are
   missing from this view.  Code tokens kept verbatim.  */
2884 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, tree stmt
)
2886 slp_instance new_instance
;
2887 slp_tree node
= XNEW (struct _slp_tree
);
2888 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
2889 unsigned int unrolling_factor
= 1, nunits
;
2890 tree vectype
, scalar_type
, next
;
2891 unsigned int vectorization_factor
= 0, ncopies
;
2892 bool slp_impossible
= false;
2893 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
;
2895 /* FORNOW: multiple types are not supported. */
2896 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))));
2897 vectype
= get_vectype_for_scalar_type (scalar_type
);
2898 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2899 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2900 ncopies
= vectorization_factor
/ nunits
;
2903 if (vect_print_dump_info (REPORT_SLP
))
2904 fprintf (vect_dump
, "SLP failed - multiple types ");
2909 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2910 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (tree
, heap
, group_size
);
2912 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2915 VEC_safe_push (tree
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
2916 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
/* Initialize the root node's remaining fields.  */
2919 SLP_TREE_VEC_STMTS (node
) = NULL
;
2920 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
2921 SLP_TREE_LEFT (node
) = NULL
;
2922 SLP_TREE_RIGHT (node
) = NULL
;
2923 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
2924 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
2926 /* Calculate the unrolling factor. */
2927 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
2929 /* Calculate the number of vector stmts to create based on the unrolling
2930 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
2931 GROUP_SIZE / NUNITS otherwise. */
2932 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
2934 /* Build the tree for the SLP instance. */
2935 if (vect_build_slp_tree (loop_vinfo
, &node
, group_size
, &slp_impossible
,
2936 &inside_cost
, &outside_cost
, ncopies_for_cost
))
2938 /* Create a new SLP instance. */
2939 new_instance
= XNEW (struct _slp_instance
);
2940 SLP_INSTANCE_TREE (new_instance
) = node
;
2941 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2942 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2943 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
2944 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
2945 VEC_safe_push (slp_instance
, heap
, LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
2947 if (vect_print_dump_info (REPORT_SLP
))
2948 vect_print_slp_tree (node
);
2953 /* Failed to SLP. */
2954 /* Free the allocated memory. */
2955 vect_free_slp_tree (node
);
2960 /* SLP failed for this instance, but it is still possible to SLP other stmts
2966 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2967 trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): lossy extraction -- return type, braces and return
   statements are missing from this view.  Code tokens kept verbatim.  */
2970 vect_analyze_slp (loop_vec_info loop_vinfo
)
2973 VEC (tree
, heap
) *strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
2976 if (vect_print_dump_info (REPORT_SLP
))
2977 fprintf (vect_dump
, "=== vect_analyze_slp ===");
/* Try to build one SLP instance per collected strided-store group.  */
2979 for (i
= 0; VEC_iterate (tree
, strided_stores
, i
, store
); i
++)
2980 if (!vect_analyze_slp_instance (loop_vinfo
, store
))
2982 /* SLP failed. No instance can be SLPed in the loop. */
2983 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2984 fprintf (vect_dump
, "SLP failed.");
2993 /* For each possible SLP instance decide whether to SLP it and calculate overall
2994 unrolling factor needed to SLP the loop. */
/* NOTE(review): lossy extraction -- return type, braces and the increment
   of decided_to_slp are missing from this view.  Code tokens verbatim.  */
2997 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2999 unsigned int i
, unrolling_factor
= 1;
3000 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3001 slp_instance instance
;
3002 int decided_to_slp
= 0;
3004 if (vect_print_dump_info (REPORT_SLP
))
3005 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
3007 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3009 /* FORNOW: SLP if you can. */
/* The loop-level unrolling factor is the maximum over all instances.  */
3010 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
3011 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
3013 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3014 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3015 loop-based vectorization. Such stmts will be marked as HYBRID. */
3016 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3020 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
3022 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
3023 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
3024 decided_to_slp
, unrolling_factor
);
3028 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3029 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): lossy extraction -- return type, braces and the NULL-node
   guard are missing from this view.  Code tokens kept verbatim.  */
3032 vect_detect_hybrid_slp_stmts (slp_tree node
)
3036 imm_use_iterator imm_iter
;
/* A PURE_SLP stmt whose SSA result has a use outside any SLP tree must
   also be vectorized by the loop-based scheme: mark it HYBRID.  */
3042 for (i
= 0; VEC_iterate (tree
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
3043 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
3044 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
3045 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, GIMPLE_STMT_OPERAND (stmt
, 0))
3046 if (vinfo_for_stmt (use_stmt
)
3047 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt
)))
3048 vect_mark_slp_stmts (node
, hybrid
, i
);
3050 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
3051 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
3055 /* Find stmts that must be both vectorized and SLPed. */
/* NOTE(review): lossy extraction -- return type and braces are missing
   from this view.  Code tokens kept verbatim; only comments added.  */
3058 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
3061 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3062 slp_instance instance
;
3064 if (vect_print_dump_info (REPORT_SLP
))
3065 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
/* Run the hybrid-stmt detection over every SLP instance's tree.  */
3067 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3068 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
3072 /* Function vect_analyze_data_refs.
3074 Find all the data references in the loop.
3076 The general structure of the analysis of data refs in the vectorizer is as
3078 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3079 find and analyze all data-refs in the loop and their dependences.
3080 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3081 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3082 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
/* NOTE(review): lossy extraction -- braces, return statements, some
   declarations (e.g. poffset, dinit) and several condition lines are
   missing from this view.  Code tokens kept verbatim; only comments
   added.  */
3087 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
3089 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3091 VEC (data_reference_p
, heap
) *datarefs
;
3092 struct data_reference
*dr
;
3095 if (vect_print_dump_info (REPORT_DETAILS
))
3096 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
/* Let the data-ref framework find all data-refs and dependences.  */
3098 compute_data_dependences_for_loop (loop
, true,
3099 &LOOP_VINFO_DATAREFS (loop_vinfo
),
3100 &LOOP_VINFO_DDRS (loop_vinfo
));
3102 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3103 from stmt_vec_info struct to DR and vectype. */
3104 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
3106 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
3109 stmt_vec_info stmt_info
;
3111 tree base
, offset
, init
;
3113 if (!dr
|| !DR_REF (dr
))
3115 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3116 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
3120 stmt
= DR_STMT (dr
);
3121 stmt_info
= vinfo_for_stmt (stmt
);
3123 /* Check that analysis of the data-ref succeeded. */
3124 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
3127 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3129 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
3130 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3135 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
3137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3138 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
3143 if (!DR_SYMBOL_TAG (dr
))
3145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3147 fprintf (vect_dump
, "not vectorized: no memory tag for ");
3148 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
/* Unshare the address components so later folding can't corrupt the DR.  */
3153 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3154 offset
= unshare_expr (DR_OFFSET (dr
));
3155 init
= unshare_expr (DR_INIT (dr
));
3157 /* Update DR field in stmt_vec_info struct. */
3158 bb
= bb_for_stmt (stmt
);
3160 /* If the dataref is in an inner-loop of the loop that is considered for
3161 for vectorization, we also want to analyze the access relative to
3162 the outer-loop (DR contains information only relative to the
3163 inner-most enclosing loop). We do that by building a reference to the
3164 first location accessed by the inner-loop, and analyze it relative to
3166 if (nested_in_vect_loop_p (loop
, stmt
))
3168 tree outer_step
, outer_base
, outer_init
;
3169 HOST_WIDE_INT pbitsize
, pbitpos
;
3171 enum machine_mode pmode
;
3172 int punsignedp
, pvolatilep
;
3173 affine_iv base_iv
, offset_iv
;
3176 /* Build a reference to the first location accessed by the
3177 inner-loop: *(BASE+INIT). (The first location is actually
3178 BASE+INIT+OFFSET, but we add OFFSET separately later. */
3179 tree inner_base
= build_fold_indirect_ref
3180 (fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
, init
));
3182 if (vect_print_dump_info (REPORT_DETAILS
))
3184 fprintf (dump_file
, "analyze in outer-loop: ");
3185 print_generic_expr (dump_file
, inner_base
, TDF_SLIM
);
/* Decompose the inner reference into base/offset/bit-position.  */
3188 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3189 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3190 gcc_assert (outer_base
!= NULL_TREE
);
3192 if (pbitpos
% BITS_PER_UNIT
!= 0)
3194 if (vect_print_dump_info (REPORT_DETAILS
))
3195 fprintf (dump_file
, "failed: bit offset alignment.\n");
/* Both the base address and the offset must evolve as simple affine
   IVs with respect to the outer loop.  */
3199 outer_base
= build_fold_addr_expr (outer_base
);
3200 if (!simple_iv (loop
, stmt
, outer_base
, &base_iv
, false))
3202 if (vect_print_dump_info (REPORT_DETAILS
))
3203 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
3210 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, poffset
);
3217 offset_iv
.base
= ssize_int (0);
3218 offset_iv
.step
= ssize_int (0);
3220 else if (!simple_iv (loop
, stmt
, poffset
, &offset_iv
, false))
3222 if (vect_print_dump_info (REPORT_DETAILS
))
3223 fprintf (dump_file
, "evolution of offset is not affine.\n");
/* Fold all constant parts of base/offset into OUTER_INIT.  */
3227 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3228 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3229 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3230 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3231 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3233 outer_step
= size_binop (PLUS_EXPR
,
3234 fold_convert (ssizetype
, base_iv
.step
),
3235 fold_convert (ssizetype
, offset_iv
.step
));
/* Record the outer-loop view of the access in the stmt_vec_info.  */
3237 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3238 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3239 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3240 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3241 STMT_VINFO_DR_OFFSET (stmt_info
) =
3242 fold_convert (ssizetype
, offset_iv
.base
);
3243 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3244 size_int (highest_pow2_factor (offset_iv
.base
));
3246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3248 fprintf (dump_file
, "\touter base_address: ");
3249 print_generic_expr (dump_file
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
3250 fprintf (dump_file
, "\n\touter offset from base address: ");
3251 print_generic_expr (dump_file
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
3252 fprintf (dump_file
, "\n\touter constant offset from base address: ");
3253 print_generic_expr (dump_file
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
3254 fprintf (dump_file
, "\n\touter step: ");
3255 print_generic_expr (dump_file
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
3256 fprintf (dump_file
, "\n\touter aligned to: ");
3257 print_generic_expr (dump_file
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
/* Each stmt may own at most one data-ref.  */
3261 if (STMT_VINFO_DATA_REF (stmt_info
))
3263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3266 "not vectorized: more than one data ref in stmt: ");
3267 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3271 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3273 /* Set vectype for STMT. */
3274 scalar_type
= TREE_TYPE (DR_REF (dr
));
3275 STMT_VINFO_VECTYPE (stmt_info
) =
3276 get_vectype_for_scalar_type (scalar_type
);
3277 if (!STMT_VINFO_VECTYPE (stmt_info
))
3279 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3282 "not vectorized: no vectype for stmt: ");
3283 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3284 fprintf (vect_dump
, " scalar_type: ");
3285 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
3295 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3297 /* Function vect_mark_relevant.
3299 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3302 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
3303 enum vect_relevant relevant
, bool live_p
)
3305 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3306 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3307 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3309 if (vect_print_dump_info (REPORT_DETAILS
))
3310 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
3312 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
3316 /* This is the last stmt in a sequence that was detected as a
3317 pattern that can potentially be vectorized. Don't mark the stmt
3318 as relevant/live because it's not going to be vectorized.
3319 Instead mark the pattern-stmt that replaces it. */
3321 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3323 if (vect_print_dump_info (REPORT_DETAILS
))
3324 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
3325 stmt_info
= vinfo_for_stmt (pattern_stmt
);
3326 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
3327 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3328 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3329 stmt
= pattern_stmt
;
3332 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
3333 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
3334 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
3336 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
3337 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
3339 if (vect_print_dump_info (REPORT_DETAILS
))
3340 fprintf (vect_dump
, "already marked relevant/live.");
3344 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
3348 /* Function vect_stmt_relevant_p.
3350 Return true if STMT in loop that is represented by LOOP_VINFO is
3351 "relevant for vectorization".
3353 A stmt is considered "relevant for vectorization" if:
3354 - it has uses outside the loop.
3355 - it has vdefs (it alters memory).
3356 - control stmts in the loop (except for the exit condition).
3358 CHECKME: what other side effects would the vectorizer allow? */
3361 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
3362 enum vect_relevant
*relevant
, bool *live_p
)
3364 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3365 ssa_op_iter op_iter
;
3366 imm_use_iterator imm_iter
;
3367 use_operand_p use_p
;
3368 def_operand_p def_p
;
3370 *relevant
= vect_unused_in_loop
;
3373 /* cond stmt other than loop exit cond. */
3374 if (is_ctrl_stmt (stmt
)
3375 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
)) != loop_exit_ctrl_vec_info_type
)
3376 *relevant
= vect_used_in_loop
;
3378 /* changing memory. */
3379 if (TREE_CODE (stmt
) != PHI_NODE
)
3380 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
3382 if (vect_print_dump_info (REPORT_DETAILS
))
3383 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
3384 *relevant
= vect_used_in_loop
;
3387 /* uses outside the loop. */
3388 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3390 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
3392 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
3393 if (!flow_bb_inside_loop_p (loop
, bb
))
3395 if (vect_print_dump_info (REPORT_DETAILS
))
3396 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
3398 /* We expect all such uses to be in the loop exit phis
3399 (because of loop closed form) */
3400 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
3401 gcc_assert (bb
== single_exit (loop
)->dest
);
3408 return (*live_p
|| *relevant
);
3413 Function process_use.
3416 - a USE in STMT in a loop represented by LOOP_VINFO
3417 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3418 that defined USE. This is dont by calling mark_relevant and passing it
3419 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3422 Generally, LIVE_P and RELEVANT are used to define the liveness and
3423 relevance info of the DEF_STMT of this USE:
3424 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3425 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3427 - case 1: If USE is used only for address computations (e.g. array indexing),
3428 which does not need to be directly vectorized, then the liveness/relevance
3429 of the respective DEF_STMT is left unchanged.
3430 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3431 skip DEF_STMT cause it had already been processed.
3432 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3433 be modified accordingly.
3435 Return true if everything is as expected. Return false otherwise. */
3438 process_use (tree stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
3439 enum vect_relevant relevant
, VEC(tree
,heap
) **worklist
)
3441 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3442 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3443 stmt_vec_info dstmt_vinfo
;
3444 basic_block bb
, def_bb
;
3446 enum vect_def_type dt
;
3448 /* case 1: we are only interested in uses that need to be vectorized. Uses
3449 that are used for address computation are not considered relevant. */
3450 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
3453 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3455 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3456 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
3460 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
3463 def_bb
= bb_for_stmt (def_stmt
);
3464 if (!flow_bb_inside_loop_p (loop
, def_bb
))
3466 if (vect_print_dump_info (REPORT_DETAILS
))
3467 fprintf (vect_dump
, "def_stmt is out of loop.");
3471 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3472 DEF_STMT must have already been processed, because this should be the
3473 only way that STMT, which is a reduction-phi, was put in the worklist,
3474 as there should be no other uses for DEF_STMT in the loop. So we just
3475 check that everything is as expected, and we are done. */
3476 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
3477 bb
= bb_for_stmt (stmt
);
3478 if (TREE_CODE (stmt
) == PHI_NODE
3479 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
3480 && TREE_CODE (def_stmt
) != PHI_NODE
3481 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
3482 && bb
->loop_father
== def_bb
->loop_father
)
3484 if (vect_print_dump_info (REPORT_DETAILS
))
3485 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
3486 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
3487 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
3488 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
3489 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
3490 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_loop
);
3494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3495 outer-loop-header-bb:
3501 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
3503 if (vect_print_dump_info (REPORT_DETAILS
))
3504 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
3507 case vect_unused_in_loop
:
3508 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3509 vect_used_by_reduction
: vect_unused_in_loop
;
3511 case vect_used_in_outer_by_reduction
:
3512 relevant
= vect_used_by_reduction
;
3514 case vect_used_in_outer
:
3515 relevant
= vect_used_in_loop
;
3517 case vect_used_by_reduction
:
3518 case vect_used_in_loop
:
3526 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3527 outer-loop-header-bb:
3533 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
3535 if (vect_print_dump_info (REPORT_DETAILS
))
3536 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
3539 case vect_unused_in_loop
:
3540 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3541 vect_used_in_outer_by_reduction
: vect_unused_in_loop
;
3544 case vect_used_in_outer_by_reduction
:
3545 case vect_used_in_outer
:
3548 case vect_used_by_reduction
:
3549 relevant
= vect_used_in_outer_by_reduction
;
3552 case vect_used_in_loop
:
3553 relevant
= vect_used_in_outer
;
3561 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
3566 /* Function vect_mark_stmts_to_be_vectorized.
3568 Not all stmts in the loop need to be vectorized. For example:
3577 Stmt 1 and 3 do not need to be vectorized, because loop control and
3578 addressing of vectorized data-refs are handled differently.
3580 This pass detects such stmts. */
3583 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
3585 VEC(tree
,heap
) *worklist
;
3586 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3587 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3588 unsigned int nbbs
= loop
->num_nodes
;
3589 block_stmt_iterator si
;
3593 stmt_vec_info stmt_vinfo
;
3597 enum vect_relevant relevant
;
3599 if (vect_print_dump_info (REPORT_DETAILS
))
3600 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
3602 worklist
= VEC_alloc (tree
, heap
, 64);
3604 /* 1. Init worklist. */
3605 for (i
= 0; i
< nbbs
; i
++)
3608 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
3610 if (vect_print_dump_info (REPORT_DETAILS
))
3612 fprintf (vect_dump
, "init: phi relevant? ");
3613 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
3616 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
3617 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
3619 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3621 stmt
= bsi_stmt (si
);
3622 if (vect_print_dump_info (REPORT_DETAILS
))
3624 fprintf (vect_dump
, "init: stmt relevant? ");
3625 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3628 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
3629 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
3633 /* 2. Process_worklist */
3634 while (VEC_length (tree
, worklist
) > 0)
3636 use_operand_p use_p
;
3639 stmt
= VEC_pop (tree
, worklist
);
3640 if (vect_print_dump_info (REPORT_DETAILS
))
3642 fprintf (vect_dump
, "worklist: examine stmt: ");
3643 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3646 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3647 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3648 liveness and relevance properties of STMT. */
3649 ann
= stmt_ann (stmt
);
3650 stmt_vinfo
= vinfo_for_stmt (stmt
);
3651 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
3652 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
3654 /* Generally, the liveness and relevance properties of STMT are
3655 propagated as is to the DEF_STMTs of its USEs:
3656 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3657 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3659 One exception is when STMT has been identified as defining a reduction
3660 variable; in this case we set the liveness/relevance as follows:
3662 relevant = vect_used_by_reduction
3663 This is because we distinguish between two kinds of relevant stmts -
3664 those that are used by a reduction computation, and those that are
3665 (also) used by a regular computation. This allows us later on to
3666 identify stmts that are used solely by a reduction, and therefore the
3667 order of the results that they produce does not have to be kept.
3669 Reduction phis are expected to be used by a reduction stmt, or by
3670 in an outer loop; Other reduction stmts are expected to be
3671 in the loop, and possibly used by a stmt in an outer loop.
3672 Here are the expected values of "relevant" for reduction phis/stmts:
3675 vect_unused_in_loop ok
3676 vect_used_in_outer_by_reduction ok ok
3677 vect_used_in_outer ok ok
3678 vect_used_by_reduction ok
3679 vect_used_in_loop */
3681 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
3683 enum vect_relevant tmp_relevant
= relevant
;
3684 switch (tmp_relevant
)
3686 case vect_unused_in_loop
:
3687 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
3688 relevant
= vect_used_by_reduction
;
3691 case vect_used_in_outer_by_reduction
:
3692 case vect_used_in_outer
:
3693 gcc_assert (TREE_CODE (stmt
) != WIDEN_SUM_EXPR
3694 && TREE_CODE (stmt
) != DOT_PROD_EXPR
);
3697 case vect_used_by_reduction
:
3698 if (TREE_CODE (stmt
) == PHI_NODE
)
3701 case vect_used_in_loop
:
3703 if (vect_print_dump_info (REPORT_DETAILS
))
3704 fprintf (vect_dump
, "unsupported use of reduction.");
3705 VEC_free (tree
, heap
, worklist
);
3711 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3713 tree op
= USE_FROM_PTR (use_p
);
3714 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
, &worklist
))
3716 VEC_free (tree
, heap
, worklist
);
3720 } /* while worklist */
3722 VEC_free (tree
, heap
, worklist
);
3727 /* Function vect_can_advance_ivs_p
3729 In case the number of iterations that LOOP iterates is unknown at compile
3730 time, an epilog loop will be generated, and the loop induction variables
3731 (IVs) will be "advanced" to the value they are supposed to take just before
3732 the epilog loop. Here we check that the access function of the loop IVs
3733 and the expression that represents the loop bound are simple enough.
3734 These restrictions will be relaxed in the future. */
3737 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
3739 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3740 basic_block bb
= loop
->header
;
3743 /* Analyze phi functions of the loop header. */
3745 if (vect_print_dump_info (REPORT_DETAILS
))
3746 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
3748 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
3750 tree access_fn
= NULL
;
3751 tree evolution_part
;
3753 if (vect_print_dump_info (REPORT_DETAILS
))
3755 fprintf (vect_dump
, "Analyze phi: ");
3756 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
3759 /* Skip virtual phi's. The data dependences that are associated with
3760 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3762 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
3764 if (vect_print_dump_info (REPORT_DETAILS
))
3765 fprintf (vect_dump
, "virtual phi. skip.");
3769 /* Skip reduction phis. */
3771 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
3773 if (vect_print_dump_info (REPORT_DETAILS
))
3774 fprintf (vect_dump
, "reduc phi. skip.");
3778 /* Analyze the evolution function. */
3780 access_fn
= instantiate_parameters
3781 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
3785 if (vect_print_dump_info (REPORT_DETAILS
))
3786 fprintf (vect_dump
, "No Access function.");
3790 if (vect_print_dump_info (REPORT_DETAILS
))
3792 fprintf (vect_dump
, "Access function of PHI: ");
3793 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
3796 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
3798 if (evolution_part
== NULL_TREE
)
3800 if (vect_print_dump_info (REPORT_DETAILS
))
3801 fprintf (vect_dump
, "No evolution.");
3805 /* FORNOW: We do not transform initial conditions of IVs
3806 which evolution functions are a polynomial of degree >= 2. */
3808 if (tree_is_chrec (evolution_part
))
3816 /* Function vect_get_loop_niters.
3818 Determine how many iterations the loop is executed.
3819 If an expression that represents the number of iterations
3820 can be constructed, place it in NUMBER_OF_ITERATIONS.
3821 Return the loop exit condition. */
3824 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
3828 if (vect_print_dump_info (REPORT_DETAILS
))
3829 fprintf (vect_dump
, "=== get_loop_niters ===");
3831 niters
= number_of_exit_cond_executions (loop
);
3833 if (niters
!= NULL_TREE
3834 && niters
!= chrec_dont_know
)
3836 *number_of_iterations
= niters
;
3838 if (vect_print_dump_info (REPORT_DETAILS
))
3840 fprintf (vect_dump
, "==> get_loop_niters:" );
3841 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
3845 return get_loop_exit_condition (loop
);
3849 /* Function vect_analyze_loop_1.
3851 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3852 for it. The different analyses will record information in the
3853 loop_vec_info struct. This is a subset of the analyses applied in
3854 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3855 that is now considered for (outer-loop) vectorization. */
3857 static loop_vec_info
3858 vect_analyze_loop_1 (struct loop
*loop
)
3860 loop_vec_info loop_vinfo
;
3862 if (vect_print_dump_info (REPORT_DETAILS
))
3863 fprintf (vect_dump
, "===== analyze_loop_nest_1 =====");
3865 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3867 loop_vinfo
= vect_analyze_loop_form (loop
);
3870 if (vect_print_dump_info (REPORT_DETAILS
))
3871 fprintf (vect_dump
, "bad inner-loop form.");
3879 /* Function vect_analyze_loop_form.
3881 Verify that certain CFG restrictions hold, including:
3882 - the loop has a pre-header
3883 - the loop has a single entry and exit
3884 - the loop exit condition is simple enough, and the number of iterations
3885 can be analyzed (a countable loop). */
3887 static loop_vec_info
3888 vect_analyze_loop_form (struct loop
*loop
)
3890 loop_vec_info loop_vinfo
;
3892 tree number_of_iterations
= NULL
;
3893 loop_vec_info inner_loop_vinfo
= NULL
;
3895 if (vect_print_dump_info (REPORT_DETAILS
))
3896 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
3898 /* Different restrictions apply when we are considering an inner-most loop,
3899 vs. an outer (nested) loop.
3900 (FORNOW. May want to relax some of these restrictions in the future). */
3904 /* Inner-most loop. We currently require that the number of BBs is
3905 exactly 2 (the header and latch). Vectorizable inner-most loops
3916 if (loop
->num_nodes
!= 2)
3918 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3919 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
3923 if (empty_block_p (loop
->header
))
3925 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3926 fprintf (vect_dump
, "not vectorized: empty loop.");
3932 struct loop
*innerloop
= loop
->inner
;
3933 edge backedge
, entryedge
;
3935 /* Nested loop. We currently require that the loop is doubly-nested,
3936 contains a single inner loop, and the number of BBs is exactly 5.
3937 Vectorizable outer-loops look like this:
3949 The inner-loop has the properties expected of inner-most loops
3950 as described above. */
3952 if ((loop
->inner
)->inner
|| (loop
->inner
)->next
)
3954 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3955 fprintf (vect_dump
, "not vectorized: multiple nested loops.");
3959 /* Analyze the inner-loop. */
3960 inner_loop_vinfo
= vect_analyze_loop_1 (loop
->inner
);
3961 if (!inner_loop_vinfo
)
3963 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3964 fprintf (vect_dump
, "not vectorized: Bad inner loop.");
3968 if (!expr_invariant_in_loop_p (loop
,
3969 LOOP_VINFO_NITERS (inner_loop_vinfo
)))
3971 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3973 "not vectorized: inner-loop count not invariant.");
3974 destroy_loop_vec_info (inner_loop_vinfo
, true);
3978 if (loop
->num_nodes
!= 5)
3980 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
3981 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
3982 destroy_loop_vec_info (inner_loop_vinfo
, true);
3986 gcc_assert (EDGE_COUNT (innerloop
->header
->preds
) == 2);
3987 backedge
= EDGE_PRED (innerloop
->header
, 1);
3988 entryedge
= EDGE_PRED (innerloop
->header
, 0);
3989 if (EDGE_PRED (innerloop
->header
, 0)->src
== innerloop
->latch
)
3991 backedge
= EDGE_PRED (innerloop
->header
, 0);
3992 entryedge
= EDGE_PRED (innerloop
->header
, 1);
3995 if (entryedge
->src
!= loop
->header
3996 || !single_exit (innerloop
)
3997 || single_exit (innerloop
)->dest
!= EDGE_PRED (loop
->latch
, 0)->src
)
3999 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4000 fprintf (vect_dump
, "not vectorized: unsupported outerloop form.");
4001 destroy_loop_vec_info (inner_loop_vinfo
, true);
4005 if (vect_print_dump_info (REPORT_DETAILS
))
4006 fprintf (vect_dump
, "Considering outer-loop vectorization.");
4009 if (!single_exit (loop
)
4010 || EDGE_COUNT (loop
->header
->preds
) != 2)
4012 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4014 if (!single_exit (loop
))
4015 fprintf (vect_dump
, "not vectorized: multiple exits.");
4016 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
4017 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
4019 if (inner_loop_vinfo
)
4020 destroy_loop_vec_info (inner_loop_vinfo
, true);
4024 /* We assume that the loop exit condition is at the end of the loop. i.e,
4025 that the loop is represented as a do-while (with a proper if-guard
4026 before the loop if needed), where the loop header contains all the
4027 executable statements, and the latch is empty. */
4028 if (!empty_block_p (loop
->latch
)
4029 || phi_nodes (loop
->latch
))
4031 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4032 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
4033 if (inner_loop_vinfo
)
4034 destroy_loop_vec_info (inner_loop_vinfo
, true);
4038 /* Make sure there exists a single-predecessor exit bb: */
4039 if (!single_pred_p (single_exit (loop
)->dest
))
4041 edge e
= single_exit (loop
);
4042 if (!(e
->flags
& EDGE_ABNORMAL
))
4044 split_loop_exit_edge (e
);
4045 if (vect_print_dump_info (REPORT_DETAILS
))
4046 fprintf (vect_dump
, "split exit edge.");
4050 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4051 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
4052 if (inner_loop_vinfo
)
4053 destroy_loop_vec_info (inner_loop_vinfo
, true);
4058 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
4061 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4062 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
4063 if (inner_loop_vinfo
)
4064 destroy_loop_vec_info (inner_loop_vinfo
, true);
4068 if (!number_of_iterations
)
4070 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4072 "not vectorized: number of iterations cannot be computed.");
4073 if (inner_loop_vinfo
)
4074 destroy_loop_vec_info (inner_loop_vinfo
, true);
4078 if (chrec_contains_undetermined (number_of_iterations
))
4080 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4081 fprintf (vect_dump
, "Infinite number of iterations.");
4082 if (inner_loop_vinfo
)
4083 destroy_loop_vec_info (inner_loop_vinfo
, true);
4087 if (!NITERS_KNOWN_P (number_of_iterations
))
4089 if (vect_print_dump_info (REPORT_DETAILS
))
4091 fprintf (vect_dump
, "Symbolic number of iterations is ");
4092 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
4095 else if (TREE_INT_CST_LOW (number_of_iterations
) == 0)
4097 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
4098 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
4099 if (inner_loop_vinfo
)
4100 destroy_loop_vec_info (inner_loop_vinfo
, false);
4104 loop_vinfo
= new_loop_vec_info (loop
);
4105 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
4107 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond
)) = loop_exit_ctrl_vec_info_type
;
4109 /* CHECKME: May want to keep it around it in the future. */
4110 if (inner_loop_vinfo
)
4111 destroy_loop_vec_info (inner_loop_vinfo
, false);
4113 gcc_assert (!loop
->aux
);
4114 loop
->aux
= loop_vinfo
;
4119 /* Function vect_analyze_loop.
4121 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4122 for it. The different analyses will record information in the
4123 loop_vec_info struct. */
4125 vect_analyze_loop (struct loop
*loop
)
4128 loop_vec_info loop_vinfo
;
4130 if (vect_print_dump_info (REPORT_DETAILS
))
4131 fprintf (vect_dump
, "===== analyze_loop_nest =====");
4133 if (loop_outer (loop
)
4134 && loop_vec_info_for_loop (loop_outer (loop
))
4135 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop
))))
4137 if (vect_print_dump_info (REPORT_DETAILS
))
4138 fprintf (vect_dump
, "outer-loop already vectorized.");
4142 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4144 loop_vinfo
= vect_analyze_loop_form (loop
);
4147 if (vect_print_dump_info (REPORT_DETAILS
))
4148 fprintf (vect_dump
, "bad loop form.");
4152 /* Find all data references in the loop (which correspond to vdefs/vuses)
4153 and analyze their evolution in the loop.
4155 FORNOW: Handle only simple, array references, which
4156 alignment can be forced, and aligned pointer-references. */
4158 ok
= vect_analyze_data_refs (loop_vinfo
);
4161 if (vect_print_dump_info (REPORT_DETAILS
))
4162 fprintf (vect_dump
, "bad data references.");
4163 destroy_loop_vec_info (loop_vinfo
, true);
4167 /* Classify all cross-iteration scalar data-flow cycles.
4168 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4170 vect_analyze_scalar_cycles (loop_vinfo
);
4172 vect_pattern_recog (loop_vinfo
);
4174 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4176 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
4179 if (vect_print_dump_info (REPORT_DETAILS
))
4180 fprintf (vect_dump
, "unexpected pattern.");
4181 destroy_loop_vec_info (loop_vinfo
, true);
4185 /* Analyze the alignment of the data-refs in the loop.
4186 Fail if a data reference is found that cannot be vectorized. */
4188 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
4191 if (vect_print_dump_info (REPORT_DETAILS
))
4192 fprintf (vect_dump
, "bad data alignment.");
4193 destroy_loop_vec_info (loop_vinfo
, true);
4197 ok
= vect_determine_vectorization_factor (loop_vinfo
);
4200 if (vect_print_dump_info (REPORT_DETAILS
))
4201 fprintf (vect_dump
, "can't determine vectorization factor.");
4202 destroy_loop_vec_info (loop_vinfo
, true);
4206 /* Analyze data dependences between the data-refs in the loop.
4207 FORNOW: fail at the first data dependence that we encounter. */
4209 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
4212 if (vect_print_dump_info (REPORT_DETAILS
))
4213 fprintf (vect_dump
, "bad data dependence.");
4214 destroy_loop_vec_info (loop_vinfo
, true);
4218 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4219 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4221 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
4224 if (vect_print_dump_info (REPORT_DETAILS
))
4225 fprintf (vect_dump
, "bad data access.");
4226 destroy_loop_vec_info (loop_vinfo
, true);
4230 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4231 ok
= vect_analyze_slp (loop_vinfo
);
4234 /* Decide which possible SLP instances to SLP. */
4235 vect_make_slp_decision (loop_vinfo
);
4237 /* Find stmts that need to be both vectorized and SLPed. */
4238 vect_detect_hybrid_slp (loop_vinfo
);
4241 /* This pass will decide on using loop versioning and/or loop peeling in
4242 order to enhance the alignment of data references in the loop. */
4244 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
4247 if (vect_print_dump_info (REPORT_DETAILS
))
4248 fprintf (vect_dump
, "bad data alignment.");
4249 destroy_loop_vec_info (loop_vinfo
, true);
4253 /* Scan all the operations in the loop and make sure they are
4256 ok
= vect_analyze_operations (loop_vinfo
);
4259 if (vect_print_dump_info (REPORT_DETAILS
))
4260 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
4261 destroy_loop_vec_info (loop_vinfo
, true);
4265 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;