1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
42 /* Main analysis functions. */
43 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
44 static bool vect_analyze_data_refs (loop_vec_info
);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
46 static void vect_analyze_scalar_cycles (loop_vec_info
);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
50 static bool vect_compute_data_refs_alignment (loop_vec_info
);
51 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
52 static bool vect_analyze_operations (loop_vec_info
);
53 static bool vect_determine_vectorization_factor (loop_vec_info
);
55 /* Utility functions for the analyses. */
56 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
57 static void vect_mark_relevant (VEC(tree
,heap
) **, tree
, bool, bool);
58 static bool vect_stmt_relevant_p (tree
, loop_vec_info
, bool *, bool *);
59 static tree
vect_get_loop_niters (struct loop
*, tree
*);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation
*, loop_vec_info
);
62 static bool vect_compute_data_ref_alignment (struct data_reference
*);
63 static bool vect_analyze_data_ref_access (struct data_reference
*);
64 static bool vect_can_advance_ivs_p (loop_vec_info
);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference
*, struct data_reference
*, int npeel
);
69 /* Function vect_determine_vectorization_factor
71 Determine the vectorization factor (VF). VF is the number of data elements
72 that are operated upon in parallel in a single iteration of the vectorized
73 loop. For example, when vectorizing a loop that operates on 4byte elements,
74 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
75 elements can fit in a single vector register.
77 We currently support vectorization of loops in which all types operated upon
78 are of the same size. Therefore this function currently sets VF according to
79 the size of the types operated upon, and fails if there are multiple sizes
82 VF is also the factor by which the loop iterations are strip-mined, e.g.:
89 for (i=0; i<N; i+=VF){
90 a[i:VF] = b[i:VF] + c[i:VF];
95 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
97 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
98 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
99 int nbbs
= loop
->num_nodes
;
100 block_stmt_iterator si
;
101 unsigned int vectorization_factor
= 0;
105 if (vect_print_dump_info (REPORT_DETAILS
))
106 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
108 for (i
= 0; i
< nbbs
; i
++)
110 basic_block bb
= bbs
[i
];
112 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
114 tree stmt
= bsi_stmt (si
);
116 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
119 if (vect_print_dump_info (REPORT_DETAILS
))
121 fprintf (vect_dump
, "==> examining statement: ");
122 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
125 gcc_assert (stmt_info
);
126 /* skip stmts which do not need to be vectorized. */
127 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
128 && !STMT_VINFO_LIVE_P (stmt_info
))
130 if (vect_print_dump_info (REPORT_DETAILS
))
131 fprintf (vect_dump
, "skip.");
135 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
139 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
140 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
145 if (STMT_VINFO_DATA_REF (stmt_info
))
146 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
147 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
148 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
150 scalar_type
= TREE_TYPE (stmt
);
152 if (vect_print_dump_info (REPORT_DETAILS
))
154 fprintf (vect_dump
, "get vectype for scalar type: ");
155 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
158 vectype
= get_vectype_for_scalar_type (scalar_type
);
161 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
163 fprintf (vect_dump
, "not vectorized: unsupported data-type ");
164 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
168 if (vect_print_dump_info (REPORT_DETAILS
))
170 fprintf (vect_dump
, "vectype: ");
171 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
173 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
175 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
176 if (vect_print_dump_info (REPORT_DETAILS
))
177 fprintf (vect_dump
, "nunits = %d", nunits
);
179 if (vectorization_factor
)
181 /* FORNOW: don't allow mixed units.
182 This restriction will be relaxed in the future. */
183 if (nunits
!= vectorization_factor
)
185 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
186 fprintf (vect_dump
, "not vectorized: mixed data-types");
191 vectorization_factor
= nunits
;
193 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type
))
194 * vectorization_factor
== UNITS_PER_SIMD_WORD
);
198 /* TODO: Analyze cost. Decide if worth while to vectorize. */
200 if (vectorization_factor
<= 1)
202 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
203 fprintf (vect_dump
, "not vectorized: unsupported data-type");
206 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
212 /* Function vect_analyze_operations.
214 Scan the loop stmts and make sure they are all vectorizable. */
217 vect_analyze_operations (loop_vec_info loop_vinfo
)
219 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
220 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
221 int nbbs
= loop
->num_nodes
;
222 block_stmt_iterator si
;
223 unsigned int vectorization_factor
= 0;
227 stmt_vec_info stmt_info
;
228 bool need_to_vectorize
= false;
230 if (vect_print_dump_info (REPORT_DETAILS
))
231 fprintf (vect_dump
, "=== vect_analyze_operations ===");
233 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
234 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
236 for (i
= 0; i
< nbbs
; i
++)
238 basic_block bb
= bbs
[i
];
240 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
242 stmt_info
= vinfo_for_stmt (phi
);
243 if (vect_print_dump_info (REPORT_DETAILS
))
245 fprintf (vect_dump
, "examining phi: ");
246 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
249 gcc_assert (stmt_info
);
251 if (STMT_VINFO_LIVE_P (stmt_info
))
253 /* FORNOW: not yet supported. */
254 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
255 fprintf (vect_dump
, "not vectorized: value used after loop.");
259 if (STMT_VINFO_RELEVANT_P (stmt_info
))
261 /* Most likely a reduction-like computation that is used
263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
264 fprintf (vect_dump
, "not vectorized: unsupported pattern.");
269 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
271 tree stmt
= bsi_stmt (si
);
272 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
274 if (vect_print_dump_info (REPORT_DETAILS
))
276 fprintf (vect_dump
, "==> examining statement: ");
277 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
280 gcc_assert (stmt_info
);
282 /* skip stmts which do not need to be vectorized.
283 this is expected to include:
284 - the COND_EXPR which is the loop exit condition
285 - any LABEL_EXPRs in the loop
286 - computations that are used only for array indexing or loop
289 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
290 && !STMT_VINFO_LIVE_P (stmt_info
))
292 if (vect_print_dump_info (REPORT_DETAILS
))
293 fprintf (vect_dump
, "irrelevant.");
297 if (STMT_VINFO_RELEVANT_P (stmt_info
))
299 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
300 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
302 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
303 || vectorizable_assignment (stmt
, NULL
, NULL
)
304 || vectorizable_load (stmt
, NULL
, NULL
)
305 || vectorizable_store (stmt
, NULL
, NULL
)
306 || vectorizable_condition (stmt
, NULL
, NULL
));
310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
313 "not vectorized: relevant stmt not supported: ");
314 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
318 need_to_vectorize
= true;
321 if (STMT_VINFO_LIVE_P (stmt_info
))
323 ok
= vectorizable_reduction (stmt
, NULL
, NULL
);
326 need_to_vectorize
= true;
328 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
335 "not vectorized: live stmt not supported: ");
336 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
344 /* TODO: Analyze cost. Decide if worth while to vectorize. */
346 /* All operations in the loop are either irrelevant (deal with loop
347 control, or dead), or only used outside the loop and can be moved
348 out of the loop (e.g. invariants, inductions). The loop can be
349 optimized away by scalar optimizations. We're better off not
350 touching this loop. */
351 if (!need_to_vectorize
)
353 if (vect_print_dump_info (REPORT_DETAILS
))
355 "All the computation can be taken out of the loop.");
356 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
358 "not vectorized: redundant loop. no profit to vectorize.");
362 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
363 && vect_print_dump_info (REPORT_DETAILS
))
365 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
366 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
368 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
369 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
372 fprintf (vect_dump
, "not vectorized: iteration count too small.");
376 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
377 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
378 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
380 if (vect_print_dump_info (REPORT_DETAILS
))
381 fprintf (vect_dump
, "epilog loop required.");
382 if (!vect_can_advance_ivs_p (loop_vinfo
))
384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
386 "not vectorized: can't create epilog loop 1.");
389 if (!slpeel_can_duplicate_loop_p (loop
, loop
->single_exit
))
391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
393 "not vectorized: can't create epilog loop 2.");
402 /* Function exist_non_indexing_operands_for_use_p
404 USE is one of the uses attached to STMT. Check if USE is
405 used in STMT for anything other than indexing an array. */
408 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
411 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
413 /* USE corresponds to some operand in STMT. If there is no data
414 reference in STMT, then any operand that corresponds to USE
415 is not indexing an array. */
416 if (!STMT_VINFO_DATA_REF (stmt_info
))
419 /* STMT has a data_ref. FORNOW this means that its of one of
423 (This should have been verified in analyze_data_refs).
425 'var' in the second case corresponds to a def, not a use,
426 so USE cannot correspond to any operands that are not used
429 Therefore, all we need to check is if STMT falls into the
430 first case, and whether var corresponds to USE. */
432 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
435 operand
= TREE_OPERAND (stmt
, 1);
437 if (TREE_CODE (operand
) != SSA_NAME
)
447 /* Function vect_analyze_scalar_cycles.
449 Examine the cross iteration def-use cycles of scalar variables, by
450 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
451 following: invariant, induction, reduction, unknown.
453 Some forms of scalar cycles are not yet supported.
455 Example1: reduction: (unsupported yet)
461 Example2: induction: (unsupported yet)
467 Note: the following loop *is* vectorizable:
473 even though it has a def-use cycle caused by the induction variable i:
475 loop: i_2 = PHI (i_0, i_1)
480 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
481 it does not need to be vectorized because it is only used for array
482 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
483 loop2 on the other hand is relevant (it is being written to memory).
487 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
490 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
491 basic_block bb
= loop
->header
;
494 if (vect_print_dump_info (REPORT_DETAILS
))
495 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
497 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
499 tree access_fn
= NULL
;
500 tree def
= PHI_RESULT (phi
);
501 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
504 if (vect_print_dump_info (REPORT_DETAILS
))
506 fprintf (vect_dump
, "Analyze phi: ");
507 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
510 /* Skip virtual phi's. The data dependences that are associated with
511 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
513 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
515 if (vect_print_dump_info (REPORT_DETAILS
))
516 fprintf (vect_dump
, "virtual phi. skip.");
520 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
522 /* Analyze the evolution function. */
524 access_fn
= analyze_scalar_evolution (loop
, def
);
529 if (vect_print_dump_info (REPORT_DETAILS
))
531 fprintf (vect_dump
, "Access function of PHI: ");
532 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
535 if (vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
, &dummy
))
537 if (vect_print_dump_info (REPORT_DETAILS
))
538 fprintf (vect_dump
, "Detected induction.");
539 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
543 /* TODO: handle invariant phis */
545 reduc_stmt
= vect_is_simple_reduction (loop
, phi
);
548 if (vect_print_dump_info (REPORT_DETAILS
))
549 fprintf (vect_dump
, "Detected reduction.");
550 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
551 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
555 if (vect_print_dump_info (REPORT_DETAILS
))
556 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
564 /* Function vect_analyze_data_ref_dependence.
566 Return TRUE if there (might) exist a dependence between a memory-reference
567 DRA and a memory-reference DRB. */
570 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
571 loop_vec_info loop_vinfo
)
573 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
574 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
576 unsigned int loop_depth
= 0;
577 struct loop
*loop_nest
= loop
;
578 struct data_reference
*dra
= DDR_A (ddr
);
579 struct data_reference
*drb
= DDR_B (ddr
);
580 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
581 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
583 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
586 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
591 "not vectorized: can't determine dependence between ");
592 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
593 fprintf (vect_dump
, " and ");
594 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
599 if (!DDR_DIST_VECT (ddr
))
601 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
603 fprintf (vect_dump
, "not vectorized: bad dist vector for ");
604 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
605 fprintf (vect_dump
, " and ");
606 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
611 /* Find loop depth. */
612 while (loop_nest
&& loop_nest
->outer
&& loop_nest
->outer
->outer
)
614 loop_nest
= loop_nest
->outer
;
618 dist
= DDR_DIST_VECT (ddr
)[loop_depth
];
619 if (vect_print_dump_info (REPORT_DR_DETAILS
))
620 fprintf (vect_dump
, "dependence distance = %d.",dist
);
622 /* Same loop iteration. */
623 if (dist
% vectorization_factor
== 0)
625 /* Two references with distance zero have the same alignment. */
626 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
627 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
628 if (vect_print_dump_info (REPORT_ALIGNMENT
))
629 fprintf (vect_dump
, "accesses have the same alignment.");
630 if (vect_print_dump_info (REPORT_DR_DETAILS
))
632 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
633 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
634 fprintf (vect_dump
, " and ");
635 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
640 if (abs (dist
) >= vectorization_factor
)
642 /* Dependence distance does not create dependence, as far as vectorization
643 is concerned, in this case. */
644 if (vect_print_dump_info (REPORT_DR_DETAILS
))
645 fprintf (vect_dump
, "dependence distance >= VF.");
649 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
652 "not vectorized: possible dependence between data-refs ");
653 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
654 fprintf (vect_dump
, " and ");
655 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
662 /* Function vect_analyze_data_ref_dependences.
664 Examine all the data references in the loop, and make sure there do not
665 exist any data dependences between them. */
668 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
671 varray_type ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
673 if (vect_print_dump_info (REPORT_DETAILS
))
674 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
676 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (ddrs
); i
++)
678 struct data_dependence_relation
*ddr
= VARRAY_GENERIC_PTR (ddrs
, i
);
680 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
688 /* Function vect_compute_data_ref_alignment
690 Compute the misalignment of the data reference DR.
693 1. If during the misalignment computation it is found that the data reference
694 cannot be vectorized then false is returned.
695 2. DR_MISALIGNMENT (DR) is defined.
697 FOR NOW: No analysis is actually performed. Misalignment is calculated
698 only for trivial cases. TODO. */
701 vect_compute_data_ref_alignment (struct data_reference
*dr
)
703 tree stmt
= DR_STMT (dr
);
704 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
705 tree ref
= DR_REF (dr
);
707 tree base
, base_addr
;
710 tree aligned_to
, alignment
;
712 if (vect_print_dump_info (REPORT_DETAILS
))
713 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
715 /* Initialize misalignment to unknown. */
716 DR_MISALIGNMENT (dr
) = -1;
718 misalign
= DR_OFFSET_MISALIGNMENT (dr
);
719 aligned_to
= DR_ALIGNED_TO (dr
);
720 base_addr
= DR_BASE_ADDRESS (dr
);
721 base
= build_fold_indirect_ref (base_addr
);
722 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
723 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
725 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
728 if (vect_print_dump_info (REPORT_DETAILS
))
730 fprintf (vect_dump
, "Unknown alignment for access: ");
731 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
737 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
739 || (TREE_CODE (base_addr
) == SSA_NAME
740 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
741 TREE_TYPE (base_addr
)))),
745 base_aligned
= false;
749 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
)))
751 if (vect_print_dump_info (REPORT_DETAILS
))
753 fprintf (vect_dump
, "can't force alignment of ref: ");
754 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
759 /* Force the alignment of the decl.
760 NOTE: This is the only change to the code we make during
761 the analysis phase, before deciding to vectorize the loop. */
762 if (vect_print_dump_info (REPORT_DETAILS
))
763 fprintf (vect_dump
, "force alignment");
764 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
765 DECL_USER_ALIGN (base
) = 1;
768 /* At this point we assume that the base is aligned. */
769 gcc_assert (base_aligned
770 || (TREE_CODE (base
) == VAR_DECL
771 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
773 /* Modulo alignment. */
774 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
776 if (tree_int_cst_sgn (misalign
) < 0)
778 /* Negative misalignment value. */
779 if (vect_print_dump_info (REPORT_DETAILS
))
780 fprintf (vect_dump
, "unexpected misalign value");
784 DR_MISALIGNMENT (dr
) = tree_low_cst (misalign
, 1);
786 if (vect_print_dump_info (REPORT_DETAILS
))
788 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
789 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
796 /* Function vect_compute_data_refs_alignment
798 Compute the misalignment of data references in the loop.
799 Return FALSE if a data reference is found that cannot be vectorized. */
802 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
804 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
807 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
809 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
810 if (!vect_compute_data_ref_alignment (dr
))
818 /* Function vect_update_misalignment_for_peel
820 DR - the data reference whose misalignment is to be adjusted.
821 DR_PEEL - the data reference whose misalignment is being made
822 zero in the vector loop by the peel.
823 NPEEL - the number of iterations in the peel loop if the misalignment
824 of DR_PEEL is known at compile time. */
827 vect_update_misalignment_for_peel (struct data_reference
*dr
,
828 struct data_reference
*dr_peel
, int npeel
)
832 VEC(dr_p
,heap
) *same_align_drs
;
833 struct data_reference
*current_dr
;
835 if (known_alignment_for_access_p (dr
)
836 && DR_MISALIGNMENT (dr
) == DR_MISALIGNMENT (dr_peel
))
838 DR_MISALIGNMENT (dr
) = 0;
842 /* It can be assumed that the data refs with the same alignment as dr_peel
843 are aligned in the vector loop. */
845 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
846 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
848 if (current_dr
!= dr
)
850 gcc_assert (DR_MISALIGNMENT (dr
) == DR_MISALIGNMENT (dr_peel
));
851 DR_MISALIGNMENT (dr
) = 0;
855 if (known_alignment_for_access_p (dr
)
856 && known_alignment_for_access_p (dr_peel
))
858 drsize
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
859 DR_MISALIGNMENT (dr
) += npeel
* drsize
;
860 DR_MISALIGNMENT (dr
) %= UNITS_PER_SIMD_WORD
;
864 DR_MISALIGNMENT (dr
) = -1;
868 /* Function vect_verify_datarefs_alignment
870 Return TRUE if all data references in the loop can be
871 handled with respect to alignment. */
874 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
876 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
877 enum dr_alignment_support supportable_dr_alignment
;
880 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
882 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
883 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
884 if (!supportable_dr_alignment
)
886 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
890 "not vectorized: unsupported unaligned load.");
893 "not vectorized: unsupported unaligned store.");
897 if (supportable_dr_alignment
!= dr_aligned
898 && vect_print_dump_info (REPORT_ALIGNMENT
))
899 fprintf (vect_dump
, "Vectorizing an unaligned access.");
905 /* Function vect_enhance_data_refs_alignment
907 This pass will use loop versioning and loop peeling in order to enhance
908 the alignment of data references in the loop.
910 FOR NOW: we assume that whatever versioning/peeling takes place, only the
911 original loop is to be vectorized; Any other loops that are created by
912 the transformations performed in this pass - are not supposed to be
913 vectorized. This restriction will be relaxed.
915 This pass will require a cost model to guide it whether to apply peeling
916 or versioning or a combination of the two. For example, the scheme that
917 intel uses when given a loop with several memory accesses, is as follows:
918 choose one memory access ('p') which alignment you want to force by doing
919 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
920 other accesses are not necessarily aligned, or (2) use loop versioning to
921 generate one loop in which all accesses are aligned, and another loop in
922 which only 'p' is necessarily aligned.
924 ("Automatic Intra-Register Vectorization for the Intel Architecture",
925 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
926 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
928 Devising a cost model is the most critical aspect of this work. It will
929 guide us on which access to peel for, whether to use loop versioning, how
930 many versions to create, etc. The cost model will probably consist of
931 generic considerations as well as target specific considerations (on
932 powerpc for example, misaligned stores are more painful than misaligned
935 Here are the general steps involved in alignment enhancements:
937 -- original loop, before alignment analysis:
939 x = q[i]; # DR_MISALIGNMENT(q) = unknown
940 p[i] = y; # DR_MISALIGNMENT(p) = unknown
943 -- After vect_compute_data_refs_alignment:
945 x = q[i]; # DR_MISALIGNMENT(q) = 3
946 p[i] = y; # DR_MISALIGNMENT(p) = unknown
949 -- Possibility 1: we do loop versioning:
951 for (i=0; i<N; i++){ # loop 1A
952 x = q[i]; # DR_MISALIGNMENT(q) = 3
953 p[i] = y; # DR_MISALIGNMENT(p) = 0
957 for (i=0; i<N; i++){ # loop 1B
958 x = q[i]; # DR_MISALIGNMENT(q) = 3
959 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
963 -- Possibility 2: we do loop peeling:
964 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
968 for (i = 3; i < N; i++){ # loop 2A
969 x = q[i]; # DR_MISALIGNMENT(q) = 0
970 p[i] = y; # DR_MISALIGNMENT(p) = unknown
973 -- Possibility 3: combination of loop peeling and versioning:
974 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
979 for (i = 3; i<N; i++){ # loop 3A
980 x = q[i]; # DR_MISALIGNMENT(q) = 0
981 p[i] = y; # DR_MISALIGNMENT(p) = 0
985 for (i = 3; i<N; i++){ # loop 3B
986 x = q[i]; # DR_MISALIGNMENT(q) = 0
987 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
991 These loops are later passed to loop_transform to be vectorized. The
992 vectorizer will use the alignment information to guide the transformation
993 (whether to generate regular loads/stores, or with special handling for
997 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
999 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1000 enum dr_alignment_support supportable_dr_alignment
;
1001 struct data_reference
*dr0
= NULL
;
1002 struct data_reference
*dr
;
1004 bool do_peeling
= false;
1005 bool do_versioning
= false;
1008 /* While cost model enhancements are expected in the future, the high level
1009 view of the code at this time is as follows:
1011 A) If there is a misaligned write then see if peeling to align this write
1012 can make all data references satisfy vect_supportable_dr_alignment.
1013 If so, update data structures as needed and return true. Note that
1014 at this time vect_supportable_dr_alignment is known to return false
1015 for a a misaligned write.
1017 B) If peeling wasn't possible and there is a data reference with an
1018 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1019 then see if loop versioning checks can be used to make all data
1020 references satisfy vect_supportable_dr_alignment. If so, update
1021 data structures as needed and return true.
1023 C) If neither peeling nor versioning were successful then return false if
1024 any data reference does not satisfy vect_supportable_dr_alignment.
1026 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1028 Note, Possibility 3 above (which is peeling and versioning together) is not
1029 being done at this time. */
1031 /* (1) Peeling to force alignment. */
1033 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1035 + How many accesses will become aligned due to the peeling
1036 - How many accesses will become unaligned due to the peeling,
1037 and the cost of misaligned accesses.
1038 - The cost of peeling (the extra runtime checks, the increase
1041 The scheme we use FORNOW: peel to force the alignment of the first
1042 misaligned store in the loop.
1043 Rationale: misaligned stores are not yet supported.
1045 TODO: Use a cost model. */
1047 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1049 dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1050 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1058 /* Often peeling for alignment will require peeling for loop-bound, which in
1059 turn requires that we know how to adjust the loop ivs after the loop. */
1060 if (!vect_can_advance_ivs_p (loop_vinfo
))
1068 if (known_alignment_for_access_p (dr0
))
1070 /* Since it's known at compile time, compute the number of iterations
1071 in the peeled loop (the peeling factor) for use in updating
1072 DR_MISALIGNMENT values. The peeling factor is the vectorization
1073 factor minus the misalignment as an element count. */
1074 mis
= DR_MISALIGNMENT (dr0
);
1075 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1076 npeel
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - mis
;
1079 /* Ensure that all data refs can be vectorized after the peel. */
1080 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1082 int save_misalignment
;
1084 dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1087 save_misalignment
= DR_MISALIGNMENT (dr
);
1088 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1089 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1090 DR_MISALIGNMENT (dr
) = save_misalignment
;
1092 if (!supportable_dr_alignment
)
1101 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1102 If the misalignment of DR_i is identical to that of dr0 then set
1103 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1104 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1105 by the peeling factor times the element size of DR_i (MOD the
1106 vectorization factor times the size). Otherwise, the
1107 misalignment of DR_i must be set to unknown. */
1108 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1110 dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1113 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1116 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1117 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1118 DR_MISALIGNMENT (dr0
) = 0;
1119 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1120 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1122 if (vect_print_dump_info (REPORT_DETAILS
))
1123 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1125 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1132 /* (2) Versioning to force alignment. */
1134 /* Try versioning if:
1135 1) flag_tree_vect_loop_version is TRUE
1136 2) optimize_size is FALSE
1137 3) there is at least one unsupported misaligned data ref with an unknown
1139 4) all misaligned data refs with a known misalignment are supported, and
1140 5) the number of runtime alignment checks is within reason. */
1142 do_versioning
= flag_tree_vect_loop_version
&& (!optimize_size
);
1146 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1148 dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1150 if (aligned_access_p (dr
))
1153 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1155 if (!supportable_dr_alignment
)
1161 if (known_alignment_for_access_p (dr
)
1162 || VEC_length (tree
,
1163 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1164 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS
))
1166 do_versioning
= false;
1170 stmt
= DR_STMT (dr
);
1171 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1172 gcc_assert (vectype
);
1174 /* The rightmost bits of an aligned address must be zeros.
1175 Construct the mask needed for this test. For example,
1176 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1177 mask must be 15 = 0xf. */
1178 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1180 /* FORNOW: use the same mask to test all potentially unaligned
1181 references in the loop. The vectorizer currently supports
1182 a single vector size, see the reference to
1183 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1184 vectorization factor is computed. */
1185 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1186 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1187 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1188 VEC_safe_push (tree
, heap
,
1189 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1194 /* Versioning requires at least one misaligned data reference. */
1195 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1196 do_versioning
= false;
1197 else if (!do_versioning
)
1198 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1203 VEC(tree
,heap
) *may_misalign_stmts
1204 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1207 /* It can now be assumed that the data references in the statements
1208 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1209 of the loop being vectorized. */
1210 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
1212 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1213 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1214 DR_MISALIGNMENT (dr
) = 0;
1215 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1216 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1219 if (vect_print_dump_info (REPORT_DETAILS
))
1220 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1222 /* Peeling and versioning can't be done together at this time. */
1223 gcc_assert (! (do_peeling
&& do_versioning
));
1225 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1230 /* This point is reached if neither peeling nor versioning is being done. */
1231 gcc_assert (! (do_peeling
|| do_versioning
));
1233 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1238 /* Function vect_analyze_data_refs_alignment
1240 Analyze the alignment of the data-references in the loop.
1241 Return FALSE if a data reference is found that cannot be vectorized. */
1244 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1246 if (vect_print_dump_info (REPORT_DETAILS
))
1247 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1249 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1253 "not vectorized: can't calculate alignment for data ref.");
1261 /* Function vect_analyze_data_ref_access.
1263 Analyze the access pattern of the data-reference DR. For now, a data access
1264 has to be consecutive to be considered vectorizable. */
1267 vect_analyze_data_ref_access (struct data_reference
*dr
)
1269 tree step
= DR_STEP (dr
);
1270 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1272 if (!step
|| tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1274 if (vect_print_dump_info (REPORT_DETAILS
))
1275 fprintf (vect_dump
, "not consecutive access");
1282 /* Function vect_analyze_data_ref_accesses.
1284 Analyze the access pattern of all the data references in the loop.
1286 FORNOW: the only access pattern that is considered vectorizable is a
1287 simple step 1 (consecutive) access.
1289 FORNOW: handle only arrays and pointer accesses. */
1292 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1295 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1297 if (vect_print_dump_info (REPORT_DETAILS
))
1298 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1300 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1302 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1303 if (!vect_analyze_data_ref_access (dr
))
1305 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1306 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1315 /* Function vect_analyze_data_refs.
1317 Find all the data references in the loop.
1319 The general structure of the analysis of data refs in the vectorizer is as
1321 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1322 find and analyze all data-refs in the loop and their dependences.
1323 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1324 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1325 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1330 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1332 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1334 varray_type datarefs
;
1337 if (vect_print_dump_info (REPORT_DETAILS
))
1338 fprintf (vect_dump
, "=== vect_analyze_data_refs ===");
1340 compute_data_dependences_for_loop (loop
, false,
1341 &(LOOP_VINFO_DATAREFS (loop_vinfo
)),
1342 &(LOOP_VINFO_DDRS (loop_vinfo
)));
1344 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1345 from stmt_vec_info struct to DR and vectype. */
1346 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1347 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1349 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1351 stmt_vec_info stmt_info
;
1353 if (!dr
|| !DR_REF (dr
))
1355 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1356 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
1360 /* Update DR field in stmt_vec_info struct. */
1361 stmt
= DR_STMT (dr
);
1362 stmt_info
= vinfo_for_stmt (stmt
);
1364 if (STMT_VINFO_DATA_REF (stmt_info
))
1366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1369 "not vectorized: more than one data ref in stmt: ");
1370 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1374 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
1376 /* Check that analysis of the data-ref succeeded. */
1377 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
1380 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1382 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
1383 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1387 if (!DR_MEMTAG (dr
))
1389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1391 fprintf (vect_dump
, "not vectorized: no memory tag for ");
1392 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1397 /* Set vectype for STMT. */
1398 scalar_type
= TREE_TYPE (DR_REF (dr
));
1399 STMT_VINFO_VECTYPE (stmt_info
) =
1400 get_vectype_for_scalar_type (scalar_type
);
1401 if (!STMT_VINFO_VECTYPE (stmt_info
))
1403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1406 "not vectorized: no vectype for stmt: ");
1407 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1408 fprintf (vect_dump
, " scalar_type: ");
1409 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
1419 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1421 /* Function vect_mark_relevant.
1423 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1426 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
1427 bool relevant_p
, bool live_p
)
1429 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1430 bool save_relevant_p
= STMT_VINFO_RELEVANT_P (stmt_info
);
1431 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
1433 if (vect_print_dump_info (REPORT_DETAILS
))
1434 fprintf (vect_dump
, "mark relevant %d, live %d.",relevant_p
, live_p
);
1436 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
1437 STMT_VINFO_RELEVANT_P (stmt_info
) |= relevant_p
;
1439 if (TREE_CODE (stmt
) == PHI_NODE
)
1440 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1441 or live will fail vectorization later on. */
1444 if (STMT_VINFO_RELEVANT_P (stmt_info
) == save_relevant_p
1445 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
1447 if (vect_print_dump_info (REPORT_DETAILS
))
1448 fprintf (vect_dump
, "already marked relevant/live.");
1452 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
1456 /* Function vect_stmt_relevant_p.
1458 Return true if STMT in loop that is represented by LOOP_VINFO is
1459 "relevant for vectorization".
1461 A stmt is considered "relevant for vectorization" if:
1462 - it has uses outside the loop.
1463 - it has vdefs (it alters memory).
1464 - control stmts in the loop (except for the exit condition).
1466 CHECKME: what other side effects would the vectorizer allow? */
1469 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
1470 bool *relevant_p
, bool *live_p
)
1472 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1473 ssa_op_iter op_iter
;
1474 imm_use_iterator imm_iter
;
1475 use_operand_p use_p
;
1476 def_operand_p def_p
;
1478 *relevant_p
= false;
1481 /* cond stmt other than loop exit cond. */
1482 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
1485 /* changing memory. */
1486 if (TREE_CODE (stmt
) != PHI_NODE
)
1487 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
1489 if (vect_print_dump_info (REPORT_DETAILS
))
1490 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
1494 /* uses outside the loop. */
1495 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
1497 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
1499 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
1500 if (!flow_bb_inside_loop_p (loop
, bb
))
1502 if (vect_print_dump_info (REPORT_DETAILS
))
1503 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
1505 /* We expect all such uses to be in the loop exit phis
1506 (because of loop closed form) */
1507 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
1508 gcc_assert (bb
== loop
->single_exit
->dest
);
1515 return (*live_p
|| *relevant_p
);
1519 /* Function vect_mark_stmts_to_be_vectorized.
1521 Not all stmts in the loop need to be vectorized. For example:
1530 Stmt 1 and 3 do not need to be vectorized, because loop control and
1531 addressing of vectorized data-refs are handled differently.
1533 This pass detects such stmts. */
1536 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
1538 VEC(tree
,heap
) *worklist
;
1539 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1540 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1541 unsigned int nbbs
= loop
->num_nodes
;
1542 block_stmt_iterator si
;
1547 stmt_vec_info stmt_vinfo
;
1550 bool relevant_p
, live_p
;
1552 enum vect_def_type dt
;
1554 if (vect_print_dump_info (REPORT_DETAILS
))
1555 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
1557 worklist
= VEC_alloc (tree
, heap
, 64);
1559 /* 1. Init worklist. */
1562 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1564 if (vect_print_dump_info (REPORT_DETAILS
))
1566 fprintf (vect_dump
, "init: phi relevant? ");
1567 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
1570 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant_p
, &live_p
))
1571 vect_mark_relevant (&worklist
, phi
, relevant_p
, live_p
);
1574 for (i
= 0; i
< nbbs
; i
++)
1577 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1579 stmt
= bsi_stmt (si
);
1581 if (vect_print_dump_info (REPORT_DETAILS
))
1583 fprintf (vect_dump
, "init: stmt relevant? ");
1584 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1587 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant_p
, &live_p
))
1588 vect_mark_relevant (&worklist
, stmt
, relevant_p
, live_p
);
1593 /* 2. Process_worklist */
1595 while (VEC_length (tree
, worklist
) > 0)
1597 stmt
= VEC_pop (tree
, worklist
);
1599 if (vect_print_dump_info (REPORT_DETAILS
))
1601 fprintf (vect_dump
, "worklist: examine stmt: ");
1602 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1605 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1606 in the loop, mark the stmt that defines it (DEF_STMT) as
1607 relevant/irrelevant and live/dead according to the liveness and
1608 relevance properties of STMT.
1611 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
1613 ann
= stmt_ann (stmt
);
1614 stmt_vinfo
= vinfo_for_stmt (stmt
);
1616 relevant_p
= STMT_VINFO_RELEVANT_P (stmt_vinfo
);
1617 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
1619 /* Generally, the liveness and relevance properties of STMT are
1620 propagated to the DEF_STMTs of its USEs:
1621 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1622 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1627 If USE is used only for address computations (e.g. array indexing),
1628 which does not need to be directly vectorized, then the
1629 liveness/relevance of the respective DEF_STMT is left unchanged.
1632 If STMT has been identified as defining a reduction variable, then
1635 The last use of STMT is the reduction-variable, which is defined
1636 by a loop-header-phi. We don't want to mark the phi as live or
1637 relevant (because it does not need to be vectorized, it is handled
1638 as part of the vectorization of the reduction), so in this case we
1639 skip the call to vect_mark_relevant.
1641 The rest of the uses of STMT are defined in the loop body. For
1642 the def_stmt of these uses we want to set liveness/relevance
1644 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1645 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1646 because even though STMT is classified as live (since it defines a
1647 value that is used across loop iterations) and irrelevant (since it
1648 is not used inside the loop), it will be vectorized, and therefore
1649 the corresponding DEF_STMTs need to marked as relevant.
1653 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
1655 gcc_assert (!relevant_p
&& live_p
);
1660 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1662 /* case 1: we are only interested in uses that need to be vectorized.
1663 Uses that are used for address computation are not considered
1666 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
1669 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1671 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1672 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
1673 VEC_free (tree
, heap
, worklist
);
1677 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
1680 if (vect_print_dump_info (REPORT_DETAILS
))
1682 fprintf (vect_dump
, "worklist: examine use %d: ", i
);
1683 print_generic_expr (vect_dump
, use
, TDF_SLIM
);
1686 bb
= bb_for_stmt (def_stmt
);
1687 if (!flow_bb_inside_loop_p (loop
, bb
))
1690 /* case 2.1: the reduction-use does not mark the defining-phi
1692 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
1693 && TREE_CODE (def_stmt
) == PHI_NODE
)
1696 vect_mark_relevant (&worklist
, def_stmt
, relevant_p
, live_p
);
1698 } /* while worklist */
1700 VEC_free (tree
, heap
, worklist
);
1705 /* Function vect_can_advance_ivs_p
1707 In case the number of iterations that LOOP iterates is unknown at compile
1708 time, an epilog loop will be generated, and the loop induction variables
1709 (IVs) will be "advanced" to the value they are supposed to take just before
1710 the epilog loop. Here we check that the access function of the loop IVs
1711 and the expression that represents the loop bound are simple enough.
1712 These restrictions will be relaxed in the future. */
1715 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
1717 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1718 basic_block bb
= loop
->header
;
1721 /* Analyze phi functions of the loop header. */
1723 if (vect_print_dump_info (REPORT_DETAILS
))
1724 fprintf (vect_dump
, "=== vect_can_advance_ivs_p ===");
1726 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1728 tree access_fn
= NULL
;
1729 tree evolution_part
;
1731 if (vect_print_dump_info (REPORT_DETAILS
))
1733 fprintf (vect_dump
, "Analyze phi: ");
1734 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
1737 /* Skip virtual phi's. The data dependences that are associated with
1738 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1740 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
1742 if (vect_print_dump_info (REPORT_DETAILS
))
1743 fprintf (vect_dump
, "virtual phi. skip.");
1747 /* Skip reduction phis. */
1749 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
1751 if (vect_print_dump_info (REPORT_DETAILS
))
1752 fprintf (vect_dump
, "reduc phi. skip.");
1756 /* Analyze the evolution function. */
1758 access_fn
= instantiate_parameters
1759 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1763 if (vect_print_dump_info (REPORT_DETAILS
))
1764 fprintf (vect_dump
, "No Access function.");
1768 if (vect_print_dump_info (REPORT_DETAILS
))
1770 fprintf (vect_dump
, "Access function of PHI: ");
1771 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
1774 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1776 if (evolution_part
== NULL_TREE
)
1778 if (vect_print_dump_info (REPORT_DETAILS
))
1779 fprintf (vect_dump
, "No evolution.");
1783 /* FORNOW: We do not transform initial conditions of IVs
1784 which evolution functions are a polynomial of degree >= 2. */
1786 if (tree_is_chrec (evolution_part
))
1794 /* Function vect_get_loop_niters.
1796 Determine how many iterations the loop is executed.
1797 If an expression that represents the number of iterations
1798 can be constructed, place it in NUMBER_OF_ITERATIONS.
1799 Return the loop exit condition. */
1802 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
1806 if (vect_print_dump_info (REPORT_DETAILS
))
1807 fprintf (vect_dump
, "=== get_loop_niters ===");
1809 niters
= number_of_iterations_in_loop (loop
);
1811 if (niters
!= NULL_TREE
1812 && niters
!= chrec_dont_know
)
1814 *number_of_iterations
= niters
;
1816 if (vect_print_dump_info (REPORT_DETAILS
))
1818 fprintf (vect_dump
, "==> get_loop_niters:" );
1819 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
1823 return get_loop_exit_condition (loop
);
1827 /* Function vect_analyze_loop_form.
1829 Verify the following restrictions (some may be relaxed in the future):
1830 - it's an inner-most loop
1831 - number of BBs = 2 (which are the loop header and the latch)
1832 - the loop has a pre-header
1833 - the loop has a single entry and exit
1834 - the loop exit condition is simple enough, and the number of iterations
1835 can be analyzed (a countable loop). */
1837 static loop_vec_info
1838 vect_analyze_loop_form (struct loop
*loop
)
1840 loop_vec_info loop_vinfo
;
1842 tree number_of_iterations
= NULL
;
1844 if (vect_print_dump_info (REPORT_DETAILS
))
1845 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
1849 if (vect_print_dump_info (REPORT_OUTER_LOOPS
))
1850 fprintf (vect_dump
, "not vectorized: nested loop.");
1854 if (!loop
->single_exit
1855 || loop
->num_nodes
!= 2
1856 || EDGE_COUNT (loop
->header
->preds
) != 2)
1858 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1860 if (!loop
->single_exit
)
1861 fprintf (vect_dump
, "not vectorized: multiple exits.");
1862 else if (loop
->num_nodes
!= 2)
1863 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
1864 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
1865 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
1871 /* We assume that the loop exit condition is at the end of the loop. i.e,
1872 that the loop is represented as a do-while (with a proper if-guard
1873 before the loop if needed), where the loop header contains all the
1874 executable statements, and the latch is empty. */
1875 if (!empty_block_p (loop
->latch
))
1877 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1878 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
1882 /* Make sure there exists a single-predecessor exit bb: */
1883 if (!single_pred_p (loop
->single_exit
->dest
))
1885 edge e
= loop
->single_exit
;
1886 if (!(e
->flags
& EDGE_ABNORMAL
))
1888 split_loop_exit_edge (e
);
1889 if (vect_print_dump_info (REPORT_DETAILS
))
1890 fprintf (vect_dump
, "split exit edge.");
1894 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1895 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
1900 if (empty_block_p (loop
->header
))
1902 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1903 fprintf (vect_dump
, "not vectorized: empty loop.");
1907 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
1910 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1911 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
1915 if (!number_of_iterations
)
1917 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1919 "not vectorized: number of iterations cannot be computed.");
1923 if (chrec_contains_undetermined (number_of_iterations
))
1925 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1926 fprintf (vect_dump
, "Infinite number of iterations.");
1930 loop_vinfo
= new_loop_vec_info (loop
);
1931 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
1933 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
1935 if (vect_print_dump_info (REPORT_DETAILS
))
1937 fprintf (vect_dump
, "Symbolic number of iterations is ");
1938 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
1942 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
1944 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1945 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
1949 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
1955 /* Function vect_analyze_loop.
1957 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1958 for it. The different analyses will record information in the
1959 loop_vec_info struct. */
1961 vect_analyze_loop (struct loop
*loop
)
1964 loop_vec_info loop_vinfo
;
1966 if (vect_print_dump_info (REPORT_DETAILS
))
1967 fprintf (vect_dump
, "===== analyze_loop_nest =====");
1969 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1971 loop_vinfo
= vect_analyze_loop_form (loop
);
1974 if (vect_print_dump_info (REPORT_DETAILS
))
1975 fprintf (vect_dump
, "bad loop form.");
1979 /* Find all data references in the loop (which correspond to vdefs/vuses)
1980 and analyze their evolution in the loop.
1982 FORNOW: Handle only simple, array references, which
1983 alignment can be forced, and aligned pointer-references. */
1985 ok
= vect_analyze_data_refs (loop_vinfo
);
1988 if (vect_print_dump_info (REPORT_DETAILS
))
1989 fprintf (vect_dump
, "bad data references.");
1990 destroy_loop_vec_info (loop_vinfo
);
1994 /* Classify all cross-iteration scalar data-flow cycles.
1995 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1997 vect_analyze_scalar_cycles (loop_vinfo
);
1999 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2001 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
2004 if (vect_print_dump_info (REPORT_DETAILS
))
2005 fprintf (vect_dump
, "unexpected pattern.");
2006 destroy_loop_vec_info (loop_vinfo
);
2010 /* Analyze the alignment of the data-refs in the loop.
2011 Fail if a data reference is found that cannot be vectorized. */
2013 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
2016 if (vect_print_dump_info (REPORT_DETAILS
))
2017 fprintf (vect_dump
, "bad data alignment.");
2018 destroy_loop_vec_info (loop_vinfo
);
2022 ok
= vect_determine_vectorization_factor (loop_vinfo
);
2025 if (vect_print_dump_info (REPORT_DETAILS
))
2026 fprintf (vect_dump
, "can't determine vectorization factor.");
2027 destroy_loop_vec_info (loop_vinfo
);
2031 /* Analyze data dependences between the data-refs in the loop.
2032 FORNOW: fail at the first data dependence that we encounter. */
2034 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
2037 if (vect_print_dump_info (REPORT_DETAILS
))
2038 fprintf (vect_dump
, "bad data dependence.");
2039 destroy_loop_vec_info (loop_vinfo
);
2043 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2044 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2046 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
2049 if (vect_print_dump_info (REPORT_DETAILS
))
2050 fprintf (vect_dump
, "bad data access.");
2051 destroy_loop_vec_info (loop_vinfo
);
2055 /* This pass will decide on using loop versioning and/or loop peeling in
2056 order to enhance the alignment of data references in the loop. */
2058 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
2061 if (vect_print_dump_info (REPORT_DETAILS
))
2062 fprintf (vect_dump
, "bad data alignment.");
2063 destroy_loop_vec_info (loop_vinfo
);
2067 /* Scan all the operations in the loop and make sure they are
2070 ok
= vect_analyze_operations (loop_vinfo
);
2073 if (vect_print_dump_info (REPORT_DETAILS
))
2074 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
2075 destroy_loop_vec_info (loop_vinfo
);
2079 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;