1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
42 /* Main analysis functions. */
43 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
44 static bool vect_analyze_data_refs (loop_vec_info
);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
46 static void vect_analyze_scalar_cycles (loop_vec_info
);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
50 static bool vect_compute_data_refs_alignment (loop_vec_info
);
51 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
52 static bool vect_analyze_operations (loop_vec_info
);
53 static bool vect_determine_vectorization_factor (loop_vec_info
);
55 /* Utility functions for the analyses. */
56 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
57 static void vect_mark_relevant (VEC(tree
,heap
) **, tree
, bool, bool);
58 static bool vect_stmt_relevant_p (tree
, loop_vec_info
, bool *, bool *);
59 static tree
vect_get_loop_niters (struct loop
*, tree
*);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation
*, loop_vec_info
);
62 static bool vect_compute_data_ref_alignment (struct data_reference
*);
63 static bool vect_analyze_data_ref_access (struct data_reference
*);
64 static bool vect_can_advance_ivs_p (loop_vec_info
);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference
*, struct data_reference
*, int npeel
);
69 /* Function vect_determine_vectorization_factor
71 Determine the vectorization factor (VF). VF is the number of data elements
72 that are operated upon in parallel in a single iteration of the vectorized
73 loop. For example, when vectorizing a loop that operates on 4byte elements,
74 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
75 elements can fit in a single vector register.
77 We currently support vectorization of loops in which all types operated upon
78 are of the same size. Therefore this function currently sets VF according to
79 the size of the types operated upon, and fails if there are multiple sizes
82 VF is also the factor by which the loop iterations are strip-mined, e.g.:
89 for (i=0; i<N; i+=VF){
90 a[i:VF] = b[i:VF] + c[i:VF];
95 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
97 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
98 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
99 int nbbs
= loop
->num_nodes
;
100 block_stmt_iterator si
;
101 unsigned int vectorization_factor
= 0;
105 if (vect_print_dump_info (REPORT_DETAILS
))
106 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
108 for (i
= 0; i
< nbbs
; i
++)
110 basic_block bb
= bbs
[i
];
112 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
114 tree stmt
= bsi_stmt (si
);
116 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
119 if (vect_print_dump_info (REPORT_DETAILS
))
121 fprintf (vect_dump
, "==> examining statement: ");
122 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
125 gcc_assert (stmt_info
);
126 /* skip stmts which do not need to be vectorized. */
127 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
128 && !STMT_VINFO_LIVE_P (stmt_info
))
130 if (vect_print_dump_info (REPORT_DETAILS
))
131 fprintf (vect_dump
, "skip.");
135 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
139 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
140 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
145 if (STMT_VINFO_VECTYPE (stmt_info
))
147 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
148 scalar_type
= TREE_TYPE (vectype
);
152 if (STMT_VINFO_DATA_REF (stmt_info
))
154 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
155 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
156 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
158 scalar_type
= TREE_TYPE (stmt
);
160 if (vect_print_dump_info (REPORT_DETAILS
))
162 fprintf (vect_dump
, "get vectype for scalar type: ");
163 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
166 vectype
= get_vectype_for_scalar_type (scalar_type
);
169 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
172 "not vectorized: unsupported data-type ");
173 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
177 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
180 if (vect_print_dump_info (REPORT_DETAILS
))
182 fprintf (vect_dump
, "vectype: ");
183 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
186 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
187 if (vect_print_dump_info (REPORT_DETAILS
))
188 fprintf (vect_dump
, "nunits = %d", nunits
);
190 if (vectorization_factor
)
192 /* FORNOW: don't allow mixed units.
193 This restriction will be relaxed in the future. */
194 if (nunits
!= vectorization_factor
)
196 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
197 fprintf (vect_dump
, "not vectorized: mixed data-types");
202 vectorization_factor
= nunits
;
204 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type
))
205 * vectorization_factor
== UNITS_PER_SIMD_WORD
);
209 /* TODO: Analyze cost. Decide if worth while to vectorize. */
211 if (vectorization_factor
<= 1)
213 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
214 fprintf (vect_dump
, "not vectorized: unsupported data-type");
217 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
223 /* Function vect_analyze_operations.
225 Scan the loop stmts and make sure they are all vectorizable. */
228 vect_analyze_operations (loop_vec_info loop_vinfo
)
230 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
231 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
232 int nbbs
= loop
->num_nodes
;
233 block_stmt_iterator si
;
234 unsigned int vectorization_factor
= 0;
238 stmt_vec_info stmt_info
;
239 bool need_to_vectorize
= false;
241 if (vect_print_dump_info (REPORT_DETAILS
))
242 fprintf (vect_dump
, "=== vect_analyze_operations ===");
244 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
245 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
247 for (i
= 0; i
< nbbs
; i
++)
249 basic_block bb
= bbs
[i
];
251 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
253 stmt_info
= vinfo_for_stmt (phi
);
254 if (vect_print_dump_info (REPORT_DETAILS
))
256 fprintf (vect_dump
, "examining phi: ");
257 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
260 gcc_assert (stmt_info
);
262 if (STMT_VINFO_LIVE_P (stmt_info
))
264 /* FORNOW: not yet supported. */
265 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
266 fprintf (vect_dump
, "not vectorized: value used after loop.");
270 if (STMT_VINFO_RELEVANT_P (stmt_info
))
272 /* Most likely a reduction-like computation that is used
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
275 fprintf (vect_dump
, "not vectorized: unsupported pattern.");
280 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
282 tree stmt
= bsi_stmt (si
);
283 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
285 if (vect_print_dump_info (REPORT_DETAILS
))
287 fprintf (vect_dump
, "==> examining statement: ");
288 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
291 gcc_assert (stmt_info
);
293 /* skip stmts which do not need to be vectorized.
294 this is expected to include:
295 - the COND_EXPR which is the loop exit condition
296 - any LABEL_EXPRs in the loop
297 - computations that are used only for array indexing or loop
300 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
301 && !STMT_VINFO_LIVE_P (stmt_info
))
303 if (vect_print_dump_info (REPORT_DETAILS
))
304 fprintf (vect_dump
, "irrelevant.");
308 if (STMT_VINFO_RELEVANT_P (stmt_info
))
310 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
311 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
313 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
314 || vectorizable_assignment (stmt
, NULL
, NULL
)
315 || vectorizable_load (stmt
, NULL
, NULL
)
316 || vectorizable_store (stmt
, NULL
, NULL
)
317 || vectorizable_condition (stmt
, NULL
, NULL
));
321 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
324 "not vectorized: relevant stmt not supported: ");
325 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
329 need_to_vectorize
= true;
332 if (STMT_VINFO_LIVE_P (stmt_info
))
334 ok
= vectorizable_reduction (stmt
, NULL
, NULL
);
337 need_to_vectorize
= true;
339 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
343 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
346 "not vectorized: live stmt not supported: ");
347 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
355 /* TODO: Analyze cost. Decide if worth while to vectorize. */
357 /* All operations in the loop are either irrelevant (deal with loop
358 control, or dead), or only used outside the loop and can be moved
359 out of the loop (e.g. invariants, inductions). The loop can be
360 optimized away by scalar optimizations. We're better off not
361 touching this loop. */
362 if (!need_to_vectorize
)
364 if (vect_print_dump_info (REPORT_DETAILS
))
366 "All the computation can be taken out of the loop.");
367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
369 "not vectorized: redundant loop. no profit to vectorize.");
373 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
374 && vect_print_dump_info (REPORT_DETAILS
))
376 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
377 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
380 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
382 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
383 fprintf (vect_dump
, "not vectorized: iteration count too small.");
387 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
388 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
389 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
391 if (vect_print_dump_info (REPORT_DETAILS
))
392 fprintf (vect_dump
, "epilog loop required.");
393 if (!vect_can_advance_ivs_p (loop_vinfo
))
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
397 "not vectorized: can't create epilog loop 1.");
400 if (!slpeel_can_duplicate_loop_p (loop
, loop
->single_exit
))
402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
404 "not vectorized: can't create epilog loop 2.");
413 /* Function exist_non_indexing_operands_for_use_p
415 USE is one of the uses attached to STMT. Check if USE is
416 used in STMT for anything other than indexing an array. */
419 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
422 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
424 /* USE corresponds to some operand in STMT. If there is no data
425 reference in STMT, then any operand that corresponds to USE
426 is not indexing an array. */
427 if (!STMT_VINFO_DATA_REF (stmt_info
))
430 /* STMT has a data_ref. FORNOW this means that its of one of
434 (This should have been verified in analyze_data_refs).
436 'var' in the second case corresponds to a def, not a use,
437 so USE cannot correspond to any operands that are not used
440 Therefore, all we need to check is if STMT falls into the
441 first case, and whether var corresponds to USE. */
443 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
446 operand
= TREE_OPERAND (stmt
, 1);
448 if (TREE_CODE (operand
) != SSA_NAME
)
458 /* Function vect_analyze_scalar_cycles.
460 Examine the cross iteration def-use cycles of scalar variables, by
461 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
462 following: invariant, induction, reduction, unknown.
464 Some forms of scalar cycles are not yet supported.
466 Example1: reduction: (unsupported yet)
472 Example2: induction: (unsupported yet)
478 Note: the following loop *is* vectorizable:
484 even though it has a def-use cycle caused by the induction variable i:
486 loop: i_2 = PHI (i_0, i_1)
491 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
492 it does not need to be vectorized because it is only used for array
493 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
494 loop2 on the other hand is relevant (it is being written to memory).
498 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
501 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
502 basic_block bb
= loop
->header
;
505 if (vect_print_dump_info (REPORT_DETAILS
))
506 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
508 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
510 tree access_fn
= NULL
;
511 tree def
= PHI_RESULT (phi
);
512 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
515 if (vect_print_dump_info (REPORT_DETAILS
))
517 fprintf (vect_dump
, "Analyze phi: ");
518 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
521 /* Skip virtual phi's. The data dependences that are associated with
522 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
524 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
526 if (vect_print_dump_info (REPORT_DETAILS
))
527 fprintf (vect_dump
, "virtual phi. skip.");
531 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
533 /* Analyze the evolution function. */
535 access_fn
= analyze_scalar_evolution (loop
, def
);
540 if (vect_print_dump_info (REPORT_DETAILS
))
542 fprintf (vect_dump
, "Access function of PHI: ");
543 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
546 if (vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
, &dummy
))
548 if (vect_print_dump_info (REPORT_DETAILS
))
549 fprintf (vect_dump
, "Detected induction.");
550 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
554 /* TODO: handle invariant phis */
556 reduc_stmt
= vect_is_simple_reduction (loop
, phi
);
559 if (vect_print_dump_info (REPORT_DETAILS
))
560 fprintf (vect_dump
, "Detected reduction.");
561 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
562 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
566 if (vect_print_dump_info (REPORT_DETAILS
))
567 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
575 /* Function vect_analyze_data_ref_dependence.
577 Return TRUE if there (might) exist a dependence between a memory-reference
578 DRA and a memory-reference DRB. */
581 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
582 loop_vec_info loop_vinfo
)
585 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
586 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
587 struct data_reference
*dra
= DDR_A (ddr
);
588 struct data_reference
*drb
= DDR_B (ddr
);
589 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
590 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
591 lambda_vector dist_v
;
592 unsigned int loop_depth
;
594 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
597 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
599 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
602 "not vectorized: can't determine dependence between ");
603 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
604 fprintf (vect_dump
, " and ");
605 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
610 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
612 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
614 fprintf (vect_dump
, "not vectorized: bad dist vector for ");
615 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
616 fprintf (vect_dump
, " and ");
617 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
622 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
623 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
625 int dist
= dist_v
[loop_depth
];
627 if (vect_print_dump_info (REPORT_DR_DETAILS
))
628 fprintf (vect_dump
, "dependence distance = %d.", dist
);
630 /* Same loop iteration. */
631 if (dist
% vectorization_factor
== 0)
633 /* Two references with distance zero have the same alignment. */
634 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
635 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
636 if (vect_print_dump_info (REPORT_ALIGNMENT
))
637 fprintf (vect_dump
, "accesses have the same alignment.");
638 if (vect_print_dump_info (REPORT_DR_DETAILS
))
640 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
641 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
642 fprintf (vect_dump
, " and ");
643 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
648 if (abs (dist
) >= vectorization_factor
)
650 /* Dependence distance does not create dependence, as far as vectorization
651 is concerned, in this case. */
652 if (vect_print_dump_info (REPORT_DR_DETAILS
))
653 fprintf (vect_dump
, "dependence distance >= VF.");
657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
660 "not vectorized: possible dependence between data-refs ");
661 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
662 fprintf (vect_dump
, " and ");
663 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
673 /* Function vect_analyze_data_ref_dependences.
675 Examine all the data references in the loop, and make sure there do not
676 exist any data dependences between them. */
679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
682 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
683 struct data_dependence_relation
*ddr
;
685 if (vect_print_dump_info (REPORT_DETAILS
))
686 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
688 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
689 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
696 /* Function vect_compute_data_ref_alignment
698 Compute the misalignment of the data reference DR.
701 1. If during the misalignment computation it is found that the data reference
702 cannot be vectorized then false is returned.
703 2. DR_MISALIGNMENT (DR) is defined.
705 FOR NOW: No analysis is actually performed. Misalignment is calculated
706 only for trivial cases. TODO. */
709 vect_compute_data_ref_alignment (struct data_reference
*dr
)
711 tree stmt
= DR_STMT (dr
);
712 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
713 tree ref
= DR_REF (dr
);
715 tree base
, base_addr
;
718 tree aligned_to
, alignment
;
720 if (vect_print_dump_info (REPORT_DETAILS
))
721 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
723 /* Initialize misalignment to unknown. */
724 DR_MISALIGNMENT (dr
) = -1;
726 misalign
= DR_OFFSET_MISALIGNMENT (dr
);
727 aligned_to
= DR_ALIGNED_TO (dr
);
728 base_addr
= DR_BASE_ADDRESS (dr
);
729 base
= build_fold_indirect_ref (base_addr
);
730 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
731 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
733 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
736 if (vect_print_dump_info (REPORT_DETAILS
))
738 fprintf (vect_dump
, "Unknown alignment for access: ");
739 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
745 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
747 || (TREE_CODE (base_addr
) == SSA_NAME
748 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
749 TREE_TYPE (base_addr
)))),
753 base_aligned
= false;
757 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
)))
759 if (vect_print_dump_info (REPORT_DETAILS
))
761 fprintf (vect_dump
, "can't force alignment of ref: ");
762 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
767 /* Force the alignment of the decl.
768 NOTE: This is the only change to the code we make during
769 the analysis phase, before deciding to vectorize the loop. */
770 if (vect_print_dump_info (REPORT_DETAILS
))
771 fprintf (vect_dump
, "force alignment");
772 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
773 DECL_USER_ALIGN (base
) = 1;
776 /* At this point we assume that the base is aligned. */
777 gcc_assert (base_aligned
778 || (TREE_CODE (base
) == VAR_DECL
779 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
781 /* Modulo alignment. */
782 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
784 if (!host_integerp (misalign
, 1))
786 /* Negative or overflowed misalignment value. */
787 if (vect_print_dump_info (REPORT_DETAILS
))
788 fprintf (vect_dump
, "unexpected misalign value");
792 DR_MISALIGNMENT (dr
) = TREE_INT_CST_LOW (misalign
);
794 if (vect_print_dump_info (REPORT_DETAILS
))
796 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
797 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
804 /* Function vect_compute_data_refs_alignment
806 Compute the misalignment of data references in the loop.
807 Return FALSE if a data reference is found that cannot be vectorized. */
810 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
812 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
813 struct data_reference
*dr
;
816 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
817 if (!vect_compute_data_ref_alignment (dr
))
824 /* Function vect_update_misalignment_for_peel
826 DR - the data reference whose misalignment is to be adjusted.
827 DR_PEEL - the data reference whose misalignment is being made
828 zero in the vector loop by the peel.
829 NPEEL - the number of iterations in the peel loop if the misalignment
830 of DR_PEEL is known at compile time. */
833 vect_update_misalignment_for_peel (struct data_reference
*dr
,
834 struct data_reference
*dr_peel
, int npeel
)
838 VEC(dr_p
,heap
) *same_align_drs
;
839 struct data_reference
*current_dr
;
841 if (known_alignment_for_access_p (dr
)
842 && DR_MISALIGNMENT (dr
) == DR_MISALIGNMENT (dr_peel
))
844 DR_MISALIGNMENT (dr
) = 0;
848 /* It can be assumed that the data refs with the same alignment as dr_peel
849 are aligned in the vector loop. */
851 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
852 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
854 if (current_dr
!= dr
)
856 gcc_assert (DR_MISALIGNMENT (dr
) == DR_MISALIGNMENT (dr_peel
));
857 DR_MISALIGNMENT (dr
) = 0;
861 if (known_alignment_for_access_p (dr
)
862 && known_alignment_for_access_p (dr_peel
))
864 drsize
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
865 DR_MISALIGNMENT (dr
) += npeel
* drsize
;
866 DR_MISALIGNMENT (dr
) %= UNITS_PER_SIMD_WORD
;
870 DR_MISALIGNMENT (dr
) = -1;
874 /* Function vect_verify_datarefs_alignment
876 Return TRUE if all data references in the loop can be
877 handled with respect to alignment. */
880 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
882 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
883 struct data_reference
*dr
;
884 enum dr_alignment_support supportable_dr_alignment
;
887 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
889 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
890 if (!supportable_dr_alignment
)
892 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
896 "not vectorized: unsupported unaligned load.");
899 "not vectorized: unsupported unaligned store.");
903 if (supportable_dr_alignment
!= dr_aligned
904 && vect_print_dump_info (REPORT_ALIGNMENT
))
905 fprintf (vect_dump
, "Vectorizing an unaligned access.");
911 /* Function vect_enhance_data_refs_alignment
913 This pass will use loop versioning and loop peeling in order to enhance
914 the alignment of data references in the loop.
916 FOR NOW: we assume that whatever versioning/peeling takes place, only the
917 original loop is to be vectorized; Any other loops that are created by
918 the transformations performed in this pass - are not supposed to be
919 vectorized. This restriction will be relaxed.
921 This pass will require a cost model to guide it whether to apply peeling
922 or versioning or a combination of the two. For example, the scheme that
923 intel uses when given a loop with several memory accesses, is as follows:
924 choose one memory access ('p') which alignment you want to force by doing
925 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
926 other accesses are not necessarily aligned, or (2) use loop versioning to
927 generate one loop in which all accesses are aligned, and another loop in
928 which only 'p' is necessarily aligned.
930 ("Automatic Intra-Register Vectorization for the Intel Architecture",
931 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
932 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
934 Devising a cost model is the most critical aspect of this work. It will
935 guide us on which access to peel for, whether to use loop versioning, how
936 many versions to create, etc. The cost model will probably consist of
937 generic considerations as well as target specific considerations (on
938 powerpc for example, misaligned stores are more painful than misaligned
941 Here are the general steps involved in alignment enhancements:
943 -- original loop, before alignment analysis:
945 x = q[i]; # DR_MISALIGNMENT(q) = unknown
946 p[i] = y; # DR_MISALIGNMENT(p) = unknown
949 -- After vect_compute_data_refs_alignment:
951 x = q[i]; # DR_MISALIGNMENT(q) = 3
952 p[i] = y; # DR_MISALIGNMENT(p) = unknown
955 -- Possibility 1: we do loop versioning:
957 for (i=0; i<N; i++){ # loop 1A
958 x = q[i]; # DR_MISALIGNMENT(q) = 3
959 p[i] = y; # DR_MISALIGNMENT(p) = 0
963 for (i=0; i<N; i++){ # loop 1B
964 x = q[i]; # DR_MISALIGNMENT(q) = 3
965 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
969 -- Possibility 2: we do loop peeling:
970 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
974 for (i = 3; i < N; i++){ # loop 2A
975 x = q[i]; # DR_MISALIGNMENT(q) = 0
976 p[i] = y; # DR_MISALIGNMENT(p) = unknown
979 -- Possibility 3: combination of loop peeling and versioning:
980 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
985 for (i = 3; i<N; i++){ # loop 3A
986 x = q[i]; # DR_MISALIGNMENT(q) = 0
987 p[i] = y; # DR_MISALIGNMENT(p) = 0
991 for (i = 3; i<N; i++){ # loop 3B
992 x = q[i]; # DR_MISALIGNMENT(q) = 0
993 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
997 These loops are later passed to loop_transform to be vectorized. The
998 vectorizer will use the alignment information to guide the transformation
999 (whether to generate regular loads/stores, or with special handling for
1003 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1005 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1006 enum dr_alignment_support supportable_dr_alignment
;
1007 struct data_reference
*dr0
= NULL
;
1008 struct data_reference
*dr
;
1010 bool do_peeling
= false;
1011 bool do_versioning
= false;
1014 /* While cost model enhancements are expected in the future, the high level
1015 view of the code at this time is as follows:
1017 A) If there is a misaligned write then see if peeling to align this write
1018 can make all data references satisfy vect_supportable_dr_alignment.
1019 If so, update data structures as needed and return true. Note that
1020 at this time vect_supportable_dr_alignment is known to return false
1021 for a a misaligned write.
1023 B) If peeling wasn't possible and there is a data reference with an
1024 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1025 then see if loop versioning checks can be used to make all data
1026 references satisfy vect_supportable_dr_alignment. If so, update
1027 data structures as needed and return true.
1029 C) If neither peeling nor versioning were successful then return false if
1030 any data reference does not satisfy vect_supportable_dr_alignment.
1032 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1034 Note, Possibility 3 above (which is peeling and versioning together) is not
1035 being done at this time. */
1037 /* (1) Peeling to force alignment. */
1039 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1041 + How many accesses will become aligned due to the peeling
1042 - How many accesses will become unaligned due to the peeling,
1043 and the cost of misaligned accesses.
1044 - The cost of peeling (the extra runtime checks, the increase
1047 The scheme we use FORNOW: peel to force the alignment of the first
1048 misaligned store in the loop.
1049 Rationale: misaligned stores are not yet supported.
1051 TODO: Use a cost model. */
1053 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1054 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1061 /* Often peeling for alignment will require peeling for loop-bound, which in
1062 turn requires that we know how to adjust the loop ivs after the loop. */
1063 if (!vect_can_advance_ivs_p (loop_vinfo
))
1071 if (known_alignment_for_access_p (dr0
))
1073 /* Since it's known at compile time, compute the number of iterations
1074 in the peeled loop (the peeling factor) for use in updating
1075 DR_MISALIGNMENT values. The peeling factor is the vectorization
1076 factor minus the misalignment as an element count. */
1077 mis
= DR_MISALIGNMENT (dr0
);
1078 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1079 npeel
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - mis
;
1082 /* Ensure that all data refs can be vectorized after the peel. */
1083 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1085 int save_misalignment
;
1090 save_misalignment
= DR_MISALIGNMENT (dr
);
1091 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1092 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1093 DR_MISALIGNMENT (dr
) = save_misalignment
;
1095 if (!supportable_dr_alignment
)
1104 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1105 If the misalignment of DR_i is identical to that of dr0 then set
1106 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1107 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1108 by the peeling factor times the element size of DR_i (MOD the
1109 vectorization factor times the size). Otherwise, the
1110 misalignment of DR_i must be set to unknown. */
1111 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1113 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1115 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1116 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1117 DR_MISALIGNMENT (dr0
) = 0;
1118 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1119 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1121 if (vect_print_dump_info (REPORT_DETAILS
))
1122 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1124 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1131 /* (2) Versioning to force alignment. */
1133 /* Try versioning if:
1134 1) flag_tree_vect_loop_version is TRUE
1135 2) optimize_size is FALSE
1136 3) there is at least one unsupported misaligned data ref with an unknown
1138 4) all misaligned data refs with a known misalignment are supported, and
1139 5) the number of runtime alignment checks is within reason. */
1141 do_versioning
= flag_tree_vect_loop_version
&& (!optimize_size
);
1145 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1147 if (aligned_access_p (dr
))
1150 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1152 if (!supportable_dr_alignment
)
1158 if (known_alignment_for_access_p (dr
)
1159 || VEC_length (tree
,
1160 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1161 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS
))
1163 do_versioning
= false;
1167 stmt
= DR_STMT (dr
);
1168 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1169 gcc_assert (vectype
);
1171 /* The rightmost bits of an aligned address must be zeros.
1172 Construct the mask needed for this test. For example,
1173 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1174 mask must be 15 = 0xf. */
1175 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1177 /* FORNOW: use the same mask to test all potentially unaligned
1178 references in the loop. The vectorizer currently supports
1179 a single vector size, see the reference to
1180 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1181 vectorization factor is computed. */
1182 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1183 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1184 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1185 VEC_safe_push (tree
, heap
,
1186 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1191 /* Versioning requires at least one misaligned data reference. */
1192 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1193 do_versioning
= false;
1194 else if (!do_versioning
)
1195 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1200 VEC(tree
,heap
) *may_misalign_stmts
1201 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1204 /* It can now be assumed that the data references in the statements
1205 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1206 of the loop being vectorized. */
1207 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
1209 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1210 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1211 DR_MISALIGNMENT (dr
) = 0;
1212 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1213 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1216 if (vect_print_dump_info (REPORT_DETAILS
))
1217 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1219 /* Peeling and versioning can't be done together at this time. */
1220 gcc_assert (! (do_peeling
&& do_versioning
));
1222 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1227 /* This point is reached if neither peeling nor versioning is being done. */
1228 gcc_assert (! (do_peeling
|| do_versioning
));
1230 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1235 /* Function vect_analyze_data_refs_alignment
1237 Analyze the alignment of the data-references in the loop.
1238 Return FALSE if a data reference is found that cannot be vectorized. */
1241 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1243 if (vect_print_dump_info (REPORT_DETAILS
))
1244 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1246 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1248 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1250 "not vectorized: can't calculate alignment for data ref.");
1258 /* Function vect_analyze_data_ref_access.
1260 Analyze the access pattern of the data-reference DR. For now, a data access
1261 has to be consecutive to be considered vectorizable. */
1264 vect_analyze_data_ref_access (struct data_reference
*dr
)
1266 tree step
= DR_STEP (dr
);
1267 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1269 if (!step
|| tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1271 if (vect_print_dump_info (REPORT_DETAILS
))
1272 fprintf (vect_dump
, "not consecutive access");
1279 /* Function vect_analyze_data_ref_accesses.
1281 Analyze the access pattern of all the data references in the loop.
1283 FORNOW: the only access pattern that is considered vectorizable is a
1284 simple step 1 (consecutive) access.
1286 FORNOW: handle only arrays and pointer accesses. */
1289 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1292 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1293 struct data_reference
*dr
;
1295 if (vect_print_dump_info (REPORT_DETAILS
))
1296 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1298 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1299 if (!vect_analyze_data_ref_access (dr
))
1301 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1302 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1310 /* Function vect_analyze_data_refs.
1312 Find all the data references in the loop.
1314 The general structure of the analysis of data refs in the vectorizer is as
1316 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1317 find and analyze all data-refs in the loop and their dependences.
1318 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1319 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1320 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1325 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1327 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1329 VEC (data_reference_p
, heap
) *datarefs
;
1330 struct data_reference
*dr
;
1333 if (vect_print_dump_info (REPORT_DETAILS
))
1334 fprintf (vect_dump
, "=== vect_analyze_data_refs ===");
1336 compute_data_dependences_for_loop (loop
, false,
1337 &LOOP_VINFO_DATAREFS (loop_vinfo
),
1338 &LOOP_VINFO_DDRS (loop_vinfo
));
1340 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1341 from stmt_vec_info struct to DR and vectype. */
1342 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1344 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1347 stmt_vec_info stmt_info
;
1349 if (!dr
|| !DR_REF (dr
))
1351 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1352 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
1356 /* Update DR field in stmt_vec_info struct. */
1357 stmt
= DR_STMT (dr
);
1358 stmt_info
= vinfo_for_stmt (stmt
);
1360 if (STMT_VINFO_DATA_REF (stmt_info
))
1362 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1365 "not vectorized: more than one data ref in stmt: ");
1366 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1370 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
1372 /* Check that analysis of the data-ref succeeded. */
1373 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
1376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1378 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
1379 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1383 if (!DR_MEMTAG (dr
))
1385 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1387 fprintf (vect_dump
, "not vectorized: no memory tag for ");
1388 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1393 /* Set vectype for STMT. */
1394 scalar_type
= TREE_TYPE (DR_REF (dr
));
1395 STMT_VINFO_VECTYPE (stmt_info
) =
1396 get_vectype_for_scalar_type (scalar_type
);
1397 if (!STMT_VINFO_VECTYPE (stmt_info
))
1399 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1402 "not vectorized: no vectype for stmt: ");
1403 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1404 fprintf (vect_dump
, " scalar_type: ");
1405 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
1415 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1417 /* Function vect_mark_relevant.
1419 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1422 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
1423 bool relevant_p
, bool live_p
)
1425 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1426 bool save_relevant_p
= STMT_VINFO_RELEVANT_P (stmt_info
);
1427 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
1429 if (vect_print_dump_info (REPORT_DETAILS
))
1430 fprintf (vect_dump
, "mark relevant %d, live %d.",relevant_p
, live_p
);
1432 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
1436 /* This is the last stmt in a sequence that was detected as a
1437 pattern that can potentially be vectorized. Don't mark the stmt
1438 as relevant/live because it's not going to vectorized.
1439 Instead mark the pattern-stmt that replaces it. */
1440 if (vect_print_dump_info (REPORT_DETAILS
))
1441 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
1442 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1443 stmt_info
= vinfo_for_stmt (pattern_stmt
);
1444 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
1445 save_relevant_p
= STMT_VINFO_RELEVANT_P (stmt_info
);
1446 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
1447 stmt
= pattern_stmt
;
1450 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
1451 STMT_VINFO_RELEVANT_P (stmt_info
) |= relevant_p
;
1453 if (TREE_CODE (stmt
) == PHI_NODE
)
1454 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1455 or live will fail vectorization later on. */
1458 if (STMT_VINFO_RELEVANT_P (stmt_info
) == save_relevant_p
1459 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
1461 if (vect_print_dump_info (REPORT_DETAILS
))
1462 fprintf (vect_dump
, "already marked relevant/live.");
1466 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
1470 /* Function vect_stmt_relevant_p.
1472 Return true if STMT in loop that is represented by LOOP_VINFO is
1473 "relevant for vectorization".
1475 A stmt is considered "relevant for vectorization" if:
1476 - it has uses outside the loop.
1477 - it has vdefs (it alters memory).
1478 - control stmts in the loop (except for the exit condition).
1480 CHECKME: what other side effects would the vectorizer allow? */
1483 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
1484 bool *relevant_p
, bool *live_p
)
1486 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1487 ssa_op_iter op_iter
;
1488 imm_use_iterator imm_iter
;
1489 use_operand_p use_p
;
1490 def_operand_p def_p
;
1492 *relevant_p
= false;
1495 /* cond stmt other than loop exit cond. */
1496 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
1499 /* changing memory. */
1500 if (TREE_CODE (stmt
) != PHI_NODE
)
1501 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
1503 if (vect_print_dump_info (REPORT_DETAILS
))
1504 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
1508 /* uses outside the loop. */
1509 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
1511 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
1513 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
1514 if (!flow_bb_inside_loop_p (loop
, bb
))
1516 if (vect_print_dump_info (REPORT_DETAILS
))
1517 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
1519 /* We expect all such uses to be in the loop exit phis
1520 (because of loop closed form) */
1521 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
1522 gcc_assert (bb
== loop
->single_exit
->dest
);
1529 return (*live_p
|| *relevant_p
);
1533 /* Function vect_mark_stmts_to_be_vectorized.
1535 Not all stmts in the loop need to be vectorized. For example:
1544 Stmt 1 and 3 do not need to be vectorized, because loop control and
1545 addressing of vectorized data-refs are handled differently.
1547 This pass detects such stmts. */
1550 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
1552 VEC(tree
,heap
) *worklist
;
1553 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1554 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1555 unsigned int nbbs
= loop
->num_nodes
;
1556 block_stmt_iterator si
;
1561 stmt_vec_info stmt_vinfo
;
1564 bool relevant_p
, live_p
;
1566 enum vect_def_type dt
;
1568 if (vect_print_dump_info (REPORT_DETAILS
))
1569 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
1571 worklist
= VEC_alloc (tree
, heap
, 64);
1573 /* 1. Init worklist. */
1576 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1578 if (vect_print_dump_info (REPORT_DETAILS
))
1580 fprintf (vect_dump
, "init: phi relevant? ");
1581 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
1584 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant_p
, &live_p
))
1585 vect_mark_relevant (&worklist
, phi
, relevant_p
, live_p
);
1588 for (i
= 0; i
< nbbs
; i
++)
1591 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1593 stmt
= bsi_stmt (si
);
1595 if (vect_print_dump_info (REPORT_DETAILS
))
1597 fprintf (vect_dump
, "init: stmt relevant? ");
1598 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1601 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant_p
, &live_p
))
1602 vect_mark_relevant (&worklist
, stmt
, relevant_p
, live_p
);
1607 /* 2. Process_worklist */
1609 while (VEC_length (tree
, worklist
) > 0)
1611 stmt
= VEC_pop (tree
, worklist
);
1613 if (vect_print_dump_info (REPORT_DETAILS
))
1615 fprintf (vect_dump
, "worklist: examine stmt: ");
1616 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1619 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1620 in the loop, mark the stmt that defines it (DEF_STMT) as
1621 relevant/irrelevant and live/dead according to the liveness and
1622 relevance properties of STMT.
1625 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
1627 ann
= stmt_ann (stmt
);
1628 stmt_vinfo
= vinfo_for_stmt (stmt
);
1630 relevant_p
= STMT_VINFO_RELEVANT_P (stmt_vinfo
);
1631 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
1633 /* Generally, the liveness and relevance properties of STMT are
1634 propagated to the DEF_STMTs of its USEs:
1635 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1636 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1641 If USE is used only for address computations (e.g. array indexing),
1642 which does not need to be directly vectorized, then the
1643 liveness/relevance of the respective DEF_STMT is left unchanged.
1646 If STMT has been identified as defining a reduction variable, then
1649 The last use of STMT is the reduction-variable, which is defined
1650 by a loop-header-phi. We don't want to mark the phi as live or
1651 relevant (because it does not need to be vectorized, it is handled
1652 as part of the vectorization of the reduction), so in this case we
1653 skip the call to vect_mark_relevant.
1655 The rest of the uses of STMT are defined in the loop body. For
1656 the def_stmt of these uses we want to set liveness/relevance
1658 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1659 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1660 because even though STMT is classified as live (since it defines a
1661 value that is used across loop iterations) and irrelevant (since it
1662 is not used inside the loop), it will be vectorized, and therefore
1663 the corresponding DEF_STMTs need to marked as relevant.
1667 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
1669 gcc_assert (!relevant_p
&& live_p
);
1674 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1676 /* case 1: we are only interested in uses that need to be vectorized.
1677 Uses that are used for address computation are not considered
1680 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
1683 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1685 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1686 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
1687 VEC_free (tree
, heap
, worklist
);
1691 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
1694 if (vect_print_dump_info (REPORT_DETAILS
))
1696 fprintf (vect_dump
, "worklist: examine use %d: ", i
);
1697 print_generic_expr (vect_dump
, use
, TDF_SLIM
);
1700 bb
= bb_for_stmt (def_stmt
);
1701 if (!flow_bb_inside_loop_p (loop
, bb
))
1704 /* case 2.1: the reduction-use does not mark the defining-phi
1706 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
1707 && TREE_CODE (def_stmt
) == PHI_NODE
)
1710 vect_mark_relevant (&worklist
, def_stmt
, relevant_p
, live_p
);
1712 } /* while worklist */
1714 VEC_free (tree
, heap
, worklist
);
1719 /* Function vect_can_advance_ivs_p
1721 In case the number of iterations that LOOP iterates is unknown at compile
1722 time, an epilog loop will be generated, and the loop induction variables
1723 (IVs) will be "advanced" to the value they are supposed to take just before
1724 the epilog loop. Here we check that the access function of the loop IVs
1725 and the expression that represents the loop bound are simple enough.
1726 These restrictions will be relaxed in the future. */
1729 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
1731 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1732 basic_block bb
= loop
->header
;
1735 /* Analyze phi functions of the loop header. */
1737 if (vect_print_dump_info (REPORT_DETAILS
))
1738 fprintf (vect_dump
, "=== vect_can_advance_ivs_p ===");
1740 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1742 tree access_fn
= NULL
;
1743 tree evolution_part
;
1745 if (vect_print_dump_info (REPORT_DETAILS
))
1747 fprintf (vect_dump
, "Analyze phi: ");
1748 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
1751 /* Skip virtual phi's. The data dependences that are associated with
1752 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1754 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
1756 if (vect_print_dump_info (REPORT_DETAILS
))
1757 fprintf (vect_dump
, "virtual phi. skip.");
1761 /* Skip reduction phis. */
1763 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
1765 if (vect_print_dump_info (REPORT_DETAILS
))
1766 fprintf (vect_dump
, "reduc phi. skip.");
1770 /* Analyze the evolution function. */
1772 access_fn
= instantiate_parameters
1773 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1777 if (vect_print_dump_info (REPORT_DETAILS
))
1778 fprintf (vect_dump
, "No Access function.");
1782 if (vect_print_dump_info (REPORT_DETAILS
))
1784 fprintf (vect_dump
, "Access function of PHI: ");
1785 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
1788 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1790 if (evolution_part
== NULL_TREE
)
1792 if (vect_print_dump_info (REPORT_DETAILS
))
1793 fprintf (vect_dump
, "No evolution.");
1797 /* FORNOW: We do not transform initial conditions of IVs
1798 which evolution functions are a polynomial of degree >= 2. */
1800 if (tree_is_chrec (evolution_part
))
1808 /* Function vect_get_loop_niters.
1810 Determine how many iterations the loop is executed.
1811 If an expression that represents the number of iterations
1812 can be constructed, place it in NUMBER_OF_ITERATIONS.
1813 Return the loop exit condition. */
1816 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
1820 if (vect_print_dump_info (REPORT_DETAILS
))
1821 fprintf (vect_dump
, "=== get_loop_niters ===");
1823 niters
= number_of_iterations_in_loop (loop
);
1825 if (niters
!= NULL_TREE
1826 && niters
!= chrec_dont_know
)
1828 *number_of_iterations
= niters
;
1830 if (vect_print_dump_info (REPORT_DETAILS
))
1832 fprintf (vect_dump
, "==> get_loop_niters:" );
1833 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
1837 return get_loop_exit_condition (loop
);
1841 /* Function vect_analyze_loop_form.
1843 Verify the following restrictions (some may be relaxed in the future):
1844 - it's an inner-most loop
1845 - number of BBs = 2 (which are the loop header and the latch)
1846 - the loop has a pre-header
1847 - the loop has a single entry and exit
1848 - the loop exit condition is simple enough, and the number of iterations
1849 can be analyzed (a countable loop). */
1851 static loop_vec_info
1852 vect_analyze_loop_form (struct loop
*loop
)
1854 loop_vec_info loop_vinfo
;
1856 tree number_of_iterations
= NULL
;
1858 if (vect_print_dump_info (REPORT_DETAILS
))
1859 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
1863 if (vect_print_dump_info (REPORT_OUTER_LOOPS
))
1864 fprintf (vect_dump
, "not vectorized: nested loop.");
1868 if (!loop
->single_exit
1869 || loop
->num_nodes
!= 2
1870 || EDGE_COUNT (loop
->header
->preds
) != 2)
1872 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1874 if (!loop
->single_exit
)
1875 fprintf (vect_dump
, "not vectorized: multiple exits.");
1876 else if (loop
->num_nodes
!= 2)
1877 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
1878 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
1879 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
1885 /* We assume that the loop exit condition is at the end of the loop. i.e,
1886 that the loop is represented as a do-while (with a proper if-guard
1887 before the loop if needed), where the loop header contains all the
1888 executable statements, and the latch is empty. */
1889 if (!empty_block_p (loop
->latch
))
1891 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1892 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
1896 /* Make sure there exists a single-predecessor exit bb: */
1897 if (!single_pred_p (loop
->single_exit
->dest
))
1899 edge e
= loop
->single_exit
;
1900 if (!(e
->flags
& EDGE_ABNORMAL
))
1902 split_loop_exit_edge (e
);
1903 if (vect_print_dump_info (REPORT_DETAILS
))
1904 fprintf (vect_dump
, "split exit edge.");
1908 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1909 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
1914 if (empty_block_p (loop
->header
))
1916 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1917 fprintf (vect_dump
, "not vectorized: empty loop.");
1921 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
1924 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1925 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
1929 if (!number_of_iterations
)
1931 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1933 "not vectorized: number of iterations cannot be computed.");
1937 if (chrec_contains_undetermined (number_of_iterations
))
1939 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1940 fprintf (vect_dump
, "Infinite number of iterations.");
1944 loop_vinfo
= new_loop_vec_info (loop
);
1945 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
1947 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
1949 if (vect_print_dump_info (REPORT_DETAILS
))
1951 fprintf (vect_dump
, "Symbolic number of iterations is ");
1952 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
1956 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
1958 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1959 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
1963 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
1969 /* Function vect_analyze_loop.
1971 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1972 for it. The different analyses will record information in the
1973 loop_vec_info struct. */
1975 vect_analyze_loop (struct loop
*loop
)
1978 loop_vec_info loop_vinfo
;
1980 if (vect_print_dump_info (REPORT_DETAILS
))
1981 fprintf (vect_dump
, "===== analyze_loop_nest =====");
1983 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1985 loop_vinfo
= vect_analyze_loop_form (loop
);
1988 if (vect_print_dump_info (REPORT_DETAILS
))
1989 fprintf (vect_dump
, "bad loop form.");
1993 /* Find all data references in the loop (which correspond to vdefs/vuses)
1994 and analyze their evolution in the loop.
1996 FORNOW: Handle only simple, array references, which
1997 alignment can be forced, and aligned pointer-references. */
1999 ok
= vect_analyze_data_refs (loop_vinfo
);
2002 if (vect_print_dump_info (REPORT_DETAILS
))
2003 fprintf (vect_dump
, "bad data references.");
2004 destroy_loop_vec_info (loop_vinfo
);
2008 /* Classify all cross-iteration scalar data-flow cycles.
2009 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2011 vect_analyze_scalar_cycles (loop_vinfo
);
2013 vect_pattern_recog (loop_vinfo
);
2015 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2017 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
2020 if (vect_print_dump_info (REPORT_DETAILS
))
2021 fprintf (vect_dump
, "unexpected pattern.");
2022 destroy_loop_vec_info (loop_vinfo
);
2026 /* Analyze the alignment of the data-refs in the loop.
2027 Fail if a data reference is found that cannot be vectorized. */
2029 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
2032 if (vect_print_dump_info (REPORT_DETAILS
))
2033 fprintf (vect_dump
, "bad data alignment.");
2034 destroy_loop_vec_info (loop_vinfo
);
2038 ok
= vect_determine_vectorization_factor (loop_vinfo
);
2041 if (vect_print_dump_info (REPORT_DETAILS
))
2042 fprintf (vect_dump
, "can't determine vectorization factor.");
2043 destroy_loop_vec_info (loop_vinfo
);
2047 /* Analyze data dependences between the data-refs in the loop.
2048 FORNOW: fail at the first data dependence that we encounter. */
2050 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
2053 if (vect_print_dump_info (REPORT_DETAILS
))
2054 fprintf (vect_dump
, "bad data dependence.");
2055 destroy_loop_vec_info (loop_vinfo
);
2059 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2060 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2062 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
2065 if (vect_print_dump_info (REPORT_DETAILS
))
2066 fprintf (vect_dump
, "bad data access.");
2067 destroy_loop_vec_info (loop_vinfo
);
2071 /* This pass will decide on using loop versioning and/or loop peeling in
2072 order to enhance the alignment of data references in the loop. */
2074 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
2077 if (vect_print_dump_info (REPORT_DETAILS
))
2078 fprintf (vect_dump
, "bad data alignment.");
2079 destroy_loop_vec_info (loop_vinfo
);
2083 /* Scan all the operations in the loop and make sure they are
2086 ok
= vect_analyze_operations (loop_vinfo
);
2089 if (vect_print_dump_info (REPORT_DETAILS
))
2090 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
2091 destroy_loop_vec_info (loop_vinfo
);
2095 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;