1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
/* NOTE(review): This whole region is a lossy extraction of the original
   file: each declaration is split across several lines, the original
   file's own line numbers are fused into the text as leading tokens,
   and interior lines are missing.  The text is preserved byte-for-byte;
   only comments have been added.  These appear to be file-static forward
   declarations for the main phases of loop-vectorization analysis
   (loop-form check, data-ref analysis, dependence/alignment/access
   analysis, operation analysis, VF determination).  */
43 /* Main analysis functions. */
44 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
45 static bool vect_analyze_data_refs (loop_vec_info
);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
47 static void vect_analyze_scalar_cycles (loop_vec_info
);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
51 static bool vect_compute_data_refs_alignment (loop_vec_info
);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info
);
53 static bool vect_analyze_operations (loop_vec_info
);
54 static bool vect_determine_vectorization_factor (loop_vec_info
);
/* NOTE(review): Forward declarations for helper routines used by the
   analysis phases above.  Same lossy-extraction caveat: statements are
   split across lines and embedded original line numbers remain in the
   text; nothing but comments has been changed here.  */
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
58 static tree
vect_get_loop_niters (struct loop
*, tree
*);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation
*, loop_vec_info
, bool);
61 static bool vect_compute_data_ref_alignment (struct data_reference
*);
62 static bool vect_analyze_data_ref_access (struct data_reference
*);
63 static bool vect_can_advance_ivs_p (loop_vec_info
);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference
*, struct data_reference
*, int npeel
);
/* NOTE(review): Lossy extraction — statements are split across lines,
   original line numbers are embedded in the text, and interior lines
   (result type, braces, returns) are missing.  Text preserved
   byte-for-byte; comments only added.
   Purpose (per the surviving header comment below): walk every stmt in
   every basic block of the loop, find the vector type for each relevant
   stmt's scalar type, and set the loop's vectorization factor to the
   largest number of vector subparts seen; fails (presumably returns
   false — TODO confirm against the full file) on unsupported data
   types or when VF <= 1.  */
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
95 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
)
;
96 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
97 int nbbs
= loop
->num_nodes
;
98 block_stmt_iterator si
;
99 unsigned int vectorization_factor
= 0;
103 if (vect_print_dump_info (REPORT_DETAILS
))
104 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
/* Iterate over all basic blocks of the loop.  */
106 for (i
= 0; i
< nbbs
; i
++)
108 basic_block bb
= bbs
[i
];
110 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
112 tree stmt
= bsi_stmt (si
);
114 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
117 if (vect_print_dump_info (REPORT_DETAILS
))
119 fprintf (vect_dump
, "==> examining statement: ");
120 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
123 gcc_assert (stmt_info
);
124 /* skip stmts which do not need to be vectorized. */
125 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
126 && !STMT_VINFO_LIVE_P (stmt_info
))
128 if (vect_print_dump_info (REPORT_DETAILS
))
129 fprintf (vect_dump
, "skip.");
/* A stmt whose own type is already a vector mode is not handled —
   bail out of vectorization (dump path below).  */
133 if (!GIMPLE_STMT_P (stmt
)
134 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
136 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
138 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
139 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
/* Determine the scalar type: reuse a cached vectype if present,
   else derive it from the data-ref, the RHS of a modify stmt,
   or the stmt itself.  */
144 if (STMT_VINFO_VECTYPE (stmt_info
))
146 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
147 scalar_type
= TREE_TYPE (vectype
);
151 if (STMT_VINFO_DATA_REF (stmt_info
))
153 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
154 else if (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
)
155 scalar_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
157 scalar_type
= TREE_TYPE (stmt
);
159 if (vect_print_dump_info (REPORT_DETAILS
))
161 fprintf (vect_dump
, "get vectype for scalar type: ");
162 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
165 vectype
= get_vectype_for_scalar_type (scalar_type
);
168 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
171 "not vectorized: unsupported data-type ");
172 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
176 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
179 if (vect_print_dump_info (REPORT_DETAILS
))
181 fprintf (vect_dump
, "vectype: ");
182 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
/* VF = max over all stmts of the vector's subpart count.  */
185 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
186 if (vect_print_dump_info (REPORT_DETAILS
))
187 fprintf (vect_dump
, "nunits = %d", nunits
);
189 if (!vectorization_factor
190 || (nunits
> vectorization_factor
))
191 vectorization_factor
= nunits
;
196 /* TODO: Analyze cost. Decide if worth while to vectorize. */
198 if (vectorization_factor
<= 1)
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
201 fprintf (vect_dump
, "not vectorized: unsupported data-type");
204 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
/* NOTE(review): Lossy extraction — see note at top of file region.
   Text preserved byte-for-byte; comments only added.
   Purpose (per surviving header comment): verify every relevant/live
   stmt and phi in the loop is vectorizable, then check that the
   iteration count is compatible with the vectorization factor and that
   an epilog loop can be created when needed.  Return type and several
   interior lines (braces, returns, fprintf prefixes) are missing from
   this extraction.  */
210 /* Function vect_analyze_operations.
212 Scan the loop stmts and make sure they are all vectorizable. */
215 vect_analyze_operations (loop_vec_info loop_vinfo
)
217 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
218 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
219 int nbbs
= loop
->num_nodes
;
220 block_stmt_iterator si
;
221 unsigned int vectorization_factor
= 0;
225 stmt_vec_info stmt_info
;
226 bool need_to_vectorize
= false;
228 if (vect_print_dump_info (REPORT_DETAILS
))
229 fprintf (vect_dump
, "=== vect_analyze_operations ===");
231 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
232 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
234 for (i
= 0; i
< nbbs
; i
++)
236 basic_block bb
= bbs
[i
];
/* First examine the loop phis.  Live or relevant phis are
   unsupported here (FORNOW) — dump and presumably fail.  */
238 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
240 stmt_info
= vinfo_for_stmt (phi
);
241 if (vect_print_dump_info (REPORT_DETAILS
))
243 fprintf (vect_dump
, "examining phi: ");
244 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
247 gcc_assert (stmt_info
);
249 if (STMT_VINFO_LIVE_P (stmt_info
))
251 /* FORNOW: not yet supported. */
252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
253 fprintf (vect_dump
, "not vectorized: value used after loop.");
257 if (STMT_VINFO_RELEVANT_P (stmt_info
))
259 /* Most likely a reduction-like computation that is used
261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
262 fprintf (vect_dump
, "not vectorized: unsupported pattern.");
/* Now examine the ordinary stmts of the basic block.  */
267 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
269 tree stmt
= bsi_stmt (si
);
270 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
272 if (vect_print_dump_info (REPORT_DETAILS
))
274 fprintf (vect_dump
, "==> examining statement: ");
275 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
278 gcc_assert (stmt_info
);
280 /* skip stmts which do not need to be vectorized.
281 this is expected to include:
282 - the COND_EXPR which is the loop exit condition
283 - any LABEL_EXPRs in the loop
284 - computations that are used only for array indexing or loop
287 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
288 && !STMT_VINFO_LIVE_P (stmt_info
))
290 if (vect_print_dump_info (REPORT_DETAILS
))
291 fprintf (vect_dump
, "irrelevant.");
295 if (STMT_VINFO_RELEVANT_P (stmt_info
))
297 gcc_assert (GIMPLE_STMT_P (stmt
)
298 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
299 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
/* A relevant stmt is vectorizable if any of the vectorizable_*
   predicates accepts it (analysis mode: NULL vec_stmt args).  */
301 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
302 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
303 || vectorizable_operation (stmt
, NULL
, NULL
)
304 || vectorizable_assignment (stmt
, NULL
, NULL
)
305 || vectorizable_load (stmt
, NULL
, NULL
)
306 || vectorizable_call (stmt
, NULL
, NULL
)
307 || vectorizable_store (stmt
, NULL
, NULL
)
308 || vectorizable_condition (stmt
, NULL
, NULL
));
312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
315 "not vectorized: relevant stmt not supported: ");
316 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
320 need_to_vectorize
= true;
323 if (STMT_VINFO_LIVE_P (stmt_info
))
325 ok
= vectorizable_reduction (stmt
, NULL
, NULL
);
328 need_to_vectorize
= true;
330 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
334 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
337 "not vectorized: live stmt not supported: ");
338 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
346 /* TODO: Analyze cost. Decide if worth while to vectorize. */
348 /* All operations in the loop are either irrelevant (deal with loop
349 control, or dead), or only used outside the loop and can be moved
350 out of the loop (e.g. invariants, inductions). The loop can be
351 optimized away by scalar optimizations. We're better off not
352 touching this loop. */
353 if (!need_to_vectorize
)
355 if (vect_print_dump_info (REPORT_DETAILS
))
357 "All the computation can be taken out of the loop.");
358 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
360 "not vectorized: redundant loop. no profit to vectorize.");
364 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
365 && vect_print_dump_info (REPORT_DETAILS
))
367 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
368 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
/* A known iteration count below VF cannot be vectorized.  */
370 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
371 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
374 fprintf (vect_dump
, "not vectorized: iteration count too small.");
/* Unknown niters, niters not divisible by VF, or alignment peeling
   all require an epilog loop — verify one can be built.  */
378 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
379 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
380 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
382 if (vect_print_dump_info (REPORT_DETAILS
))
383 fprintf (vect_dump
, "epilog loop required.");
384 if (!vect_can_advance_ivs_p (loop_vinfo
))
386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
388 "not vectorized: can't create epilog loop 1.");
391 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
393 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
395 "not vectorized: can't create epilog loop 2.");
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): decide
   whether USE serves any role in STMT other than array indexing; when
   STMT has no data-ref every use trivially qualifies.  The return
   statements of this predicate are among the lines dropped by the
   extraction.  */
404 /* Function exist_non_indexing_operands_for_use_p
406 USE is one of the uses attached to STMT. Check if USE is
407 used in STMT for anything other than indexing an array. */
410 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
413 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
415 /* USE corresponds to some operand in STMT. If there is no data
416 reference in STMT, then any operand that corresponds to USE
417 is not indexing an array. */
418 if (!STMT_VINFO_DATA_REF (stmt_info
))
421 /* STMT has a data_ref. FORNOW this means that its of one of
425 (This should have been verified in analyze_data_refs).
427 'var' in the second case corresponds to a def, not a use,
428 so USE cannot correspond to any operands that are not used
431 Therefore, all we need to check is if STMT falls into the
432 first case, and whether var corresponds to USE. */
434 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) == SSA_NAME
)
437 operand
= GIMPLE_STMT_OPERAND (stmt
, 1);
439 if (TREE_CODE (operand
) != SSA_NAME
)
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): walk
   the loop-header phis and classify each scalar def-use cycle as
   induction, reduction, or unknown, recording the classification in
   STMT_VINFO_DEF_TYPE.  Several example snippets from the original
   header comment were dropped by the extraction.  */
449 /* Function vect_analyze_scalar_cycles.
451 Examine the cross iteration def-use cycles of scalar variables, by
452 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
453 following: invariant, induction, reduction, unknown.
455 Some forms of scalar cycles are not yet supported.
457 Example1: reduction: (unsupported yet)
463 Example2: induction: (unsupported yet)
469 Note: the following loop *is* vectorizable:
475 even though it has a def-use cycle caused by the induction variable i:
477 loop: i_2 = PHI (i_0, i_1)
482 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
483 it does not need to be vectorized because it is only used for array
484 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
485 loop2 on the other hand is relevant (it is being written to memory).
489 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
492 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
493 basic_block bb
= loop
->header
;
496 if (vect_print_dump_info (REPORT_DETAILS
))
497 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
499 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
501 tree access_fn
= NULL
;
502 tree def
= PHI_RESULT (phi
);
503 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
506 if (vect_print_dump_info (REPORT_DETAILS
))
508 fprintf (vect_dump
, "Analyze phi: ");
509 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
512 /* Skip virtual phi's. The data dependences that are associated with
513 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
515 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
517 if (vect_print_dump_info (REPORT_DETAILS
))
518 fprintf (vect_dump
, "virtual phi. skip.");
/* Default classification until proven otherwise.  */
522 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
524 /* Analyze the evolution function. */
526 access_fn
= analyze_scalar_evolution (loop
, def
);
531 if (vect_print_dump_info (REPORT_DETAILS
))
533 fprintf (vect_dump
, "Access function of PHI: ");
534 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
/* A simple IV evolution means this phi is an induction.  */
537 if (vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
, &dummy
))
539 if (vect_print_dump_info (REPORT_DETAILS
))
540 fprintf (vect_dump
, "Detected induction.");
541 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
545 /* TODO: handle invariant phis */
/* Otherwise test for a simple reduction; on success both the phi
   and the reduction stmt are classified (RHS of the second
   assignment below was dropped by the extraction).  */
547 reduc_stmt
= vect_is_simple_reduction (loop
, phi
);
550 if (vect_print_dump_info (REPORT_DETAILS
))
551 fprintf (vect_dump
, "Detected reduction.");
552 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
553 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
557 if (vect_print_dump_info (REPORT_DETAILS
))
558 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): insert
   data-ref DRA into DRB's interleaving chain, keeping the chain sorted
   by DR_INIT; the enclosing while-loop lines and braces were dropped
   by the extraction.  */
566 /* Function vect_insert_into_interleaving_chain.
568 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
571 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
572 struct data_reference
*drb
)
574 tree prev
, next
, next_init
;
575 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
576 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
578 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
579 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
/* Walk the chain until the first element whose init exceeds DRA's,
   and splice DRA in before it.  */
582 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
583 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
586 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
587 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
591 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
594 /* We got to the end of the list. Insert here. */
595 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
596 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL_TREE
;
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): merge
   or extend the interleaving chains of DRA and DRB, covering the four
   membership cases enumerated in the header.  Loop keywords, braces,
   and returns were dropped by the extraction.  */
600 /* Function vect_update_interleaving_chain.
602 For two data-refs DRA and DRB that are a part of a chain interleaved data
603 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
605 There are four possible cases:
606 1. New stmts - both DRA and DRB are not a part of any chain:
609 2. DRB is a part of a chain and DRA is not:
610 no need to update FIRST_DR
611 no need to insert DRB
612 insert DRA according to init
613 3. DRA is a part of a chain and DRB is not:
614 if (init of FIRST_DR > init of DRB)
616 NEXT(FIRST_DR) = previous FIRST_DR
618 insert DRB according to its init
619 4. both DRA and DRB are in some interleaving chains:
620 choose the chain with the smallest init of FIRST_DR
621 insert the nodes of the second chain into the first one. */
624 vect_update_interleaving_chain (struct data_reference
*drb
,
625 struct data_reference
*dra
)
627 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
628 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
629 tree next_init
, init_dra_chain
, init_drb_chain
, first_a
, first_b
;
630 tree node
, prev
, next
, node_init
, first_stmt
;
632 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
633 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
635 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
636 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
637 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
641 /* 2. DRB is a part of a chain and DRA is not. */
642 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
644 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
645 /* Insert DRA into the chain of DRB. */
646 vect_insert_into_interleaving_chain (dra
, drb
);
650 /* 3. DRA is a part of a chain and DRB is not. */
651 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
653 tree old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
654 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
658 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
660 /* DRB's init is smaller than the init of the stmt previously marked
661 as the first stmt of the interleaving chain of DRA. Therefore, we
662 update FIRST_STMT and put DRB in the head of the list. */
663 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
664 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
666 /* Update all the stmts in the list to point to the new FIRST_STMT. */
667 tmp
= old_first_stmt
;
670 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
671 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
676 /* Insert DRB in the list of DRA. */
677 vect_insert_into_interleaving_chain (drb
, dra
);
678 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
683 /* 4. both DRA and DRB are in some interleaving chains. */
684 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
685 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
686 if (first_a
== first_b
)
688 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
689 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
691 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
693 /* Insert the nodes of DRA chain into the DRB chain.
694 After inserting a node, continue from this node of the DRB chain (don't
695 start from the beginning. */
696 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
697 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
698 first_stmt
= first_b
;
702 /* Insert the nodes of DRB chain into the DRA chain.
703 After inserting a node, continue from this node of the DRA chain (don't
704 start from the beginning. */
705 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
706 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
707 first_stmt
= first_a
;
/* Merge loop: splice each node into the destination chain in
   init order (enclosing while-loop lines dropped by extraction).  */
712 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
713 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
716 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
717 if (tree_int_cst_compare (next_init
, node_init
) > 0)
720 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
721 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
726 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
730 /* We got to the end of the list. Insert here. */
731 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
732 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL_TREE
;
735 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
736 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment):
   structural equality of two offset expressions, comparing binary
   nodes recursively after stripping nops.  The early `return`
   keywords were dropped by the extraction.  */
741 /* Function vect_equal_offsets.
743 Check if OFFSET1 and OFFSET2 are identical expressions. */
746 vect_equal_offsets (tree offset1
, tree offset2
)
750 STRIP_NOPS (offset1
);
751 STRIP_NOPS (offset2
);
753 if (offset1
== offset2
)
756 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
757 || !BINARY_CLASS_P (offset1
)
758 || !BINARY_CLASS_P (offset2
))
/* Recurse on both operands of the binary expressions.  */
761 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
762 TREE_OPERAND (offset2
, 0));
763 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
764 TREE_OPERAND (offset2
, 1));
766 return (res0
&& res1
);
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): decide
   whether DRA and DRB interleave (same base/offset, same type size and
   step, inits differing by a multiple of the element size within one
   step) and, if so, link them into an interleaving chain.  */
770 /* Function vect_check_interleaving.
772 Check if DRA and DRB are a part of interleaving. In case they are, insert
773 DRA and DRB in an interleaving chain. */
776 vect_check_interleaving (struct data_reference
*dra
,
777 struct data_reference
*drb
)
779 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
781 /* Check that the data-refs have same first location (except init) and they
782 are both either store or load (not load and store). */
783 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
784 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
785 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
786 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
787 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
788 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
789 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
790 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
794 1. data-refs are of the same type
795 2. their steps are equal
796 3. the step is greater than the difference between data-refs' inits */
797 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
798 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
800 if (type_size_a
!= type_size_b
801 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
)))
804 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
805 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
806 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
810 /* If init_a == init_b + the size of the type * k, we have an interleaving,
811 and DRB is accessed before DRA. */
812 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
814 if ((init_a
- init_b
) > step
)
817 if (diff_mod_size
== 0)
819 vect_update_interleaving_chain (drb
, dra
);
820 if (vect_print_dump_info (REPORT_DR_DETAILS
))
822 fprintf (vect_dump
, "Detected interleaving ");
823 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
824 fprintf (vect_dump
, " and ");
825 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
832 /* If init_b == init_a + the size of the type * k, we have an
833 interleaving, and DRA is accessed before DRB. */
834 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
836 if ((init_b
- init_a
) > step
)
839 if (diff_mod_size
== 0)
841 vect_update_interleaving_chain (dra
, drb
);
842 if (vect_print_dump_info (REPORT_DR_DETAILS
))
844 fprintf (vect_dump
, "Detected interleaving ");
845 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
846 fprintf (vect_dump
, " and ");
847 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): report
   whether a dependence between DRA and DRB (might) block
   vectorization, consulting the DDR's distance vectors against the
   vectorization factor; known-independent pairs are additionally
   checked for interleaving.  Return keywords/braces were dropped by
   the extraction.  */
855 /* Function vect_analyze_data_ref_dependence.
857 Return TRUE if there (might) exist a dependence between a memory-reference
858 DRA and a memory-reference DRB. */
861 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
862 loop_vec_info loop_vinfo
,
863 bool check_interleaving
)
866 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
867 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
868 struct data_reference
*dra
= DDR_A (ddr
);
869 struct data_reference
*drb
= DDR_B (ddr
);
870 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
871 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
872 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
873 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
874 lambda_vector dist_v
;
875 unsigned int loop_depth
;
877 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
879 /* Independent data accesses. */
880 if (check_interleaving
)
881 vect_check_interleaving (dra
, drb
);
/* Read-after-read (or a ref against itself) is harmless.  */
885 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
888 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
890 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
893 "not vectorized: can't determine dependence between ");
894 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
895 fprintf (vect_dump
, " and ");
896 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
901 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
903 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
905 fprintf (vect_dump
, "not vectorized: bad dist vector for ");
906 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
907 fprintf (vect_dump
, " and ");
908 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
/* Examine every distance vector of the relation.  */
913 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
914 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
916 int dist
= dist_v
[loop_depth
];
918 if (vect_print_dump_info (REPORT_DR_DETAILS
))
919 fprintf (vect_dump
, "dependence distance = %d.", dist
);
921 /* Same loop iteration. */
922 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
924 /* Two references with distance zero have the same alignment. */
925 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
926 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
927 if (vect_print_dump_info (REPORT_ALIGNMENT
))
928 fprintf (vect_dump
, "accesses have the same alignment.");
929 if (vect_print_dump_info (REPORT_DR_DETAILS
))
931 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
932 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
933 fprintf (vect_dump
, " and ");
934 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
939 if (abs (dist
) >= vectorization_factor
)
941 /* Dependence distance does not create dependence, as far as vectorization
942 is concerned, in this case. */
943 if (vect_print_dump_info (REPORT_DR_DETAILS
))
944 fprintf (vect_dump
, "dependence distance >= VF.");
948 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
951 "not vectorized: possible dependence between data-refs ");
952 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
953 fprintf (vect_dump
, " and ");
954 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): return
   TRUE for a store-store or load-store dependence in DDR, FALSE for
   independent/same/read-read pairs; the `return` statements were
   dropped by the extraction.  */
964 /* Function vect_check_dependences.
966 Return TRUE if there is a store-store or load-store dependence between
967 data-refs in DDR, otherwise return FALSE. */
970 vect_check_dependences (struct data_dependence_relation
*ddr
)
972 struct data_reference
*dra
= DDR_A (ddr
);
973 struct data_reference
*drb
= DDR_B (ddr
);
975 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
|| dra
== drb
)
976 /* Independent or same data accesses. */
979 if (DR_IS_READ (dra
) == DR_IS_READ (drb
) && DR_IS_READ (dra
))
983 if (vect_print_dump_info (REPORT_DR_DETAILS
))
985 fprintf (vect_dump
, "possible store or store/load dependence between ");
986 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
987 fprintf (vect_dump
, " and ");
988 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): run
   two passes over all DDRs — first disable interleaving detection if
   any store-store/load-store dependence exists, then check each DDR
   for a vectorization-blocking dependence.  */
994 /* Function vect_analyze_data_ref_dependences.
996 Examine all the data references in the loop, and make sure there do not
997 exist any data dependences between them. */
1000 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1003 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1004 struct data_dependence_relation
*ddr
;
1005 bool check_interleaving
= true;
1007 if (vect_print_dump_info (REPORT_DETAILS
))
1008 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1010 /* We allow interleaving only if there are no store-store and load-store
1011 dependencies in the loop. */
1012 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1014 if (vect_check_dependences (ddr
))
1016 check_interleaving
= false;
1021 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1022 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
, check_interleaving
))
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): compute
   DR_MISALIGNMENT for data-ref DR relative to its stmt's vector type,
   forcing the base decl's alignment when possible; -1 marks unknown
   misalignment.  Several condition lines and braces were dropped by
   the extraction.  */
1029 /* Function vect_compute_data_ref_alignment
1031 Compute the misalignment of the data reference DR.
1034 1. If during the misalignment computation it is found that the data reference
1035 cannot be vectorized then false is returned.
1036 2. DR_MISALIGNMENT (DR) is defined.
1038 FOR NOW: No analysis is actually performed. Misalignment is calculated
1039 only for trivial cases. TODO. */
1042 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1044 tree stmt
= DR_STMT (dr
);
1045 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1046 tree ref
= DR_REF (dr
);
1048 tree base
, base_addr
;
1051 tree aligned_to
, alignment
;
1053 if (vect_print_dump_info (REPORT_DETAILS
))
1054 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1056 /* Initialize misalignment to unknown. */
1057 DR_MISALIGNMENT (dr
) = -1;
1059 misalign
= DR_OFFSET_MISALIGNMENT (dr
);
1060 aligned_to
= DR_ALIGNED_TO (dr
);
1061 base_addr
= DR_BASE_ADDRESS (dr
);
1062 base
= build_fold_indirect_ref (base_addr
);
1063 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1064 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
/* Alignment guaranteed below the vector alignment => unknown.  */
1066 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1069 if (vect_print_dump_info (REPORT_DETAILS
))
1071 fprintf (vect_dump
, "Unknown alignment for access: ");
1072 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
/* Decide whether the base object itself is sufficiently aligned
   (decl alignment, or pointee-type alignment for SSA_NAME bases).  */
1078 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1080 || (TREE_CODE (base_addr
) == SSA_NAME
1081 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1082 TREE_TYPE (base_addr
)))),
1084 base_aligned
= true;
1086 base_aligned
= false;
1090 /* Do not change the alignment of global variables if
1091 flag_section_anchors is enabled. */
1092 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1093 || (TREE_STATIC (base
) && flag_section_anchors
))
1095 if (vect_print_dump_info (REPORT_DETAILS
))
1097 fprintf (vect_dump
, "can't force alignment of ref: ");
1098 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1103 /* Force the alignment of the decl.
1104 NOTE: This is the only change to the code we make during
1105 the analysis phase, before deciding to vectorize the loop. */
1106 if (vect_print_dump_info (REPORT_DETAILS
))
1107 fprintf (vect_dump
, "force alignment");
1108 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1109 DECL_USER_ALIGN (base
) = 1;
1112 /* At this point we assume that the base is aligned. */
1113 gcc_assert (base_aligned
1114 || (TREE_CODE (base
) == VAR_DECL
1115 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1117 /* Modulo alignment. */
1118 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1120 if (!host_integerp (misalign
, 1))
1122 /* Negative or overflowed misalignment value. */
1123 if (vect_print_dump_info (REPORT_DETAILS
))
1124 fprintf (vect_dump
, "unexpected misalign value");
1128 DR_MISALIGNMENT (dr
) = TREE_INT_CST_LOW (misalign
);
1130 if (vect_print_dump_info (REPORT_DETAILS
))
1132 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1133 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
/* NOTE(review): Lossy extraction — text preserved byte-for-byte,
   comments only added.  Purpose (per surviving header comment): run
   vect_compute_data_ref_alignment over every data-ref in the loop;
   fail (the return is among the dropped lines) if any cannot be
   handled.  */
1140 /* Function vect_compute_data_refs_alignment
1142 Compute the misalignment of data references in the loop.
1143 Return FALSE if a data reference is found that cannot be vectorized. */
1146 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1148 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1149 struct data_reference
*dr
;
1152 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1153 if (!vect_compute_data_ref_alignment (dr
))
1160 /* Function vect_update_misalignment_for_peel
1162 DR - the data reference whose misalignment is to be adjusted.
1163 DR_PEEL - the data reference whose misalignment is being made
1164 zero in the vector loop by the peel.
1165 NPEEL - the number of iterations in the peel loop if the misalignment
1166 of DR_PEEL is known at compile time. */
1169 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1170 struct data_reference
*dr_peel
, int npeel
)
1173 VEC(dr_p
,heap
) *same_align_drs
;
1174 struct data_reference
*current_dr
;
1175 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1176 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1177 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1178 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1180 /* For interleaved data accesses the step in the loop must be multiplied by
1181 the size of the interleaving group. */
1182 if (DR_GROUP_FIRST_DR (stmt_info
))
1183 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1184 if (DR_GROUP_FIRST_DR (peel_stmt_info
))
1185 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1187 if (known_alignment_for_access_p (dr
)
1188 && known_alignment_for_access_p (dr_peel
)
1189 && (DR_MISALIGNMENT (dr
) / dr_size
==
1190 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
))
1192 DR_MISALIGNMENT (dr
) = 0;
1196 /* It can be assumed that the data refs with the same alignment as dr_peel
1197 are aligned in the vector loop. */
1199 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1200 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1202 if (current_dr
!= dr
)
1204 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1205 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1206 DR_MISALIGNMENT (dr
) = 0;
1210 if (known_alignment_for_access_p (dr
)
1211 && known_alignment_for_access_p (dr_peel
))
1213 DR_MISALIGNMENT (dr
) += npeel
* dr_size
;
1214 DR_MISALIGNMENT (dr
) %= UNITS_PER_SIMD_WORD
;
1218 if (vect_print_dump_info (REPORT_DETAILS
))
1219 fprintf (vect_dump
, "Setting misalignment to -1.");
1220 DR_MISALIGNMENT (dr
) = -1;
1224 /* Function vect_verify_datarefs_alignment
1226 Return TRUE if all data references in the loop can be
1227 handled with respect to alignment. */
1230 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1232 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1233 struct data_reference
*dr
;
1234 enum dr_alignment_support supportable_dr_alignment
;
1237 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1239 tree stmt
= DR_STMT (dr
);
1240 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1242 /* For interleaving, only the alignment of the first access matters. */
1243 if (DR_GROUP_FIRST_DR (stmt_info
)
1244 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1247 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1248 if (!supportable_dr_alignment
)
1250 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1252 if (DR_IS_READ (dr
))
1254 "not vectorized: unsupported unaligned load.");
1257 "not vectorized: unsupported unaligned store.");
1261 if (supportable_dr_alignment
!= dr_aligned
1262 && vect_print_dump_info (REPORT_ALIGNMENT
))
1263 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1269 /* Function vect_enhance_data_refs_alignment
1271 This pass will use loop versioning and loop peeling in order to enhance
1272 the alignment of data references in the loop.
1274 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1275 original loop is to be vectorized; Any other loops that are created by
1276 the transformations performed in this pass - are not supposed to be
1277 vectorized. This restriction will be relaxed.
1279 This pass will require a cost model to guide it whether to apply peeling
1280 or versioning or a combination of the two. For example, the scheme that
1281 intel uses when given a loop with several memory accesses, is as follows:
1282 choose one memory access ('p') which alignment you want to force by doing
1283 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1284 other accesses are not necessarily aligned, or (2) use loop versioning to
1285 generate one loop in which all accesses are aligned, and another loop in
1286 which only 'p' is necessarily aligned.
1288 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1289 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1290 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1292 Devising a cost model is the most critical aspect of this work. It will
1293 guide us on which access to peel for, whether to use loop versioning, how
1294 many versions to create, etc. The cost model will probably consist of
1295 generic considerations as well as target specific considerations (on
1296 powerpc for example, misaligned stores are more painful than misaligned
1299 Here are the general steps involved in alignment enhancements:
1301 -- original loop, before alignment analysis:
1302 for (i=0; i<N; i++){
1303 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1304 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1307 -- After vect_compute_data_refs_alignment:
1308 for (i=0; i<N; i++){
1309 x = q[i]; # DR_MISALIGNMENT(q) = 3
1310 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1313 -- Possibility 1: we do loop versioning:
1315 for (i=0; i<N; i++){ # loop 1A
1316 x = q[i]; # DR_MISALIGNMENT(q) = 3
1317 p[i] = y; # DR_MISALIGNMENT(p) = 0
1321 for (i=0; i<N; i++){ # loop 1B
1322 x = q[i]; # DR_MISALIGNMENT(q) = 3
1323 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1327 -- Possibility 2: we do loop peeling:
1328 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1332 for (i = 3; i < N; i++){ # loop 2A
1333 x = q[i]; # DR_MISALIGNMENT(q) = 0
1334 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1337 -- Possibility 3: combination of loop peeling and versioning:
1338 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1343 for (i = 3; i<N; i++){ # loop 3A
1344 x = q[i]; # DR_MISALIGNMENT(q) = 0
1345 p[i] = y; # DR_MISALIGNMENT(p) = 0
1349 for (i = 3; i<N; i++){ # loop 3B
1350 x = q[i]; # DR_MISALIGNMENT(q) = 0
1351 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1355 These loops are later passed to loop_transform to be vectorized. The
1356 vectorizer will use the alignment information to guide the transformation
1357 (whether to generate regular loads/stores, or with special handling for
1361 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1363 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1364 enum dr_alignment_support supportable_dr_alignment
;
1365 struct data_reference
*dr0
= NULL
;
1366 struct data_reference
*dr
;
1368 bool do_peeling
= false;
1369 bool do_versioning
= false;
1372 stmt_vec_info stmt_info
;
1374 if (vect_print_dump_info (REPORT_DETAILS
))
1375 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1377 /* While cost model enhancements are expected in the future, the high level
1378 view of the code at this time is as follows:
1380 A) If there is a misaligned write then see if peeling to align this write
1381 can make all data references satisfy vect_supportable_dr_alignment.
1382 If so, update data structures as needed and return true. Note that
1383 at this time vect_supportable_dr_alignment is known to return false
1384 for a a misaligned write.
1386 B) If peeling wasn't possible and there is a data reference with an
1387 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1388 then see if loop versioning checks can be used to make all data
1389 references satisfy vect_supportable_dr_alignment. If so, update
1390 data structures as needed and return true.
1392 C) If neither peeling nor versioning were successful then return false if
1393 any data reference does not satisfy vect_supportable_dr_alignment.
1395 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1397 Note, Possibility 3 above (which is peeling and versioning together) is not
1398 being done at this time. */
1400 /* (1) Peeling to force alignment. */
1402 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1404 + How many accesses will become aligned due to the peeling
1405 - How many accesses will become unaligned due to the peeling,
1406 and the cost of misaligned accesses.
1407 - The cost of peeling (the extra runtime checks, the increase
1410 The scheme we use FORNOW: peel to force the alignment of the first
1411 misaligned store in the loop.
1412 Rationale: misaligned stores are not yet supported.
1414 TODO: Use a cost model. */
1416 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1418 stmt
= DR_STMT (dr
);
1419 stmt_info
= vinfo_for_stmt (stmt
);
1421 /* For interleaving, only the alignment of the first access
1423 if (DR_GROUP_FIRST_DR (stmt_info
)
1424 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1427 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1429 if (DR_GROUP_FIRST_DR (stmt_info
))
1431 /* For interleaved access we peel only if number of iterations in
1432 the prolog loop ({VF - misalignment}), is a multiple of the
1433 number of the interleaved accesses. */
1434 int elem_size
, mis_in_elements
;
1435 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1437 /* FORNOW: handle only known alignment. */
1438 if (!known_alignment_for_access_p (dr
))
1444 elem_size
= UNITS_PER_SIMD_WORD
/ vf
;
1445 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1447 if ((vf
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1459 /* Often peeling for alignment will require peeling for loop-bound, which in
1460 turn requires that we know how to adjust the loop ivs after the loop. */
1461 if (!vect_can_advance_ivs_p (loop_vinfo
))
1469 if (known_alignment_for_access_p (dr0
))
1471 /* Since it's known at compile time, compute the number of iterations
1472 in the peeled loop (the peeling factor) for use in updating
1473 DR_MISALIGNMENT values. The peeling factor is the vectorization
1474 factor minus the misalignment as an element count. */
1475 mis
= DR_MISALIGNMENT (dr0
);
1476 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1477 npeel
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - mis
;
1479 /* For interleaved data access every iteration accesses all the
1480 members of the group, therefore we divide the number of iterations
1481 by the group size. */
1482 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1483 if (DR_GROUP_FIRST_DR (stmt_info
))
1484 npeel
/= DR_GROUP_SIZE (stmt_info
);
1486 if (vect_print_dump_info (REPORT_DETAILS
))
1487 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1490 /* Ensure that all data refs can be vectorized after the peel. */
1491 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1493 int save_misalignment
;
1498 stmt
= DR_STMT (dr
);
1499 stmt_info
= vinfo_for_stmt (stmt
);
1500 /* For interleaving, only the alignment of the first access
1502 if (DR_GROUP_FIRST_DR (stmt_info
)
1503 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1506 save_misalignment
= DR_MISALIGNMENT (dr
);
1507 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1508 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1509 DR_MISALIGNMENT (dr
) = save_misalignment
;
1511 if (!supportable_dr_alignment
)
1520 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1521 If the misalignment of DR_i is identical to that of dr0 then set
1522 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1523 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1524 by the peeling factor times the element size of DR_i (MOD the
1525 vectorization factor times the size). Otherwise, the
1526 misalignment of DR_i must be set to unknown. */
1527 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1529 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1531 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1532 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1533 DR_MISALIGNMENT (dr0
) = 0;
1534 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1535 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1537 if (vect_print_dump_info (REPORT_DETAILS
))
1538 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1540 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1547 /* (2) Versioning to force alignment. */
1549 /* Try versioning if:
1550 1) flag_tree_vect_loop_version is TRUE
1551 2) optimize_size is FALSE
1552 3) there is at least one unsupported misaligned data ref with an unknown
1554 4) all misaligned data refs with a known misalignment are supported, and
1555 5) the number of runtime alignment checks is within reason. */
1557 do_versioning
= flag_tree_vect_loop_version
&& (!optimize_size
);
1561 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1563 stmt
= DR_STMT (dr
);
1564 stmt_info
= vinfo_for_stmt (stmt
);
1566 /* For interleaving, only the alignment of the first access
1568 if (aligned_access_p (dr
)
1569 || (DR_GROUP_FIRST_DR (stmt_info
)
1570 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1573 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1575 if (!supportable_dr_alignment
)
1581 if (known_alignment_for_access_p (dr
)
1582 || VEC_length (tree
,
1583 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1584 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS
))
1586 do_versioning
= false;
1590 stmt
= DR_STMT (dr
);
1591 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1592 gcc_assert (vectype
);
1594 /* The rightmost bits of an aligned address must be zeros.
1595 Construct the mask needed for this test. For example,
1596 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1597 mask must be 15 = 0xf. */
1598 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1600 /* FORNOW: use the same mask to test all potentially unaligned
1601 references in the loop. The vectorizer currently supports
1602 a single vector size, see the reference to
1603 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1604 vectorization factor is computed. */
1605 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1606 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1607 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1608 VEC_safe_push (tree
, heap
,
1609 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1614 /* Versioning requires at least one misaligned data reference. */
1615 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
1616 do_versioning
= false;
1617 else if (!do_versioning
)
1618 VEC_truncate (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1623 VEC(tree
,heap
) *may_misalign_stmts
1624 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1627 /* It can now be assumed that the data references in the statements
1628 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1629 of the loop being vectorized. */
1630 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, stmt
); i
++)
1632 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1633 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1634 DR_MISALIGNMENT (dr
) = 0;
1635 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1636 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1639 if (vect_print_dump_info (REPORT_DETAILS
))
1640 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1642 /* Peeling and versioning can't be done together at this time. */
1643 gcc_assert (! (do_peeling
&& do_versioning
));
1645 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1650 /* This point is reached if neither peeling nor versioning is being done. */
1651 gcc_assert (! (do_peeling
|| do_versioning
));
1653 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1658 /* Function vect_analyze_data_refs_alignment
1660 Analyze the alignment of the data-references in the loop.
1661 Return FALSE if a data reference is found that cannot be vectorized. */
1664 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1666 if (vect_print_dump_info (REPORT_DETAILS
))
1667 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1669 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1671 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1673 "not vectorized: can't calculate alignment for data ref.");
1681 /* Function vect_analyze_data_ref_access.
1683 Analyze the access pattern of the data-reference DR. For now, a data access
1684 has to be consecutive to be considered vectorizable. */
1687 vect_analyze_data_ref_access (struct data_reference
*dr
)
1689 tree step
= DR_STEP (dr
);
1690 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1691 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1692 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
1693 tree stmt
= DR_STMT (dr
);
1694 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1695 interleaving group (including gaps). */
1696 HOST_WIDE_INT stride
= dr_step
/ type_size
;
1700 if (vect_print_dump_info (REPORT_DETAILS
))
1701 fprintf (vect_dump
, "bad data-ref access");
1706 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1708 /* Mark that it is not interleaving. */
1709 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL_TREE
;
1713 /* Not consecutive access is possible only if it is a part of interleaving. */
1714 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
1716 /* Check if it this DR is a part of interleaving, and is a single
1717 element of the group that is accessed in the loop. */
1719 /* Gaps are supported only for loads. STEP must be a multiple of the type
1720 size. The size of the group must be a power of 2. */
1722 && (dr_step
% type_size
) == 0
1724 && exact_log2 (stride
) != -1)
1726 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
1727 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1728 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1730 fprintf (vect_dump
, "Detected single element interleaving %d ",
1731 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
1732 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1733 fprintf (vect_dump
, " step ");
1734 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1738 if (vect_print_dump_info (REPORT_DETAILS
))
1739 fprintf (vect_dump
, "not consecutive access");
1743 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
1745 /* First stmt in the interleaving chain. Check the chain. */
1746 tree next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
1747 struct data_reference
*data_ref
= dr
;
1748 unsigned int count
= 1;
1750 tree prev_init
= DR_INIT (data_ref
);
1752 HOST_WIDE_INT diff
, count_in_bytes
;
1756 /* Skip same data-refs. In case that two or more stmts share data-ref
1757 (supported only for loads), we vectorize only the first stmt, and
1758 the rest get their vectorized loads from the the first one. */
1759 if (!tree_int_cst_compare (DR_INIT (data_ref
),
1760 DR_INIT (STMT_VINFO_DATA_REF (
1761 vinfo_for_stmt (next
)))))
1763 /* For load use the same data-ref load. (We check in
1764 vect_check_dependences() that there are no two stores to the
1766 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
1769 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1774 /* Check that all the accesses have the same STEP. */
1775 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
1776 if (tree_int_cst_compare (step
, next_step
))
1778 if (vect_print_dump_info (REPORT_DETAILS
))
1779 fprintf (vect_dump
, "not consecutive access in interleaving");
1783 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
1784 /* Check that the distance between two accesses is equal to the type
1785 size. Otherwise, we have gaps. */
1786 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
1787 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
1788 if (!DR_IS_READ (data_ref
) && diff
!= 1)
1790 if (vect_print_dump_info (REPORT_DETAILS
))
1791 fprintf (vect_dump
, "interleaved store with gaps");
1794 /* Store the gap from the previous member of the group. If there is no
1795 gap in the access, DR_GROUP_GAP is always 1. */
1796 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
1798 prev_init
= DR_INIT (data_ref
);
1799 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1800 /* Count the number of data-refs in the chain. */
1804 /* COUNT is the number of accesses found, we multiply it by the size of
1805 the type to get COUNT_IN_BYTES. */
1806 count_in_bytes
= type_size
* count
;
1807 /* Check the size of the interleaving is not greater than STEP. */
1808 if (dr_step
< count_in_bytes
)
1810 if (vect_print_dump_info (REPORT_DETAILS
))
1812 fprintf (vect_dump
, "interleaving size is greater than step for ");
1813 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1818 /* Check that STEP is a multiple of type size. */
1819 if ((dr_step
% type_size
) != 0)
1821 if (vect_print_dump_info (REPORT_DETAILS
))
1823 fprintf (vect_dump
, "step is not a multiple of type size: step ");
1824 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1825 fprintf (vect_dump
, " size ");
1826 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
1832 /* FORNOW: we handle only interleaving that is a power of 2. */
1833 if (exact_log2 (stride
) == -1)
1835 if (vect_print_dump_info (REPORT_DETAILS
))
1836 fprintf (vect_dump
, "interleaving is not a power of 2");
1839 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1845 /* Function vect_analyze_data_ref_accesses.
1847 Analyze the access pattern of all the data references in the loop.
1849 FORNOW: the only access pattern that is considered vectorizable is a
1850 simple step 1 (consecutive) access.
1852 FORNOW: handle only arrays and pointer accesses. */
1855 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1858 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1859 struct data_reference
*dr
;
1861 if (vect_print_dump_info (REPORT_DETAILS
))
1862 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1864 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1865 if (!vect_analyze_data_ref_access (dr
))
1867 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1868 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1876 /* Function vect_analyze_data_refs.
1878 Find all the data references in the loop.
1880 The general structure of the analysis of data refs in the vectorizer is as
1882 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1883 find and analyze all data-refs in the loop and their dependences.
1884 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1885 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1886 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1891 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1893 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1895 VEC (data_reference_p
, heap
) *datarefs
;
1896 struct data_reference
*dr
;
1899 if (vect_print_dump_info (REPORT_DETAILS
))
1900 fprintf (vect_dump
, "=== vect_analyze_data_refs ===");
1902 compute_data_dependences_for_loop (loop
, true,
1903 &LOOP_VINFO_DATAREFS (loop_vinfo
),
1904 &LOOP_VINFO_DDRS (loop_vinfo
));
1906 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1907 from stmt_vec_info struct to DR and vectype. */
1908 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1910 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1913 stmt_vec_info stmt_info
;
1915 if (!dr
|| !DR_REF (dr
))
1917 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1918 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
1922 /* Update DR field in stmt_vec_info struct. */
1923 stmt
= DR_STMT (dr
);
1924 stmt_info
= vinfo_for_stmt (stmt
);
1926 if (STMT_VINFO_DATA_REF (stmt_info
))
1928 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1931 "not vectorized: more than one data ref in stmt: ");
1932 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1936 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
1938 /* Check that analysis of the data-ref succeeded. */
1939 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
1942 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1944 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
1945 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1949 if (!DR_MEMTAG (dr
))
1951 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1953 fprintf (vect_dump
, "not vectorized: no memory tag for ");
1954 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1959 /* Set vectype for STMT. */
1960 scalar_type
= TREE_TYPE (DR_REF (dr
));
1961 STMT_VINFO_VECTYPE (stmt_info
) =
1962 get_vectype_for_scalar_type (scalar_type
);
1963 if (!STMT_VINFO_VECTYPE (stmt_info
))
1965 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1968 "not vectorized: no vectype for stmt: ");
1969 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1970 fprintf (vect_dump
, " scalar_type: ");
1971 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
1981 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1983 /* Function vect_mark_relevant.
1985 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1988 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
,
1989 enum vect_relevant relevant
, bool live_p
)
1991 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1992 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
1993 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
1995 if (vect_print_dump_info (REPORT_DETAILS
))
1996 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
1998 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2002 /* This is the last stmt in a sequence that was detected as a
2003 pattern that can potentially be vectorized. Don't mark the stmt
2004 as relevant/live because it's not going to vectorized.
2005 Instead mark the pattern-stmt that replaces it. */
2006 if (vect_print_dump_info (REPORT_DETAILS
))
2007 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
2008 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2009 stmt_info
= vinfo_for_stmt (pattern_stmt
);
2010 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
2011 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
2012 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
2013 stmt
= pattern_stmt
;
2016 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
2017 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
2018 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
2020 if (TREE_CODE (stmt
) == PHI_NODE
)
2021 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2022 or live will fail vectorization later on. */
2025 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
2026 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
2028 if (vect_print_dump_info (REPORT_DETAILS
))
2029 fprintf (vect_dump
, "already marked relevant/live.");
2033 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
2037 /* Function vect_stmt_relevant_p.
2039 Return true if STMT in loop that is represented by LOOP_VINFO is
2040 "relevant for vectorization".
2042 A stmt is considered "relevant for vectorization" if:
2043 - it has uses outside the loop.
2044 - it has vdefs (it alters memory).
2045 - control stmts in the loop (except for the exit condition).
2047 CHECKME: what other side effects would the vectorizer allow? */
2050 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
,
2051 enum vect_relevant
*relevant
, bool *live_p
)
2053 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2054 ssa_op_iter op_iter
;
2055 imm_use_iterator imm_iter
;
2056 use_operand_p use_p
;
2057 def_operand_p def_p
;
2059 *relevant
= vect_unused_in_loop
;
2062 /* cond stmt other than loop exit cond. */
2063 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
2064 *relevant
= vect_used_in_loop
;
2066 /* changing memory. */
2067 if (TREE_CODE (stmt
) != PHI_NODE
)
2068 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
2070 if (vect_print_dump_info (REPORT_DETAILS
))
2071 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
2072 *relevant
= vect_used_in_loop
;
2075 /* uses outside the loop. */
2076 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2078 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
2080 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
2081 if (!flow_bb_inside_loop_p (loop
, bb
))
2083 if (vect_print_dump_info (REPORT_DETAILS
))
2084 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
2086 /* We expect all such uses to be in the loop exit phis
2087 (because of loop closed form) */
2088 gcc_assert (TREE_CODE (USE_STMT (use_p
)) == PHI_NODE
);
2089 gcc_assert (bb
== single_exit (loop
)->dest
);
2096 return (*live_p
|| *relevant
);
2100 /* Function vect_mark_stmts_to_be_vectorized.
2102 Not all stmts in the loop need to be vectorized. For example:
2111 Stmt 1 and 3 do not need to be vectorized, because loop control and
2112 addressing of vectorized data-refs are handled differently.
2114 This pass detects such stmts. */
2117 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
2119 VEC(tree
,heap
) *worklist
;
2120 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2121 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2122 unsigned int nbbs
= loop
->num_nodes
;
2123 block_stmt_iterator si
;
2128 stmt_vec_info stmt_vinfo
;
2132 enum vect_relevant relevant
;
2134 enum vect_def_type dt
;
2136 if (vect_print_dump_info (REPORT_DETAILS
))
2137 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
2139 worklist
= VEC_alloc (tree
, heap
, 64);
2141 /* 1. Init worklist. */
2144 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2146 if (vect_print_dump_info (REPORT_DETAILS
))
2148 fprintf (vect_dump
, "init: phi relevant? ");
2149 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2152 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
2153 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
2156 for (i
= 0; i
< nbbs
; i
++)
2159 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2161 stmt
= bsi_stmt (si
);
2163 if (vect_print_dump_info (REPORT_DETAILS
))
2165 fprintf (vect_dump
, "init: stmt relevant? ");
2166 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2169 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
2170 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
2175 /* 2. Process_worklist */
2177 while (VEC_length (tree
, worklist
) > 0)
2179 stmt
= VEC_pop (tree
, worklist
);
2181 if (vect_print_dump_info (REPORT_DETAILS
))
2183 fprintf (vect_dump
, "worklist: examine stmt: ");
2184 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2187 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2188 in the loop, mark the stmt that defines it (DEF_STMT) as
2189 relevant/irrelevant and live/dead according to the liveness and
2190 relevance properties of STMT.
2193 gcc_assert (TREE_CODE (stmt
) != PHI_NODE
);
2195 ann
= stmt_ann (stmt
);
2196 stmt_vinfo
= vinfo_for_stmt (stmt
);
2198 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
2199 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
2201 /* Generally, the liveness and relevance properties of STMT are
2202 propagated to the DEF_STMTs of its USEs:
2203 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2204 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2209 If USE is used only for address computations (e.g. array indexing),
2210 which does not need to be directly vectorized, then the
2211 liveness/relevance of the respective DEF_STMT is left unchanged.
2214 If STMT has been identified as defining a reduction variable, then
2217 The last use of STMT is the reduction-variable, which is defined
2218 by a loop-header-phi. We don't want to mark the phi as live or
2219 relevant (because it does not need to be vectorized, it is handled
2220 as part of the vectorization of the reduction), so in this case we
2221 skip the call to vect_mark_relevant.
2223 The rest of the uses of STMT are defined in the loop body. For
2224 the def_stmt of these uses we want to set liveness/relevance
2226 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2227 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2228 because even though STMT is classified as live (since it defines a
2229 value that is used across loop iterations) and irrelevant (since it
2230 is not used inside the loop), it will be vectorized, and therefore
2231 the corresponding DEF_STMTs need to marked as relevant.
2232 We distinguish between two kinds of relevant stmts - those that are
2233 used by a reduction computation, and those that are (also) used by
2234 a regular computation. This allows us later on to identify stmts
2235 that are used solely by a reduction, and therefore the order of
2236 the results that they produce does not have to be kept.
2240 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
2242 gcc_assert (relevant
== vect_unused_in_loop
&& live_p
);
2243 relevant
= vect_used_by_reduction
;
2247 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2249 /* case 1: we are only interested in uses that need to be vectorized.
2250 Uses that are used for address computation are not considered
2253 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
2256 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2258 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2259 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
2260 VEC_free (tree
, heap
, worklist
);
2264 if (!def_stmt
|| IS_EMPTY_STMT (def_stmt
))
2267 if (vect_print_dump_info (REPORT_DETAILS
))
2269 fprintf (vect_dump
, "worklist: examine use %d: ", i
);
2270 print_generic_expr (vect_dump
, use
, TDF_SLIM
);
2273 bb
= bb_for_stmt (def_stmt
);
2274 if (!flow_bb_inside_loop_p (loop
, bb
))
2277 /* case 2.1: the reduction-use does not mark the defining-phi
2279 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2280 && TREE_CODE (def_stmt
) == PHI_NODE
)
2283 vect_mark_relevant (&worklist
, def_stmt
, relevant
, live_p
);
2285 } /* while worklist */
2287 VEC_free (tree
, heap
, worklist
);
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
2302 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
2304 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2305 basic_block bb
= loop
->header
;
2308 /* Analyze phi functions of the loop header. */
2310 if (vect_print_dump_info (REPORT_DETAILS
))
2311 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
2313 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2315 tree access_fn
= NULL
;
2316 tree evolution_part
;
2318 if (vect_print_dump_info (REPORT_DETAILS
))
2320 fprintf (vect_dump
, "Analyze phi: ");
2321 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2324 /* Skip virtual phi's. The data dependences that are associated with
2325 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2327 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2329 if (vect_print_dump_info (REPORT_DETAILS
))
2330 fprintf (vect_dump
, "virtual phi. skip.");
2334 /* Skip reduction phis. */
2336 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
2338 if (vect_print_dump_info (REPORT_DETAILS
))
2339 fprintf (vect_dump
, "reduc phi. skip.");
2343 /* Analyze the evolution function. */
2345 access_fn
= instantiate_parameters
2346 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
2350 if (vect_print_dump_info (REPORT_DETAILS
))
2351 fprintf (vect_dump
, "No Access function.");
2355 if (vect_print_dump_info (REPORT_DETAILS
))
2357 fprintf (vect_dump
, "Access function of PHI: ");
2358 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
2361 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
2363 if (evolution_part
== NULL_TREE
)
2365 if (vect_print_dump_info (REPORT_DETAILS
))
2366 fprintf (vect_dump
, "No evolution.");
2370 /* FORNOW: We do not transform initial conditions of IVs
2371 which evolution functions are a polynomial of degree >= 2. */
2373 if (tree_is_chrec (evolution_part
))
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
2389 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
2393 if (vect_print_dump_info (REPORT_DETAILS
))
2394 fprintf (vect_dump
, "=== get_loop_niters ===");
2396 niters
= number_of_iterations_in_loop (loop
);
2398 if (niters
!= NULL_TREE
2399 && niters
!= chrec_dont_know
)
2401 *number_of_iterations
= niters
;
2403 if (vect_print_dump_info (REPORT_DETAILS
))
2405 fprintf (vect_dump
, "==> get_loop_niters:" );
2406 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
2410 return get_loop_exit_condition (loop
);
/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it's an inner-most loop
   - number of BBs = 2 (which are the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
2424 static loop_vec_info
2425 vect_analyze_loop_form (struct loop
*loop
)
2427 loop_vec_info loop_vinfo
;
2429 tree number_of_iterations
= NULL
;
2431 if (vect_print_dump_info (REPORT_DETAILS
))
2432 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
2436 if (vect_print_dump_info (REPORT_OUTER_LOOPS
))
2437 fprintf (vect_dump
, "not vectorized: nested loop.");
2441 if (!single_exit (loop
)
2442 || loop
->num_nodes
!= 2
2443 || EDGE_COUNT (loop
->header
->preds
) != 2)
2445 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2447 if (!single_exit (loop
))
2448 fprintf (vect_dump
, "not vectorized: multiple exits.");
2449 else if (loop
->num_nodes
!= 2)
2450 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
2451 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
2452 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
2458 /* We assume that the loop exit condition is at the end of the loop. i.e,
2459 that the loop is represented as a do-while (with a proper if-guard
2460 before the loop if needed), where the loop header contains all the
2461 executable statements, and the latch is empty. */
2462 if (!empty_block_p (loop
->latch
)
2463 || phi_nodes (loop
->latch
))
2465 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2466 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
2470 /* Make sure there exists a single-predecessor exit bb: */
2471 if (!single_pred_p (single_exit (loop
)->dest
))
2473 edge e
= single_exit (loop
);
2474 if (!(e
->flags
& EDGE_ABNORMAL
))
2476 split_loop_exit_edge (e
);
2477 if (vect_print_dump_info (REPORT_DETAILS
))
2478 fprintf (vect_dump
, "split exit edge.");
2482 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2483 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
2488 if (empty_block_p (loop
->header
))
2490 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2491 fprintf (vect_dump
, "not vectorized: empty loop.");
2495 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
2498 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2499 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
2503 if (!number_of_iterations
)
2505 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2507 "not vectorized: number of iterations cannot be computed.");
2511 if (chrec_contains_undetermined (number_of_iterations
))
2513 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
2514 fprintf (vect_dump
, "Infinite number of iterations.");
2518 loop_vinfo
= new_loop_vec_info (loop
);
2519 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
2521 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2523 if (vect_print_dump_info (REPORT_DETAILS
))
2525 fprintf (vect_dump
, "Symbolic number of iterations is ");
2526 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
2530 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
2532 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2533 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
2537 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
2549 vect_analyze_loop (struct loop
*loop
)
2552 loop_vec_info loop_vinfo
;
2554 if (vect_print_dump_info (REPORT_DETAILS
))
2555 fprintf (vect_dump
, "===== analyze_loop_nest =====");
2557 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2559 loop_vinfo
= vect_analyze_loop_form (loop
);
2562 if (vect_print_dump_info (REPORT_DETAILS
))
2563 fprintf (vect_dump
, "bad loop form.");
2567 /* Find all data references in the loop (which correspond to vdefs/vuses)
2568 and analyze their evolution in the loop.
2570 FORNOW: Handle only simple, array references, which
2571 alignment can be forced, and aligned pointer-references. */
2573 ok
= vect_analyze_data_refs (loop_vinfo
);
2576 if (vect_print_dump_info (REPORT_DETAILS
))
2577 fprintf (vect_dump
, "bad data references.");
2578 destroy_loop_vec_info (loop_vinfo
);
2582 /* Classify all cross-iteration scalar data-flow cycles.
2583 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2585 vect_analyze_scalar_cycles (loop_vinfo
);
2587 vect_pattern_recog (loop_vinfo
);
2589 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2591 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
2594 if (vect_print_dump_info (REPORT_DETAILS
))
2595 fprintf (vect_dump
, "unexpected pattern.");
2596 destroy_loop_vec_info (loop_vinfo
);
2600 /* Analyze the alignment of the data-refs in the loop.
2601 Fail if a data reference is found that cannot be vectorized. */
2603 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
2606 if (vect_print_dump_info (REPORT_DETAILS
))
2607 fprintf (vect_dump
, "bad data alignment.");
2608 destroy_loop_vec_info (loop_vinfo
);
2612 ok
= vect_determine_vectorization_factor (loop_vinfo
);
2615 if (vect_print_dump_info (REPORT_DETAILS
))
2616 fprintf (vect_dump
, "can't determine vectorization factor.");
2617 destroy_loop_vec_info (loop_vinfo
);
2621 /* Analyze data dependences between the data-refs in the loop.
2622 FORNOW: fail at the first data dependence that we encounter. */
2624 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
2627 if (vect_print_dump_info (REPORT_DETAILS
))
2628 fprintf (vect_dump
, "bad data dependence.");
2629 destroy_loop_vec_info (loop_vinfo
);
2633 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2634 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2636 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
2639 if (vect_print_dump_info (REPORT_DETAILS
))
2640 fprintf (vect_dump
, "bad data access.");
2641 destroy_loop_vec_info (loop_vinfo
);
2645 /* This pass will decide on using loop versioning and/or loop peeling in
2646 order to enhance the alignment of data references in the loop. */
2648 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
2651 if (vect_print_dump_info (REPORT_DETAILS
))
2652 fprintf (vect_dump
, "bad data alignment.");
2653 destroy_loop_vec_info (loop_vinfo
);
2657 /* Scan all the operations in the loop and make sure they are
2660 ok
= vect_analyze_operations (loop_vinfo
);
2663 if (vect_print_dump_info (REPORT_DETAILS
))
2664 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
2665 destroy_loop_vec_info (loop_vinfo
);
2669 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;