/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);
/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            continue;

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We set the vectype according to the type of the result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the smallest
                 datatype in this function, because that determines the VF.
                 If the smallest datatype in the loop is present only as the
                 rhs of a promotion operation - we'd miss it here.
                 However, in such a case, where a variable of this datatype
                 does not appear in the lhs anywhere in the loop, it shouldn't
                 affect the vectorization factor.  */
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
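
/* For illustration: on a hypothetical target with 16-byte vector registers,
   a loop operating on "short" (2-byte) elements gets VF = 8, since 8 shorts
   fit in one vector register, while the same loop on "int" (4-byte) elements
   gets VF = 4.  A loop that mixes the two element sizes, e.g.:

        short *a;  int *b;
        for (i=0; i<N; i++)
          b[i] = b[i] + a[i];

   is not supported, per the single-size restriction documented above.
   (The 16-byte size here is only an example; the actual vector size comes
   from the target's vector modes.)  */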
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              /* Most likely a reduction-like computation that is used
                 in the loop.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                    || vectorizable_type_demotion (stmt, NULL, NULL)
                    || vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_call (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_reduction (stmt, NULL, NULL);

              if (ok)
                need_to_vectorize = true;
              else
                ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
          || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
              ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
               * vectorization_factor))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- T1 = a[i];  or
     -2- a[i] = T2;
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
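
/* For illustration, given the stmt "a[i] = x":
   - USE == x: 'x' appears as the rhs operand, which is not used for
     indexing, so the function returns true.
   - USE == i: 'i' is used only to index 'a', so the function returns false.
   For "y = a[i]" the lhs is an SSA_NAME, so every use can only be an index,
   and the function returns false for any USE.  */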
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction: (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction: (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

         even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
         it does not need to be vectorized because it is only used for array
         indexing (see 'mark_stmts_to_be_vectorized').  The def-use cycle in
         loop2 on the other hand is relevant (it is being written to memory).  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  return;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}
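
/* For illustration: the chain is kept sorted by increasing DR_INIT.  If
   DRB's chain is {init 0, init 8} for 4-byte elements, inserting a DRA with
   init 4 links it between the two nodes, giving {0, 4, 8}.  The chain thus
   always lists the group members in the order in which they are laid out in
   memory within one iteration.  */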
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
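
/* For illustration of case 3 above: if DRA's chain is {init 4, init 8}
   and DRB (not yet in any chain) has init 0, then DRB's init is smaller
   than the init of DRA's current first stmt, so DRB becomes the new
   FIRST_DR and every node in the old chain is updated to point to it,
   yielding the chain {0, 4, 8}.  */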
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
           != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
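
/* For illustration, consider (4-byte ints):

        for (i=0; i<N; i++){
          a[2*i]   = x;       # DRA: init 0, step 8
          a[2*i+1] = y;       # DRB: init 4, step 8
        }

   Both refs have the same base, offset, step and type size; the difference
   of the inits (4) is a multiple of the type size and does not exceed the
   step, so the two stores are chained as an interleaving group of size 2.  */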
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: possible dependence between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
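
/* For illustration: with VF = 4, a distance of 0 (same iteration) or a
   multiple of VF such as 4 or 8 (for equal-sized refs) is harmless, and
   additionally tells us the two refs share the same alignment.  A distance
   of 5 is also fine, since the accesses fall in different vector iterations
   (|dist| >= VF).  A distance of 3, however, would be crossed within a
   single vector iteration, so the loop is not vectorized.  */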
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
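
/* For illustration: for a V4SI access (16-byte vector alignment),
   alignment = 16.  If the base is (or can be forced to be) 16-byte aligned
   and the offset misalignment of the ref is 20 bytes, DR_MISALIGNMENT
   becomes 20 % 16 = 4.  If the offset is not known at compile time, or the
   base alignment can be neither proven nor forced, the misalignment stays
   -1 (unknown).  */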
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (DR_GROUP_FIRST_DR (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel)
      && (DR_MISALIGNMENT (dr) / dr_size ==
          DR_MISALIGNMENT (dr_peel) / dr_peel_size))
    {
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      DR_MISALIGNMENT (dr) += npeel * dr_size;
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  DR_MISALIGNMENT (dr) = -1;
}
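
/* For illustration: suppose dr_peel is a 4-byte access and npeel = 3.
   A different 4-byte access dr with known misalignment 8 is updated
   (assuming a 16-byte UNITS_PER_SIMD_WORD) to (8 + 3*4) % 16 = 4.  An
   access whose misalignment was unknown stays unknown (-1).  */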
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (DR_GROUP_FIRST_DR (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  tree stmt;
  stmt_vec_info stmt_info;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (DR_GROUP_FIRST_DR (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          if (DR_GROUP_FIRST_DR (stmt_info))
            {
              /* For interleaved access we peel only if number of iterations in
                 the prolog loop ({VF - misalignment}), is a multiple of the
                 number of the interleaved accesses.  */
              int elem_size, mis_in_elements;
              int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

              /* FORNOW: handle only known alignment.  */
              if (!known_alignment_for_access_p (dr))
                {
                  do_peeling = false;
                  break;
                }

              elem_size = UNITS_PER_SIMD_WORD / vf;
              mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

              if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
                {
                  do_peeling = false;
                  break;
                }
            }
          dr0 = dr;
          do_peeling = true;
          break;
        }
    }

  /* Often peeling for alignment will require peeling for loop-bound, which in
     turn requires that we know how to adjust the loop ivs after the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (DR_GROUP_FIRST_DR (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (DR_GROUP_FIRST_DR (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          DR_MISALIGNMENT (dr) = save_misalignment;

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          DR_MISALIGNMENT (dr0) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (DR_GROUP_FIRST_DR (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree vectype;
              int mask;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
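
/* For illustration of the peeling arithmetic used above: a misaligned
   4-byte store with DR_MISALIGNMENT = 8 under VF = 4 gives
   mis = 8/4 = 2 elements, hence npeel = 4 - 2 = 2: after two scalar
   iterations the store reaches a vector boundary (assuming a 16-byte
   vector size).  On the versioning path, a V4SI access yields the runtime
   test (addr & 0xf) == 0, per the mask comment above.  */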
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data access
   has to be consecutive to be considered vectorizable.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  tree stmt = DR_STMT (dr);
  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of
     the interleaving group (including gaps).  */
  HOST_WIDE_INT stride = dr_step / type_size;

  if (!step)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data-ref access");
      return false;
    }

  /* Consecutive?  */
  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      /* Mark that it is not interleaving.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
      return true;
    }

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    {
      /* Check if this DR is a part of interleaving, and is a single
         element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && stride > 0
          && exact_log2 (stride) != -1)
        {
          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected single element interleaving %d ",
                       DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
              fprintf (vect_dump, " step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
            }
          return true;
        }
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }

  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      tree prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes;

      while (next)
        {
          /* Skip same data-refs.  In case that two or more stmts share
             data-ref (supported only for loads), we vectorize only the first
             stmt, and the rest get their vectorized loads from the first
             one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                      vinfo_for_stmt (next)))))
            {
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Two store stmts share the same dr.");
                  return false;
                }

              /* Check that there are no load-store dependencies for these
                 loads to prevent a case of load-store-load to the same
                 location.  */
              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "READ_WRITE dependence in interleaving.");
                  return false;
                }

              /* For load use the same data-ref load.  */
              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

              prev = next;
              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
              continue;
            }
          prev = next;

          /* Check that all the accesses have the same STEP.  */
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (step, next_step))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "not consecutive access in interleaving");
              return false;
            }

          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
          /* Check that the distance between two accesses is equal to the type
             size.  Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (!DR_IS_READ (data_ref) && diff != 1)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "interleaved store with gaps");
              return false;
            }
          /* Store the gap from the previous member of the group.  If there is
             no gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;

          prev_init = DR_INIT (data_ref);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
          /* Count the number of data-refs in the chain.  */
          count++;
        }

      /* COUNT is the number of accesses found, we multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;

      /* Check that the size of the interleaving is not greater than STEP.  */
      if (dr_step < count_in_bytes)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "interleaving size is greater than step for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Check that the size of the interleaving is equal to STEP for stores,
         i.e., that there are no gaps.  */
      if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaved store with gaps");
          return false;
        }

      /* Check that STEP is a multiple of type size.  */
      if ((dr_step % type_size) != 0)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "step is not a multiple of type size: step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
              fprintf (vect_dump, " size ");
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                  TDF_SLIM);
            }
          return false;
        }

      /* FORNOW: we handle only interleaving that is a power of 2.  */
      if (exact_log2 (stride) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaving is not a power of 2");
          return false;
        }
      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
    }
  return true;
}
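
/* For illustration: a load "x = b[4*i]" on 4-byte elements has step 16 and
   stride 16/4 = 4, a power of 2, so it is accepted as single element
   interleaving with DR_GROUP_SIZE = 4 (the other three group slots are
   gaps).  A store with the same pattern would be rejected, since gaps are
   supported only for loads.  */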
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_analyze_data_ref_access (dr))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
        return false;
      }

  return true;
}
/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
      find and analyze all data-refs in the loop and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.

*/

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  compute_data_dependences_for_loop (loop, true,
                                     &LOOP_VINFO_DATAREFS (loop_vinfo),
                                     &LOOP_VINFO_DDRS (loop_vinfo));

  /* Go through the data-refs, check that the analysis succeeded.  Update
     pointer from stmt_vec_info struct to DR and vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt;
      stmt_vec_info stmt_info;

      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
                get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
static void
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      tree pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized. Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (TREE_CODE (stmt) == PHI_NODE)
    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
       or live will fail vectorization later on.  */
    return;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (tree, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
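
/* An illustrative (hypothetical) example of the first case: in

     for (i=0; i<N; i++)
       s = a[i];
     use (s);

   the assignment to 's' has a use outside the loop, so the stmt is
   "live" even though nothing inside the loop body depends on it.  */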
static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_loop;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant = vect_used_in_loop;

  /* changing memory.  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_loop;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = bb_for_stmt (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt, use;
  stmt_ann_t ann;
  ssa_op_iter iter;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool live_p;
  enum vect_relevant relevant;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */

  bb = loop->header;
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "init: phi relevant? ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
        vect_mark_relevant (&worklist, phi, relevant, live_p);
    }

  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }
  /* 2. Process_worklist */

  while (VEC_length (tree, worklist) > 0)
    {
      stmt = VEC_pop (tree, worklist);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT. For each ssa-name USE that is defined
         in the loop, mark the stmt that defines it (DEF_STMT) as
         relevant/irrelevant and live/dead according to the liveness and
         relevance properties of STMT.  */

      gcc_assert (TREE_CODE (stmt) != PHI_NODE);

      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);

      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated to the DEF_STMTs of its USEs:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
             STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant

         Exceptions:

         - case 1: If USE is used only for address computations (e.g. array
           indexing), which does not need to be directly vectorized, then
           the liveness/relevance of the respective DEF_STMT is left
           unchanged.

         - case 2: If STMT has been identified as defining a reduction
           variable, then we have two cases:

           - case 2.1: The last use of STMT is the reduction-variable,
             which is defined by a loop-header-phi. We don't want to mark
             the phi as live or relevant (because it does not need to be
             vectorized, it is handled as part of the vectorization of the
             reduction), so in this case we skip the call to
             vect_mark_relevant.

           - case 2.2: The rest of the uses of STMT are defined in the loop
             body. For the def_stmt of these uses we want to set
             liveness/relevance as follows:
               STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
               STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
             because even though STMT is classified as live (since it
             defines a value that is used across loop iterations) and
             irrelevant (since it is not used inside the loop), it will be
             vectorized, and therefore the corresponding DEF_STMTs need to
             be marked as relevant.

         We distinguish between two kinds of relevant stmts - those that are
         used by a reduction computation, and those that are (also) used by
         a regular computation. This allows us later on to identify stmts
         that are used solely by a reduction, and therefore the order of
         the results that they produce does not have to be kept.  */
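
      /* An illustrative (hypothetical) example of the reduction case: in

           for (i=0; i<N; i++)
             sum += a[i] * b[i];

         the stmt computing the new value of 'sum' is live (its value is
         used after the loop) but has no use inside the loop; the multiply
         feeding it is then marked vect_used_by_reduction rather than
         vect_used_in_loop.  */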
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          gcc_assert (relevant == vect_unused_in_loop && live_p);
          relevant = vect_used_by_reduction;
          live_p = false;
        }

      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        {
          /* case 1: we are only interested in uses that need to be
             vectorized. Uses that are used for address computation are
             not considered relevant.  */
          if (!exist_non_indexing_operands_for_use_p (use, stmt))
            continue;

          if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
              VEC_free (tree, heap, worklist);
              return false;
            }

          if (!def_stmt || IS_EMPTY_STMT (def_stmt))
            continue;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "worklist: examine use %d: ", i);
              print_generic_expr (vect_dump, use, TDF_SLIM);
            }

          bb = bb_for_stmt (def_stmt);
          if (!flow_bb_inside_loop_p (loop, bb))
            continue;

          /* case 2.1: the reduction-use does not mark the defining-phi
             as relevant.  */
          if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
              && TREE_CODE (def_stmt) == PHI_NODE)
            continue;

          vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop. Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
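
/* For example (illustrative): if IV 'i' has the affine access function
   {0, +, 1}_loop and the vectorized loop executes niters - niters%VF
   iterations, 'i' is advanced to niters - niters%VF before the epilog
   loop. A phi whose evolution is not affine (a polynomial of degree >= 2)
   is rejected below.  */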
static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
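
/* For instance (illustrative): for

     for (i=0; i<N; i++)
       a[i] = 0;

   NUMBER_OF_ITERATIONS is set to the expression 'N', and the returned
   exit condition is the COND_EXPR that tests 'i < N'.  */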
static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}
/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it's an inner-most loop
   - number of BBs = 2 (which are the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
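
/* A loop in the expected form (illustrative example), i.e. a do-while
   whose exit condition is at the end of the body, guarded by an if:

     if (N > 0)
       {
         i = 0;
         do {
           a[i] = b[i];
           i++;
         } while (i < N);
       }
*/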
static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
        fprintf (vect_dump, "not vectorized: nested loop.");
      return NULL;
    }

  if (!single_exit (loop)
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (loop->num_nodes != 2)
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }

      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop. i.e,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          return NULL;
        }
    }

  if (empty_block_p (loop->header))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it. The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}