1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
42 /* Main analysis functions. */
43 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
44 static bool vect_analyze_data_refs (loop_vec_info
);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
46 static bool vect_analyze_scalar_cycles (loop_vec_info
);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info
);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
50 static bool vect_compute_data_refs_alignment (loop_vec_info
);
51 static void vect_enhance_data_refs_alignment (loop_vec_info
);
52 static bool vect_analyze_operations (loop_vec_info
);
53 static bool vect_determine_vectorization_factor (loop_vec_info
);
55 /* Utility functions for the analyses. */
56 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
57 static void vect_mark_relevant (VEC(tree
,heap
) **, tree
);
58 static bool vect_stmt_relevant_p (tree
, loop_vec_info
);
59 static tree
vect_get_loop_niters (struct loop
*, tree
*);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_reference
*, struct data_reference
*, loop_vec_info
);
62 static bool vect_compute_data_ref_alignment (struct data_reference
*);
63 static bool vect_analyze_data_ref_access (struct data_reference
*);
64 static struct data_reference
* vect_analyze_pointer_ref_access
65 (tree
, tree
, bool, tree
, tree
*, tree
*);
66 static bool vect_can_advance_ivs_p (loop_vec_info
);
67 static tree
vect_get_ptr_offset (tree
, tree
, tree
*);
68 static bool vect_analyze_offset_expr (tree
, struct loop
*, tree
, tree
*,
70 static bool vect_base_addr_differ_p (struct data_reference
*,
71 struct data_reference
*drb
, bool *);
72 static tree
vect_object_analysis (tree
, tree
, bool, tree
,
73 struct data_reference
**, tree
*, tree
*,
74 tree
*, bool *, tree
*, struct ptr_info_def
**,
76 static tree
vect_address_analysis (tree
, tree
, bool, tree
,
77 struct data_reference
*, tree
*, tree
*,
81 /* Function vect_get_ptr_offset
83 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
86 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED
,
87 tree vectype ATTRIBUTE_UNUSED
,
88 tree
*offset ATTRIBUTE_UNUSED
)
90 /* TODO: Use alignment information. */
95 /* Function vect_analyze_offset_expr
97 Given an offset expression EXPR received from get_inner_reference, analyze
98 it and create an expression for INITIAL_OFFSET by substituting the variables
99 of EXPR with initial_condition of the corresponding access_fn in the loop.
102 for (j = 3; j < N; j++)
105 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
106 substituted, since its access_fn in the inner loop is i. 'j' will be
107 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
110 Compute MISALIGN (the misalignment of the data reference initial access from
111 its base) if possible. Misalignment can be calculated only if all the
112 variables can be substituted with constants, or if a variable is multiplied
113 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
114 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
115 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
116 VECTYPE_ALIGNMENT computation in the caller of this function).
118 STEP is an evolution of the data reference in this loop in bytes.
119 In the above example, STEP is C_j.
121 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
122 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
123 are NULL_TREEs. Otherwise, return TRUE.
128 vect_analyze_offset_expr (tree expr
,
130 tree vectype_alignment
,
131 tree
*initial_offset
,
137 tree left_offset
= ssize_int (0);
138 tree right_offset
= ssize_int (0);
139 tree left_misalign
= ssize_int (0);
140 tree right_misalign
= ssize_int (0);
141 tree left_step
= ssize_int (0);
142 tree right_step
= ssize_int (0);
144 tree init
, evolution
;
147 *misalign
= NULL_TREE
;
148 *initial_offset
= NULL_TREE
;
150 /* Strip conversions that don't narrow the mode. */
151 expr
= vect_strip_conversion (expr
);
157 if (TREE_CODE (expr
) == INTEGER_CST
)
159 *initial_offset
= fold_convert (ssizetype
, expr
);
160 *misalign
= fold_convert (ssizetype
, expr
);
161 *step
= ssize_int (0);
165 /* 2. Variable. Try to substitute with initial_condition of the corresponding
166 access_fn in the current loop. */
167 if (SSA_VAR_P (expr
))
169 tree access_fn
= analyze_scalar_evolution (loop
, expr
);
171 if (access_fn
== chrec_dont_know
)
175 init
= initial_condition_in_loop_num (access_fn
, loop
->num
);
176 if (init
== expr
&& !expr_invariant_in_loop_p (loop
, init
))
177 /* Not enough information: may be not loop invariant.
178 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
179 initial_condition is D, but it depends on i - loop's induction
183 evolution
= evolution_part_in_loop_num (access_fn
, loop
->num
);
184 if (evolution
&& TREE_CODE (evolution
) != INTEGER_CST
)
185 /* Evolution is not constant. */
188 if (TREE_CODE (init
) == INTEGER_CST
)
189 *misalign
= fold_convert (ssizetype
, init
);
191 /* Not constant, misalignment cannot be calculated. */
192 *misalign
= NULL_TREE
;
194 *initial_offset
= fold_convert (ssizetype
, init
);
196 *step
= evolution
? fold_convert (ssizetype
, evolution
) : ssize_int (0);
200 /* Recursive computation. */
201 if (!BINARY_CLASS_P (expr
))
203 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
204 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
206 fprintf (vect_dump
, "Not binary expression ");
207 print_generic_expr (vect_dump
, expr
, TDF_SLIM
);
211 oprnd0
= TREE_OPERAND (expr
, 0);
212 oprnd1
= TREE_OPERAND (expr
, 1);
214 if (!vect_analyze_offset_expr (oprnd0
, loop
, vectype_alignment
, &left_offset
,
215 &left_misalign
, &left_step
)
216 || !vect_analyze_offset_expr (oprnd1
, loop
, vectype_alignment
,
217 &right_offset
, &right_misalign
, &right_step
))
220 /* The type of the operation: plus, minus or mult. */
221 code
= TREE_CODE (expr
);
225 if (TREE_CODE (right_offset
) != INTEGER_CST
)
226 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
228 FORNOW: We don't support such cases. */
231 /* Strip conversions that don't narrow the mode. */
232 left_offset
= vect_strip_conversion (left_offset
);
235 /* Misalignment computation. */
236 if (SSA_VAR_P (left_offset
))
238 /* If the left side contains variables that can't be substituted with
239 constants, we check if the right side is a multiple of ALIGNMENT.
241 if (integer_zerop (size_binop (TRUNC_MOD_EXPR
, right_offset
,
242 fold_convert (ssizetype
, vectype_alignment
))))
243 *misalign
= ssize_int (0);
245 /* If the remainder is not zero or the right side isn't constant,
246 we can't compute misalignment. */
247 *misalign
= NULL_TREE
;
251 /* The left operand was successfully substituted with constant. */
253 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
255 *misalign
= size_binop (code
, left_misalign
, right_misalign
);
257 *misalign
= NULL_TREE
;
260 /* Step calculation. */
261 /* Multiply the step by the right operand. */
262 *step
= size_binop (MULT_EXPR
, left_step
, right_offset
);
267 /* Combine the recursive calculations for step and misalignment. */
268 *step
= size_binop (code
, left_step
, right_step
);
270 if (left_misalign
&& right_misalign
)
271 *misalign
= size_binop (code
, left_misalign
, right_misalign
);
273 *misalign
= NULL_TREE
;
281 /* Compute offset. */
282 *initial_offset
= fold_convert (ssizetype
,
283 fold (build2 (code
, TREE_TYPE (left_offset
),
290 /* Function vect_determine_vectorization_factor
292 Determine the vectorization factor (VF). VF is the number of data elements
293 that are operated upon in parallel in a single iteration of the vectorized
294 loop. For example, when vectorizing a loop that operates on 4byte elements,
295 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
296 elements can fit in a single vector register.
298 We currently support vectorization of loops in which all types operated upon
299 are of the same size. Therefore this function currently sets VF according to
300 the size of the types operated upon, and fails if there are multiple sizes
303 VF is also the factor by which the loop iterations are strip-mined, e.g.:
310 for (i=0; i<N; i+=VF){
311 a[i:VF] = b[i:VF] + c[i:VF];
316 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
318 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
319 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
320 int nbbs
= loop
->num_nodes
;
321 block_stmt_iterator si
;
322 unsigned int vectorization_factor
= 0;
326 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
327 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
329 for (i
= 0; i
< nbbs
; i
++)
331 basic_block bb
= bbs
[i
];
333 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
335 tree stmt
= bsi_stmt (si
);
337 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
340 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
342 fprintf (vect_dump
, "==> examining statement: ");
343 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
346 gcc_assert (stmt_info
);
347 /* skip stmts which do not need to be vectorized. */
348 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
351 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
353 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
354 LOOP_LOC (loop_vinfo
)))
356 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
357 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
362 if (STMT_VINFO_DATA_REF (stmt_info
))
363 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
364 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
365 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
367 scalar_type
= TREE_TYPE (stmt
);
369 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
371 fprintf (vect_dump
, "get vectype for scalar type: ");
372 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
375 vectype
= get_vectype_for_scalar_type (scalar_type
);
378 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
379 LOOP_LOC (loop_vinfo
)))
381 fprintf (vect_dump
, "not vectorized: unsupported data-type ");
382 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
386 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
388 fprintf (vect_dump
, "vectype: ");
389 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
391 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
393 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
394 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
395 fprintf (vect_dump
, "nunits = %d", nunits
);
397 if (vectorization_factor
)
399 /* FORNOW: don't allow mixed units.
400 This restriction will be relaxed in the future. */
401 if (nunits
!= vectorization_factor
)
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
404 LOOP_LOC (loop_vinfo
)))
405 fprintf (vect_dump
, "not vectorized: mixed data-types");
410 vectorization_factor
= nunits
;
412 #ifdef ENABLE_CHECKING
413 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type
))
414 * vectorization_factor
== UNITS_PER_SIMD_WORD
);
419 /* TODO: Analyze cost. Decide if worth while to vectorize. */
421 if (vectorization_factor
<= 1)
423 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
424 LOOP_LOC (loop_vinfo
)))
425 fprintf (vect_dump
, "not vectorized: unsupported data-type");
428 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
434 /* Function vect_analyze_operations.
436 Scan the loop stmts and make sure they are all vectorizable. */
439 vect_analyze_operations (loop_vec_info loop_vinfo
)
441 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
442 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
443 int nbbs
= loop
->num_nodes
;
444 block_stmt_iterator si
;
445 unsigned int vectorization_factor
= 0;
449 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
450 fprintf (vect_dump
, "=== vect_analyze_operations ===");
452 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
453 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
455 for (i
= 0; i
< nbbs
; i
++)
457 basic_block bb
= bbs
[i
];
459 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
461 tree stmt
= bsi_stmt (si
);
462 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
464 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
466 fprintf (vect_dump
, "==> examining statement: ");
467 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
470 gcc_assert (stmt_info
);
472 /* skip stmts which do not need to be vectorized.
473 this is expected to include:
474 - the COND_EXPR which is the loop exit condition
475 - any LABEL_EXPRs in the loop
476 - computations that are used only for array indexing or loop
479 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
481 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
482 fprintf (vect_dump
, "irrelevant.");
486 #ifdef ENABLE_CHECKING
487 if (STMT_VINFO_RELEVANT_P (stmt_info
))
489 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))));
490 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
494 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
495 || vectorizable_assignment (stmt
, NULL
, NULL
)
496 || vectorizable_load (stmt
, NULL
, NULL
)
497 || vectorizable_store (stmt
, NULL
, NULL
)
498 || vectorizable_condition (stmt
, NULL
, NULL
));
502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
503 LOOP_LOC (loop_vinfo
)))
505 fprintf (vect_dump
, "not vectorized: stmt not supported: ");
506 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
513 /* TODO: Analyze cost. Decide if worth while to vectorize. */
515 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
516 && vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
518 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
519 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
521 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
522 && LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
)
524 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
525 LOOP_LOC (loop_vinfo
)))
526 fprintf (vect_dump
, "not vectorized: iteration count too small.");
530 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
531 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0)
533 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
534 fprintf (vect_dump
, "epilog loop required.");
535 if (!vect_can_advance_ivs_p (loop_vinfo
))
537 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
538 LOOP_LOC (loop_vinfo
)))
540 "not vectorized: can't create epilog loop 1.");
543 if (!slpeel_can_duplicate_loop_p (loop
, loop
->single_exit
))
545 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
546 LOOP_LOC (loop_vinfo
)))
548 "not vectorized: can't create epilog loop 2.");
557 /* Function exist_non_indexing_operands_for_use_p
559 USE is one of the uses attached to STMT. Check if USE is
560 used in STMT for anything other than indexing an array. */
563 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
566 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
568 /* USE corresponds to some operand in STMT. If there is no data
569 reference in STMT, then any operand that corresponds to USE
570 is not indexing an array. */
571 if (!STMT_VINFO_DATA_REF (stmt_info
))
574 /* STMT has a data_ref. FORNOW this means that its of one of
578 (This should have been verified in analyze_data_refs).
580 'var' in the second case corresponds to a def, not a use,
581 so USE cannot correspond to any operands that are not used
584 Therefore, all we need to check is if STMT falls into the
585 first case, and whether var corresponds to USE. */
587 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
590 operand
= TREE_OPERAND (stmt
, 1);
592 if (TREE_CODE (operand
) != SSA_NAME
)
602 /* Function vect_analyze_scalar_cycles.
604 Examine the cross iteration def-use cycles of scalar variables, by
605 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
606 cycles that they represent do not impede vectorization.
608 FORNOW: Reduction as in the following loop, is not supported yet:
612 The cross-iteration cycle corresponding to variable 'sum' will be
613 considered too complicated and will impede vectorization.
615 FORNOW: Induction as in the following loop, is not supported yet:
620 However, the following loop *is* vectorizable:
625 In both loops there exists a def-use cycle for the variable i:
626 loop: i_2 = PHI (i_0, i_1)
631 The evolution of the above cycle is considered simple enough,
632 however, we also check that the cycle does not need to be
633 vectorized, i.e - we check that the variable that this cycle
634 defines is only used for array indexing or in stmts that do not
635 need to be vectorized. This is not the case in loop2, but it
636 *is* the case in loop3. */
639 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
642 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
643 basic_block bb
= loop
->header
;
646 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
647 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
649 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
651 tree access_fn
= NULL
;
653 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
655 fprintf (vect_dump
, "Analyze phi: ");
656 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
659 /* Skip virtual phi's. The data dependences that are associated with
660 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
662 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
664 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
665 fprintf (vect_dump
, "virtual phi. skip.");
669 /* Analyze the evolution function. */
671 /* FORNOW: The only scalar cross-iteration cycles that we allow are
672 those of loop induction variables; This property is verified here.
674 Furthermore, if that induction variable is used in an operation
675 that needs to be vectorized (i.e, is not solely used to index
676 arrays and check the exit condition) - we do not support its
677 vectorization yet. This property is verified in vect_is_simple_use,
678 during vect_analyze_operations. */
680 access_fn
= /* instantiate_parameters
682 analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
686 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
687 LOOP_LOC (loop_vinfo
)))
688 fprintf (vect_dump
, "not vectorized: unsupported scalar cycle.");
692 if (vect_print_dump_info (REPORT_DETAILS
,
693 LOOP_LOC (loop_vinfo
)))
695 fprintf (vect_dump
, "Access function of PHI: ");
696 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
699 if (!vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
, &dummy
))
701 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
702 LOOP_LOC (loop_vinfo
)))
703 fprintf (vect_dump
, "not vectorized: unsupported scalar cycle.");
712 /* Function vect_base_addr_differ_p.
714 This is the simplest data dependence test: determines whether the
715 data references A and B access the same array/region. Returns
716 false when the property is not computable at compile time.
717 Otherwise return true, and DIFFER_P will record the result. This
718 utility will not be necessary when alias_sets_conflict_p will be
719 less conservative. */
722 vect_base_addr_differ_p (struct data_reference
*dra
,
723 struct data_reference
*drb
,
726 tree stmt_a
= DR_STMT (dra
);
727 stmt_vec_info stmt_info_a
= vinfo_for_stmt (stmt_a
);
728 tree stmt_b
= DR_STMT (drb
);
729 stmt_vec_info stmt_info_b
= vinfo_for_stmt (stmt_b
);
730 tree addr_a
= STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a
);
731 tree addr_b
= STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b
);
732 tree type_a
= TREE_TYPE (addr_a
);
733 tree type_b
= TREE_TYPE (addr_b
);
734 HOST_WIDE_INT alias_set_a
, alias_set_b
;
736 gcc_assert (POINTER_TYPE_P (type_a
) && POINTER_TYPE_P (type_b
));
738 /* Both references are ADDR_EXPR, i.e., we have the objects. */
739 if (TREE_CODE (addr_a
) == ADDR_EXPR
&& TREE_CODE (addr_b
) == ADDR_EXPR
)
740 return array_base_name_differ_p (dra
, drb
, differ_p
);
742 alias_set_a
= (TREE_CODE (addr_a
) == ADDR_EXPR
) ?
743 get_alias_set (TREE_OPERAND (addr_a
, 0)) : get_alias_set (addr_a
);
744 alias_set_b
= (TREE_CODE (addr_b
) == ADDR_EXPR
) ?
745 get_alias_set (TREE_OPERAND (addr_b
, 0)) : get_alias_set (addr_b
);
747 if (!alias_sets_conflict_p (alias_set_a
, alias_set_b
))
753 /* An instruction writing through a restricted pointer is "independent" of any
754 instruction reading or writing through a different pointer, in the same
756 else if ((TYPE_RESTRICT (type_a
) && !DR_IS_READ (dra
))
757 || (TYPE_RESTRICT (type_b
) && !DR_IS_READ (drb
)))
766 /* Function vect_analyze_data_ref_dependence.
768 Return TRUE if there (might) exist a dependence between a memory-reference
769 DRA and a memory-reference DRB. */
772 vect_analyze_data_ref_dependence (struct data_reference
*dra
,
773 struct data_reference
*drb
,
774 loop_vec_info loop_vinfo
)
777 struct data_dependence_relation
*ddr
;
778 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
779 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
781 unsigned int loop_depth
= 0;
782 struct loop
*loop_nest
= loop
;
785 if (!vect_base_addr_differ_p (dra
, drb
, &differ_p
))
787 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
788 LOOP_LOC (loop_vinfo
)))
791 "not vectorized: can't determine dependence between: ");
792 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
793 fprintf (vect_dump
, " and ");
794 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
802 ddr
= initialize_data_dependence_relation (dra
, drb
);
803 compute_affine_dependence (ddr
);
805 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
808 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
810 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
811 LOOP_LOC (loop_vinfo
)))
814 "not vectorized: can't determine dependence between ");
815 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
816 fprintf (vect_dump
, " and ");
817 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
822 /* Find loop depth. */
825 if (loop_nest
->outer
&& loop_nest
->outer
->outer
)
827 loop_nest
= loop_nest
->outer
;
834 /* Compute distance vector. */
835 compute_subscript_distance (ddr
);
836 build_classic_dist_vector (ddr
, vect_loops_num
, loop_nest
->depth
);
838 if (!DDR_DIST_VECT (ddr
))
840 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
841 LOOP_LOC (loop_vinfo
)))
843 fprintf (vect_dump
, "not vectorized: bad dist vector for ");
844 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
845 fprintf (vect_dump
, " and ");
846 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
851 dist
= DDR_DIST_VECT (ddr
)[loop_depth
];
853 /* Same loop iteration. */
856 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
857 fprintf (vect_dump
, "dependence distance 0.");
861 if (dist
>= vectorization_factor
)
862 /* Dependence distance does not create dependence, as far as vectorization
863 is concerned, in this case. */
866 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
867 LOOP_LOC (loop_vinfo
)))
870 "not vectorized: possible dependence between data-refs ");
871 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
872 fprintf (vect_dump
, " and ");
873 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
880 /* Function vect_analyze_data_ref_dependences.
882 Examine all the data references in the loop, and make sure there do not
883 exist any data dependences between them. */
886 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
889 varray_type loop_write_refs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
890 varray_type loop_read_refs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
892 /* Examine store-store (output) dependences. */
894 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
895 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
897 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
898 fprintf (vect_dump
, "compare all store-store pairs.");
900 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_refs
); i
++)
902 for (j
= i
+ 1; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
904 struct data_reference
*dra
=
905 VARRAY_GENERIC_PTR (loop_write_refs
, i
);
906 struct data_reference
*drb
=
907 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
908 if (vect_analyze_data_ref_dependence (dra
, drb
, loop_vinfo
))
913 /* Examine load-store (true/anti) dependences. */
915 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
916 fprintf (vect_dump
, "compare all load-store pairs.");
918 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_refs
); i
++)
920 for (j
= 0; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
922 struct data_reference
*dra
= VARRAY_GENERIC_PTR (loop_read_refs
, i
);
923 struct data_reference
*drb
=
924 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
925 if (vect_analyze_data_ref_dependence (dra
, drb
, loop_vinfo
))
934 /* Function vect_compute_data_ref_alignment
936 Compute the misalignment of the data reference DR.
939 1. If during the misalignment computation it is found that the data reference
940 cannot be vectorized then false is returned.
941 2. DR_MISALIGNMENT (DR) is defined.
943 FOR NOW: No analysis is actually performed. Misalignment is calculated
944 only for trivial cases. TODO. */
947 vect_compute_data_ref_alignment (struct data_reference
*dr
)
949 tree stmt
= DR_STMT (dr
);
950 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
951 tree ref
= DR_REF (dr
);
953 tree base
, alignment
;
957 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
958 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
960 /* Initialize misalignment to unknown. */
961 DR_MISALIGNMENT (dr
) = -1;
963 misalign
= STMT_VINFO_VECT_MISALIGNMENT (stmt_info
);
964 base_aligned_p
= STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info
);
965 base
= build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
));
966 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
970 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
972 fprintf (vect_dump
, "Unknown alignment for access: ");
973 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
980 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
)))
982 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
984 fprintf (vect_dump
, "can't force alignment of ref: ");
985 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
990 /* Force the alignment of the decl.
991 NOTE: This is the only change to the code we make during
992 the analysis phase, before deciding to vectorize the loop. */
993 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
994 fprintf (vect_dump
, "force alignment");
995 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
996 DECL_USER_ALIGN (base
) = 1;
999 /* At this point we assume that the base is aligned. */
1000 gcc_assert (base_aligned_p
1001 || (TREE_CODE (base
) == VAR_DECL
1002 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1004 /* Alignment required, in bytes: */
1005 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1007 /* Modulo alignment. */
1008 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1009 if (tree_int_cst_sgn (misalign
) < 0)
1011 /* Negative misalignment value. */
1012 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1013 fprintf (vect_dump
, "unexpected misalign value");
1017 DR_MISALIGNMENT (dr
) = tree_low_cst (misalign
, 1);
1019 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1020 fprintf (vect_dump
, "misalign = %d bytes", DR_MISALIGNMENT (dr
));
1026 /* Function vect_compute_data_refs_alignment
1028 Compute the misalignment of data references in the loop.
1029 This pass may take place at function granularity instead of at loop
1032 FOR NOW: No analysis is actually performed. Misalignment is calculated
1033 only for trivial cases. TODO. */
1036 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1038 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1039 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1042 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1044 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1045 if (!vect_compute_data_ref_alignment (dr
))
1049 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
1051 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
1052 if (!vect_compute_data_ref_alignment (dr
))
1060 /* Function vect_enhance_data_refs_alignment
1062 This pass will use loop versioning and loop peeling in order to enhance
1063 the alignment of data references in the loop.
1065 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1066 original loop is to be vectorized; Any other loops that are created by
1067 the transformations performed in this pass - are not supposed to be
1068 vectorized. This restriction will be relaxed. */
1071 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1073 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1074 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1075 varray_type datarefs
;
1076 struct data_reference
*dr0
= NULL
;
1080 This pass will require a cost model to guide it whether to apply peeling
1081 or versioning or a combination of the two. For example, the scheme that
1082 intel uses when given a loop with several memory accesses, is as follows:
1083 choose one memory access ('p') which alignment you want to force by doing
1084 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1085 other accesses are not necessarily aligned, or (2) use loop versioning to
1086 generate one loop in which all accesses are aligned, and another loop in
1087 which only 'p' is necessarily aligned.
1089 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1090 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1091 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1093 Devising a cost model is the most critical aspect of this work. It will
1094 guide us on which access to peel for, whether to use loop versioning, how
1095 many versions to create, etc. The cost model will probably consist of
1096 generic considerations as well as target specific considerations (on
1097 powerpc for example, misaligned stores are more painful than misaligned
1100 Here is the general steps involved in alignment enhancements:
1102 -- original loop, before alignment analysis:
1103 for (i=0; i<N; i++){
1104 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1105 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1108 -- After vect_compute_data_refs_alignment:
1109 for (i=0; i<N; i++){
1110 x = q[i]; # DR_MISALIGNMENT(q) = 3
1111 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1114 -- Possibility 1: we do loop versioning:
1116 for (i=0; i<N; i++){ # loop 1A
1117 x = q[i]; # DR_MISALIGNMENT(q) = 3
1118 p[i] = y; # DR_MISALIGNMENT(p) = 0
1122 for (i=0; i<N; i++){ # loop 1B
1123 x = q[i]; # DR_MISALIGNMENT(q) = 3
1124 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1128 -- Possibility 2: we do loop peeling:
1129 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1133 for (i = 3; i < N; i++){ # loop 2A
1134 x = q[i]; # DR_MISALIGNMENT(q) = 0
1135 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1138 -- Possibility 3: combination of loop peeling and versioning:
1139 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1144 for (i = 3; i<N; i++){ # loop 3A
1145 x = q[i]; # DR_MISALIGNMENT(q) = 0
1146 p[i] = y; # DR_MISALIGNMENT(p) = 0
1150 for (i = 3; i<N; i++){ # loop 3B
1151 x = q[i]; # DR_MISALIGNMENT(q) = 0
1152 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1156 These loops are later passed to loop_transform to be vectorized. The
1157 vectorizer will use the alignment information to guide the transformation
1158 (whether to generate regular loads/stores, or with special handling for
1162 /* (1) Peeling to force alignment. */
1164 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1166 + How many accesses will become aligned due to the peeling
1167 - How many accesses will become unaligned due to the peeling,
1168 and the cost of misaligned accesses.
1169 - The cost of peeling (the extra runtime checks, the increase
1172 The scheme we use FORNOW: peel to force the alignment of the first
1173 misaligned store in the loop.
1174 Rationale: misaligned stores are not yet supported.
1176 TODO: Use a better cost model. */
1178 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1180 dr0
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1181 if (!aligned_access_p (dr0
))
1183 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1184 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1189 /* (1.2) Update the alignment info according to the peeling factor.
1190 If the misalignment of the DR we peel for is M, then the
1191 peeling factor is VF - M, and the misalignment of each access DR_i
1192 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
1193 If the misalignment of the DR we peel for is unknown, then the
1194 misalignment of each access DR_i in the loop is also unknown.
1196 TODO: - consider accesses that are known to have the same
1197 alignment, even if that alignment is unknown. */
1199 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
1204 if (known_alignment_for_access_p (dr0
))
1206 /* Since it's known at compile time, compute the number of iterations
1207 in the peeled loop (the peeling factor) for use in updating
1208 DR_MISALIGNMENT values. The peeling factor is the vectorization
1209 factor minus the misalignment as an element count. */
1210 mis
= DR_MISALIGNMENT (dr0
);
1211 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1212 npeel
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - mis
;
1215 datarefs
= loop_write_datarefs
;
1216 for (j
= 0; j
< 2; j
++)
1218 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
1220 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
1224 if (known_alignment_for_access_p (dr
)
1225 && DR_MISALIGNMENT (dr
) == DR_MISALIGNMENT (dr0
))
1226 DR_MISALIGNMENT (dr
) = 0;
1227 else if (known_alignment_for_access_p (dr
)
1228 && known_alignment_for_access_p (dr0
))
1230 int drsize
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1232 DR_MISALIGNMENT (dr
) += npeel
* drsize
;
1233 DR_MISALIGNMENT (dr
) %= UNITS_PER_SIMD_WORD
;
1236 DR_MISALIGNMENT (dr
) = -1;
1238 datarefs
= loop_read_datarefs
;
1241 DR_MISALIGNMENT (dr0
) = 0;
1246 /* Function vect_analyze_data_refs_alignment
1248 Analyze the alignment of the data-references in the loop.
1249 FOR NOW: Until support for misliagned accesses is in place, only if all
1250 accesses are aligned can the loop be vectorized. This restriction will be
1254 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
1256 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1257 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1258 enum dr_alignment_support supportable_dr_alignment
;
1261 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1262 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1265 /* This pass may take place at function granularity instead of at loop
1268 if (!vect_compute_data_refs_alignment (loop_vinfo
))
1270 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1271 LOOP_LOC (loop_vinfo
)))
1273 "not vectorized: can't calculate alignment for data ref.");
1278 /* This pass will decide on using loop versioning and/or loop peeling in
1279 order to enhance the alignment of data references in the loop. */
1281 vect_enhance_data_refs_alignment (loop_vinfo
);
1284 /* Finally, check that all the data references in the loop can be
1285 handled with respect to their alignment. */
1287 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
1289 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
1290 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1291 if (!supportable_dr_alignment
)
1293 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1294 LOOP_LOC (loop_vinfo
)))
1295 fprintf (vect_dump
, "not vectorized: unsupported unaligned load.");
1298 if (supportable_dr_alignment
!= dr_aligned
1299 && (vect_print_dump_info (REPORT_ALIGNMENT
, LOOP_LOC (loop_vinfo
))))
1300 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1302 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1304 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1305 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1306 if (!supportable_dr_alignment
)
1308 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1309 LOOP_LOC (loop_vinfo
)))
1310 fprintf (vect_dump
, "not vectorized: unsupported unaligned store.");
1313 if (supportable_dr_alignment
!= dr_aligned
1314 && (vect_print_dump_info (REPORT_ALIGNMENT
, LOOP_LOC (loop_vinfo
))))
1315 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1317 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo
)
1318 && vect_print_dump_info (REPORT_ALIGNMENT
, LOOP_LOC (loop_vinfo
)))
1319 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1325 /* Function vect_analyze_data_ref_access.
1327 Analyze the access pattern of the data-reference DR. For now, a data access
1328 has to consecutive to be considered vectorizable. */
1331 vect_analyze_data_ref_access (struct data_reference
*dr
)
1333 tree stmt
= DR_STMT (dr
);
1334 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1335 tree step
= STMT_VINFO_VECT_STEP (stmt_info
);
1336 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1338 if (!step
|| tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
1340 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1341 fprintf (vect_dump
, "not consecutive access");
1348 /* Function vect_analyze_data_ref_accesses.
1350 Analyze the access pattern of all the data references in the loop.
1352 FORNOW: the only access pattern that is considered vectorizable is a
1353 simple step 1 (consecutive) access.
1355 FORNOW: handle only arrays and pointer accesses. */
1358 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
1361 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1362 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1364 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1365 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
1367 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1369 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1370 bool ok
= vect_analyze_data_ref_access (dr
);
1373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1374 LOOP_LOC (loop_vinfo
)))
1375 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1380 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
1382 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
1383 bool ok
= vect_analyze_data_ref_access (dr
);
1386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1387 LOOP_LOC (loop_vinfo
)))
1388 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
1397 /* Function vect_analyze_pointer_ref_access.
1400 STMT - a stmt that contains a data-ref.
1401 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1402 ACCESS_FN - the access function of MEMREF.
1405 If the data-ref access is vectorizable, return a data_reference structure
1406 that represents it (DR). Otherwise - return NULL.
1407 STEP - the stride of MEMREF in the loop.
1408 INIT - the initial condition of MEMREF in the loop.
1411 static struct data_reference
*
1412 vect_analyze_pointer_ref_access (tree memref
, tree stmt
, bool is_read
,
1413 tree access_fn
, tree
*ptr_init
, tree
*ptr_step
)
1415 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1416 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1417 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1419 tree reftype
, innertype
;
1420 tree indx_access_fn
;
1421 int loopnum
= loop
->num
;
1422 struct data_reference
*dr
;
1424 if (!vect_is_simple_iv_evolution (loopnum
, access_fn
, &init
, &step
))
1426 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1427 LOOP_LOC (loop_vinfo
)))
1428 fprintf (vect_dump
, "not vectorized: pointer access is not simple.");
1434 if (!expr_invariant_in_loop_p (loop
, init
))
1436 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1437 LOOP_LOC (loop_vinfo
)))
1439 "not vectorized: initial condition is not loop invariant.");
1443 if (TREE_CODE (step
) != INTEGER_CST
)
1445 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1446 LOOP_LOC (loop_vinfo
)))
1448 "not vectorized: non constant step for pointer access.");
1452 reftype
= TREE_TYPE (TREE_OPERAND (memref
, 0));
1453 if (!POINTER_TYPE_P (reftype
))
1455 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1456 LOOP_LOC (loop_vinfo
)))
1457 fprintf (vect_dump
, "not vectorized: unexpected pointer access form.");
1461 if (!POINTER_TYPE_P (TREE_TYPE (init
)))
1463 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1464 LOOP_LOC (loop_vinfo
)))
1465 fprintf (vect_dump
, "not vectorized: unexpected pointer access form.");
1469 *ptr_step
= fold_convert (ssizetype
, step
);
1470 innertype
= TREE_TYPE (reftype
);
1471 if (!COMPLETE_TYPE_P (innertype
))
1473 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1474 LOOP_LOC (loop_vinfo
)))
1475 fprintf (vect_dump
, "not vectorized: pointer to incomplete type.");
1479 /* Check that STEP is a multiple of type size. */
1480 if (!integer_zerop (size_binop (TRUNC_MOD_EXPR
, *ptr_step
,
1481 fold_convert (ssizetype
, TYPE_SIZE_UNIT (innertype
)))))
1483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1484 LOOP_LOC (loop_vinfo
)))
1485 fprintf (vect_dump
, "not vectorized: non consecutive access.");
1490 build_polynomial_chrec (loopnum
, integer_zero_node
, integer_one_node
);
1491 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1493 fprintf (vect_dump
, "Access function of ptr indx: ");
1494 print_generic_expr (vect_dump
, indx_access_fn
, TDF_SLIM
);
1496 dr
= init_data_ref (stmt
, memref
, NULL_TREE
, indx_access_fn
, is_read
);
1502 /* Function vect_address_analysis
1504 Return the BASE of the address expression EXPR.
1505 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1508 EXPR - the address expression that is being analyzed
1509 STMT - the statement that contains EXPR or its original memory reference
1510 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1511 VECTYPE - the type that defines the alignment (i.e, we compute
1512 alignment relative to TYPE_ALIGN(VECTYPE))
1513 DR - data_reference struct for the original memory reference
1516 BASE (returned value) - the base of the data reference EXPR.
1517 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1518 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1519 computation is impossible
1520 STEP - evolution of EXPR in the loop
1521 BASE_ALIGNED - indicates if BASE is aligned
1523 If something unexpected is encountered (an unsupported form of data-ref),
1524 then NULL_TREE is returned.
1528 vect_address_analysis (tree expr
, tree stmt
, bool is_read
, tree vectype
,
1529 struct data_reference
*dr
, tree
*offset
, tree
*misalign
,
1530 tree
*step
, bool *base_aligned
)
1532 tree oprnd0
, oprnd1
, base_address
, offset_expr
, base_addr0
, base_addr1
;
1533 tree address_offset
= ssize_int (0), address_misalign
= ssize_int (0);
1535 struct ptr_info_def
*dummy1
;
1538 switch (TREE_CODE (expr
))
1542 /* EXPR is of form {base +/- offset} (or {offset +/- base}). */
1543 oprnd0
= TREE_OPERAND (expr
, 0);
1544 oprnd1
= TREE_OPERAND (expr
, 1);
1546 STRIP_NOPS (oprnd0
);
1547 STRIP_NOPS (oprnd1
);
1549 /* Recursively try to find the base of the address contained in EXPR.
1550 For offset, the returned base will be NULL. */
1551 base_addr0
= vect_address_analysis (oprnd0
, stmt
, is_read
, vectype
, dr
,
1552 &address_offset
, &address_misalign
, step
,
1555 base_addr1
= vect_address_analysis (oprnd1
, stmt
, is_read
, vectype
, dr
,
1556 &address_offset
, &address_misalign
, step
,
1559 /* We support cases where only one of the operands contains an
1561 if ((base_addr0
&& base_addr1
) || (!base_addr0
&& !base_addr1
))
1564 /* To revert STRIP_NOPS. */
1565 oprnd0
= TREE_OPERAND (expr
, 0);
1566 oprnd1
= TREE_OPERAND (expr
, 1);
1568 offset_expr
= base_addr0
?
1569 fold_convert (ssizetype
, oprnd1
) : fold_convert (ssizetype
, oprnd0
);
1571 /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
1572 a number, we can add it to the misalignment value calculated for base,
1573 otherwise, misalignment is NULL. */
1574 if (TREE_CODE (offset_expr
) == INTEGER_CST
&& address_misalign
)
1575 *misalign
= size_binop (TREE_CODE (expr
), address_misalign
,
1578 *misalign
= NULL_TREE
;
1580 /* Combine offset (from EXPR {base + offset}) with the offset calculated
1582 *offset
= size_binop (TREE_CODE (expr
), address_offset
, offset_expr
);
1583 return base_addr0
? base_addr0
: base_addr1
;
1586 base_address
= vect_object_analysis (TREE_OPERAND (expr
, 0), stmt
,
1587 is_read
, vectype
, &dr
, offset
,
1588 misalign
, step
, base_aligned
,
1589 &dummy
, &dummy1
, &dummy2
);
1590 return base_address
;
1593 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
1596 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr
))) < TYPE_ALIGN (vectype
))
1598 if (vect_get_ptr_offset (expr
, vectype
, misalign
))
1599 *base_aligned
= true;
1601 *base_aligned
= false;
1605 *base_aligned
= true;
1606 *misalign
= ssize_int (0);
1608 *offset
= ssize_int (0);
1609 *step
= ssize_int (0);
1618 /* Function vect_object_analysis
1620 Return the BASE of the data reference MEMREF.
1621 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1622 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1623 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1624 instantiated with initial_conditions of access_functions of variables,
1625 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1627 Function get_inner_reference is used for the above in case of ARRAY_REF and
1630 The structure of the function is as follows:
1632 Case 1. For handled_component_p refs
1633 1.1 call get_inner_reference
1634 1.1.1 analyze offset expr received from get_inner_reference
1635 1.2. build data-reference structure for MEMREF
1636 (fall through with BASE)
1637 Case 2. For declarations
1639 2.2 update DR_BASE_NAME if necessary for alias
1640 Case 3. For INDIRECT_REFs
1641 3.1 get the access function
1642 3.2 analyze evolution of MEMREF
1643 3.3 set data-reference structure for MEMREF
1644 3.4 call vect_address_analysis to analyze INIT of the access function
1647 Combine the results of object and address analysis to calculate
1648 INITIAL_OFFSET, STEP and misalignment info.
1651 MEMREF - the memory reference that is being analyzed
1652 STMT - the statement that contains MEMREF
1653 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1654 VECTYPE - the type that defines the alignment (i.e, we compute
1655 alignment relative to TYPE_ALIGN(VECTYPE))
1658 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1659 E.g, if MEMREF is a.b[k].c[i][j] the returned
1661 DR - data_reference struct for MEMREF
1662 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1663 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1664 the computation is impossible
1665 STEP - evolution of the DR_REF in the loop
1666 BASE_ALIGNED - indicates if BASE is aligned
1667 MEMTAG - memory tag for aliasing purposes
1668 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
1669 SUBVAR - Sub-variables of the variable
1671 If something unexpected is encountered (an unsupported form of data-ref),
1672 then NULL_TREE is returned. */
1675 vect_object_analysis (tree memref
, tree stmt
, bool is_read
,
1676 tree vectype
, struct data_reference
**dr
,
1677 tree
*offset
, tree
*misalign
, tree
*step
,
1678 bool *base_aligned
, tree
*memtag
,
1679 struct ptr_info_def
**ptr_info
, subvar_t
*subvars
)
1681 tree base
= NULL_TREE
, base_address
= NULL_TREE
;
1682 tree object_offset
= ssize_int (0), object_misalign
= ssize_int (0);
1683 tree object_step
= ssize_int (0), address_step
= ssize_int (0);
1684 bool object_base_aligned
= true, address_base_aligned
= true;
1685 tree address_offset
= ssize_int (0), address_misalign
= ssize_int (0);
1686 HOST_WIDE_INT pbitsize
, pbitpos
;
1687 tree poffset
, bit_pos_in_bytes
;
1688 enum machine_mode pmode
;
1689 int punsignedp
, pvolatilep
;
1690 tree ptr_step
= ssize_int (0), ptr_init
= NULL_TREE
;
1691 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1692 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1693 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1694 struct data_reference
*ptr_dr
= NULL
;
1695 tree access_fn
, evolution_part
, address_to_analyze
;
1700 /* Case 1. handled_component_p refs. */
1701 if (handled_component_p (memref
))
1703 /* 1.1 call get_inner_reference. */
1704 /* Find the base and the offset from it. */
1705 base
= get_inner_reference (memref
, &pbitsize
, &pbitpos
, &poffset
,
1706 &pmode
, &punsignedp
, &pvolatilep
, false);
1710 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1712 && !vect_analyze_offset_expr (poffset
, loop
, TYPE_SIZE_UNIT (vectype
),
1713 &object_offset
, &object_misalign
, &object_step
))
1715 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1717 fprintf (vect_dump
, "failed to compute offset or step for ");
1718 print_generic_expr (vect_dump
, memref
, TDF_SLIM
);
1723 /* Add bit position to OFFSET and MISALIGN. */
1725 bit_pos_in_bytes
= ssize_int (pbitpos
/BITS_PER_UNIT
);
1726 /* Check that there is no remainder in bits. */
1727 if (pbitpos
%BITS_PER_UNIT
)
1729 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1730 fprintf (vect_dump
, "bit offset alignment.");
1733 object_offset
= size_binop (PLUS_EXPR
, bit_pos_in_bytes
, object_offset
);
1734 if (object_misalign
)
1735 object_misalign
= size_binop (PLUS_EXPR
, object_misalign
,
1738 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1741 if (TREE_CODE (memref
) == ARRAY_REF
)
1742 *dr
= analyze_array (stmt
, memref
, is_read
);
1747 memref
= base
; /* To continue analysis of BASE. */
1751 /* Part 1: Case 2. Declarations. */
1752 if (DECL_P (memref
))
1754 /* We expect to get a decl only if we already have a DR. */
1757 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1759 fprintf (vect_dump
, "unhandled decl ");
1760 print_generic_expr (vect_dump
, memref
, TDF_SLIM
);
1765 /* 2.1 check the alignment. */
1766 if (DECL_ALIGN (memref
) >= TYPE_ALIGN (vectype
))
1767 object_base_aligned
= true;
1769 object_base_aligned
= false;
1771 /* 2.2 update DR_BASE_NAME if necessary. */
1772 if (!DR_BASE_NAME ((*dr
)))
1773 /* For alias analysis. In case the analysis of INDIRECT_REF brought
1775 DR_BASE_NAME ((*dr
)) = memref
;
1777 if (SSA_VAR_P (memref
) && var_can_have_subvars (memref
))
1778 *subvars
= get_subvars_for_var (memref
);
1779 base_address
= build_fold_addr_expr (memref
);
1783 /* Part 1: Case 3. INDIRECT_REFs. */
1784 else if (TREE_CODE (memref
) == INDIRECT_REF
)
1786 tree ptr_ref
= TREE_OPERAND (memref
, 0);
1787 if (TREE_CODE (ptr_ref
) == SSA_NAME
)
1788 *ptr_info
= SSA_NAME_PTR_INFO (ptr_ref
);
1790 /* 3.1 get the access function. */
1791 access_fn
= analyze_scalar_evolution (loop
, ptr_ref
);
1794 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1795 LOOP_LOC (loop_vinfo
)))
1796 fprintf (vect_dump
, "not vectorized: complicated pointer access.");
1799 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1801 fprintf (vect_dump
, "Access function of ptr: ");
1802 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
1805 /* 3.2 analyze evolution of MEMREF. */
1806 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1809 ptr_dr
= vect_analyze_pointer_ref_access (memref
, stmt
, is_read
,
1810 access_fn
, &ptr_init
, &ptr_step
);
1814 object_step
= size_binop (PLUS_EXPR
, object_step
, ptr_step
);
1815 address_to_analyze
= ptr_init
;
1821 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1822 LOOP_LOC (loop_vinfo
)))
1823 fprintf (vect_dump
, "not vectorized: ptr is loop invariant.");
1826 /* Since there exists DR for MEMREF, we are analyzing the init of
1827 the access function, which not necessary has evolution in the
1829 address_to_analyze
= initial_condition_in_loop_num (access_fn
,
1833 /* 3.3 set data-reference structure for MEMREF. */
1834 *dr
= (*dr
) ? *dr
: ptr_dr
;
1836 /* 3.4 call vect_address_analysis to analyze INIT of the access
1838 base_address
= vect_address_analysis (address_to_analyze
, stmt
, is_read
,
1839 vectype
, *dr
, &address_offset
, &address_misalign
,
1840 &address_step
, &address_base_aligned
);
1844 switch (TREE_CODE (base_address
))
1847 *memtag
= get_var_ann (SSA_NAME_VAR (base_address
))->type_mem_tag
;
1848 if (!(*memtag
) && TREE_CODE (TREE_OPERAND (memref
, 0)) == SSA_NAME
)
1849 *memtag
= get_var_ann (
1850 SSA_NAME_VAR (TREE_OPERAND (memref
, 0)))->type_mem_tag
;
1853 *memtag
= TREE_OPERAND (base_address
, 0);
1856 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
1857 LOOP_LOC (loop_vinfo
)))
1859 fprintf (vect_dump
, "not vectorized: no memtag ref: ");
1860 print_generic_expr (vect_dump
, memref
, TDF_SLIM
);
1867 /* MEMREF cannot be analyzed. */
1870 if (SSA_VAR_P (*memtag
) && var_can_have_subvars (*memtag
))
1871 *subvars
= get_subvars_for_var (*memtag
);
1873 /* Part 2: Combine the results of object and address analysis to calculate
1874 INITIAL_OFFSET, STEP and misalignment info. */
1875 *offset
= size_binop (PLUS_EXPR
, object_offset
, address_offset
);
1876 if (object_misalign
&& address_misalign
)
1877 *misalign
= size_binop (PLUS_EXPR
, object_misalign
, address_misalign
);
1879 *misalign
= NULL_TREE
;
1880 *step
= size_binop (PLUS_EXPR
, object_step
, address_step
);
1881 *base_aligned
= object_base_aligned
&& address_base_aligned
;
1883 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1885 fprintf (vect_dump
, "Results of object analysis for: ");
1886 print_generic_expr (vect_dump
, memref
, TDF_SLIM
);
1887 fprintf (vect_dump
, "\n\tbase_address: ");
1888 print_generic_expr (vect_dump
, base_address
, TDF_SLIM
);
1889 fprintf (vect_dump
, "\n\toffset: ");
1890 print_generic_expr (vect_dump
, *offset
, TDF_SLIM
);
1891 fprintf (vect_dump
, "\n\tstep: ");
1892 print_generic_expr (vect_dump
, *step
, TDF_SLIM
);
1893 fprintf (vect_dump
, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned
);
1894 print_generic_expr (vect_dump
, *misalign
, TDF_SLIM
);
1896 return base_address
;
1900 /* Function vect_analyze_data_refs.
1902 Find all the data references in the loop.
1904 The general structure of the analysis of data refs in the vectorizer is as
1906 1- vect_analyze_data_refs(loop):
1907 Find and analyze all data-refs in the loop:
1909 base_address = vect_object_analysis(ref)
1910 1.1- vect_object_analysis(ref):
1911 Analyze ref, and build a DR (data_referece struct) for it;
1912 compute base, initial_offset, step and alignment.
1913 Call get_inner_reference for refs handled in this function.
1914 Call vect_addr_analysis(addr) to analyze pointer type expressions.
1915 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1916 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1917 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1918 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1919 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1921 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1922 which base is really an array (not a pointer) and which alignment
1923 can be forced. This restriction will be relaxed. */
1926 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
1928 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1929 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1930 int nbbs
= loop
->num_nodes
;
1931 block_stmt_iterator si
;
1933 struct data_reference
*dr
;
1935 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1936 fprintf (vect_dump
, "=== vect_analyze_data_refs ===");
1938 for (j
= 0; j
< nbbs
; j
++)
1940 basic_block bb
= bbs
[j
];
1941 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1943 bool is_read
= false;
1944 tree stmt
= bsi_stmt (si
);
1945 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1946 varray_type
*datarefs
= NULL
;
1948 tree scalar_type
, vectype
;
1949 tree base
, offset
, misalign
, step
, tag
;
1950 struct ptr_info_def
*ptr_info
;
1952 subvar_t subvars
= NULL
;
1953 bool no_vuse
, no_vmaymust
;
1955 /* Assumption: there exists a data-ref in stmt, if and only if
1956 it has vuses/vdefs. */
1958 no_vuse
= ZERO_SSA_OPERANDS (stmt
, SSA_OP_VUSE
);
1959 no_vmaymust
= ZERO_SSA_OPERANDS (stmt
,
1960 SSA_OP_VMAYDEF
| SSA_OP_VMUSTDEF
);
1961 if (no_vuse
&& no_vmaymust
)
1964 if (!no_vuse
&& !no_vmaymust
)
1966 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1968 fprintf (vect_dump
, "unexpected vdefs and vuses in stmt: ");
1969 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1974 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1976 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1978 fprintf (vect_dump
, "unexpected vops in stmt: ");
1979 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1986 memref
= TREE_OPERAND (stmt
, 1);
1987 datarefs
= &(LOOP_VINFO_DATAREF_READS (loop_vinfo
));
1992 memref
= TREE_OPERAND (stmt
, 0);
1993 datarefs
= &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
1997 scalar_type
= TREE_TYPE (memref
);
1998 vectype
= get_vectype_for_scalar_type (scalar_type
);
2001 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2003 fprintf (vect_dump
, "no vectype for stmt: ");
2004 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2005 fprintf (vect_dump
, " scalar_type: ");
2006 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
2008 /* It is not possible to vectorize this data reference. */
2011 /* Analyze MEMREF. If it is of a supported form, build data_reference
2012 struct for it (DR). */
2014 base
= vect_object_analysis (memref
, stmt
, is_read
, vectype
, &dr
,
2015 &offset
, &misalign
, &step
,
2016 &base_aligned
, &tag
, &ptr_info
,
2020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
2021 LOOP_LOC (loop_vinfo
)))
2023 fprintf (vect_dump
, "not vectorized: unhandled data ref: ");
2024 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2028 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
) = base
;
2029 STMT_VINFO_VECT_INIT_OFFSET (stmt_info
) = offset
;
2030 STMT_VINFO_VECT_STEP (stmt_info
) = step
;
2031 STMT_VINFO_VECT_MISALIGNMENT (stmt_info
) = misalign
;
2032 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info
) = base_aligned
;
2033 STMT_VINFO_MEMTAG (stmt_info
) = tag
;
2034 STMT_VINFO_PTR_INFO (stmt_info
) = ptr_info
;
2035 STMT_VINFO_SUBVARS (stmt_info
) = subvars
;
2036 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
2037 VARRAY_PUSH_GENERIC_PTR (*datarefs
, dr
);
2038 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
2046 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2048 /* Function vect_mark_relevant.
2050 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2053 vect_mark_relevant (VEC(tree
,heap
) **worklist
, tree stmt
)
2055 stmt_vec_info stmt_info
;
2057 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2058 fprintf (vect_dump
, "mark relevant.");
2060 if (TREE_CODE (stmt
) == PHI_NODE
)
2062 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
2066 stmt_info
= vinfo_for_stmt (stmt
);
2070 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2072 fprintf (vect_dump
, "mark relevant: no stmt info!!.");
2073 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2078 if (STMT_VINFO_RELEVANT_P (stmt_info
))
2080 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2081 fprintf (vect_dump
, "already marked relevant.");
2085 STMT_VINFO_RELEVANT_P (stmt_info
) = 1;
2086 VEC_safe_push (tree
, heap
, *worklist
, stmt
);
2090 /* Function vect_stmt_relevant_p.
2092 Return true if STMT in loop that is represented by LOOP_VINFO is
2093 "relevant for vectorization".
2095 A stmt is considered "relevant for vectorization" if:
2096 - it has uses outside the loop.
2097 - it has vdefs (it alters memory).
2098 - control stmts in the loop (except for the exit condition).
2100 CHECKME: what other side effects would the vectorizer allow? */
2103 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
)
2105 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2106 ssa_op_iter op_iter
;
2107 imm_use_iterator imm_iter
;
2108 use_operand_p use_p
;
2109 def_operand_p def_p
;
2111 /* cond stmt other than loop exit cond. */
2112 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
2115 /* changing memory. */
2116 if (TREE_CODE (stmt
) != PHI_NODE
)
2117 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
2119 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2120 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
2124 /* uses outside the loop. */
2125 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2127 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
2129 basic_block bb
= bb_for_stmt (USE_STMT (use_p
));
2130 if (!flow_bb_inside_loop_p (loop
, bb
))
2132 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2133 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
2143 /* Function vect_mark_stmts_to_be_vectorized.
2145 Not all stmts in the loop need to be vectorized. For example:
2154 Stmt 1 and 3 do not need to be vectorized, because loop control and
2155 addressing of vectorized data-refs are handled differently.
2157 This pass detects such stmts. */
2160 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
2162 VEC(tree
,heap
) *worklist
;
2163 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2164 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2165 unsigned int nbbs
= loop
->num_nodes
;
2166 block_stmt_iterator si
;
2171 stmt_vec_info stmt_info
;
2175 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2176 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
2179 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2181 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2183 fprintf (vect_dump
, "init: phi relevant? ");
2184 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2187 if (vect_stmt_relevant_p (phi
, loop_vinfo
))
2189 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
2190 LOOP_LOC (loop_vinfo
)))
2191 fprintf (vect_dump
, "unsupported reduction/induction.");
2196 worklist
= VEC_alloc (tree
, heap
, 64);
2198 /* 1. Init worklist. */
2200 for (i
= 0; i
< nbbs
; i
++)
2203 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2205 stmt
= bsi_stmt (si
);
2207 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2209 fprintf (vect_dump
, "init: stmt relevant? ");
2210 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2213 stmt_info
= vinfo_for_stmt (stmt
);
2214 STMT_VINFO_RELEVANT_P (stmt_info
) = 0;
2216 if (vect_stmt_relevant_p (stmt
, loop_vinfo
))
2217 vect_mark_relevant (&worklist
, stmt
);
2222 /* 2. Process_worklist */
2224 while (VEC_length (tree
, worklist
) > 0)
2226 stmt
= VEC_pop (tree
, worklist
);
2228 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2230 fprintf (vect_dump
, "worklist: examine stmt: ");
2231 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2234 /* Examine the USES in this statement. Mark all the statements which
2235 feed this statement's uses as "relevant", unless the USE is used as
2238 if (TREE_CODE (stmt
) == PHI_NODE
)
2240 /* follow the def-use chain inside the loop. */
2241 for (j
= 0; j
< PHI_NUM_ARGS (stmt
); j
++)
2243 tree arg
= PHI_ARG_DEF (stmt
, j
);
2244 tree def_stmt
= NULL_TREE
;
2246 if (!vect_is_simple_use (arg
, loop_vinfo
, &def_stmt
))
2248 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
2249 LOOP_LOC (loop_vinfo
)))
2250 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
2251 VEC_free (tree
, heap
, worklist
);
2257 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2259 fprintf (vect_dump
, "worklist: def_stmt: ");
2260 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
2263 bb
= bb_for_stmt (def_stmt
);
2264 if (flow_bb_inside_loop_p (loop
, bb
))
2265 vect_mark_relevant (&worklist
, def_stmt
);
2269 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2272 /* We are only interested in uses that need to be vectorized. Uses
2273 that are used for address computation are not considered relevant.
2275 if (exist_non_indexing_operands_for_use_p (use
, stmt
))
2277 tree def_stmt
= NULL_TREE
;
2279 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
))
2281 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
,
2282 LOOP_LOC (loop_vinfo
)))
2283 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
2284 VEC_free (tree
, heap
, worklist
);
2291 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2293 fprintf (vect_dump
, "worklist: examine use %d: ", i
);
2294 print_generic_expr (vect_dump
, use
, TDF_SLIM
);
2297 bb
= bb_for_stmt (def_stmt
);
2298 if (flow_bb_inside_loop_p (loop
, bb
))
2299 vect_mark_relevant (&worklist
, def_stmt
);
2302 } /* while worklist */
2304 VEC_free (tree
, heap
, worklist
);
2309 /* Function vect_can_advance_ivs_p
2311 In case the number of iterations that LOOP iterates in unknown at compile
2312 time, an epilog loop will be generated, and the loop induction variables
2313 (IVs) will be "advanced" to the value they are supposed to take just before
2314 the epilog loop. Here we check that the access function of the loop IVs
2315 and the expression that represents the loop bound are simple enough.
2316 These restrictions will be relaxed in the future. */
2319 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
2321 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2322 basic_block bb
= loop
->header
;
2325 /* Analyze phi functions of the loop header. */
2327 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
2329 tree access_fn
= NULL
;
2330 tree evolution_part
;
2332 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2334 fprintf (vect_dump
, "Analyze phi: ");
2335 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2338 /* Skip virtual phi's. The data dependences that are associated with
2339 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2341 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2343 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2344 fprintf (vect_dump
, "virtual phi. skip.");
2348 /* Analyze the evolution function. */
2350 access_fn
= instantiate_parameters
2351 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
2355 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2356 fprintf (vect_dump
, "No Access function.");
2360 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2362 fprintf (vect_dump
, "Access function of PHI: ");
2363 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
2366 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
2368 if (evolution_part
== NULL_TREE
)
2371 /* FORNOW: We do not transform initial conditions of IVs
2372 which evolution functions are a polynomial of degree >= 2. */
2374 if (tree_is_chrec (evolution_part
))
2382 /* Function vect_get_loop_niters.
2384 Determine how many iterations the loop is executed.
2385 If an expression that represents the number of iterations
2386 can be constructed, place it in NUMBER_OF_ITERATIONS.
2387 Return the loop exit condition. */
2390 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
2394 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2395 fprintf (vect_dump
, "=== get_loop_niters ===");
2397 niters
= number_of_iterations_in_loop (loop
);
2399 if (niters
!= NULL_TREE
2400 && niters
!= chrec_dont_know
)
2402 *number_of_iterations
= niters
;
2404 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2406 fprintf (vect_dump
, "==> get_loop_niters:" );
2407 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
2411 return get_loop_exit_condition (loop
);
2415 /* Function vect_analyze_loop_form.
2417 Verify the following restrictions (some may be relaxed in the future):
2418 - it's an inner-most loop
2419 - number of BBs = 2 (which are the loop header and the latch)
2420 - the loop has a pre-header
2421 - the loop has a single entry and exit
2422 - the loop exit condition is simple enough, and the number of iterations
2423 can be analyzed (a countable loop). */
2425 static loop_vec_info
2426 vect_analyze_loop_form (struct loop
*loop
)
2428 loop_vec_info loop_vinfo
;
2430 tree number_of_iterations
= NULL
;
2433 loop_loc
= find_loop_location (loop
);
2435 if (vect_print_dump_info (REPORT_DETAILS
, loop_loc
))
2436 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
2440 if (vect_print_dump_info (REPORT_OUTER_LOOPS
, loop_loc
))
2441 fprintf (vect_dump
, "not vectorized: nested loop.");
2445 if (!loop
->single_exit
2446 || loop
->num_nodes
!= 2
2447 || EDGE_COUNT (loop
->header
->preds
) != 2)
2449 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2451 if (!loop
->single_exit
)
2452 fprintf (vect_dump
, "not vectorized: multiple exits.");
2453 else if (loop
->num_nodes
!= 2)
2454 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
2455 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
2456 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
2462 /* We assume that the loop exit condition is at the end of the loop. i.e,
2463 that the loop is represented as a do-while (with a proper if-guard
2464 before the loop if needed), where the loop header contains all the
2465 executable statements, and the latch is empty. */
2466 if (!empty_block_p (loop
->latch
))
2468 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2469 fprintf (vect_dump
, "not vectorized: unexpectd loop form.");
2473 /* Make sure there exists a single-predecessor exit bb: */
2474 if (!single_pred_p (loop
->single_exit
->dest
))
2476 edge e
= loop
->single_exit
;
2477 if (!(e
->flags
& EDGE_ABNORMAL
))
2479 split_loop_exit_edge (e
);
2480 if (vect_print_dump_info (REPORT_DETAILS
, loop_loc
))
2481 fprintf (vect_dump
, "split exit edge.");
2485 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2486 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
2491 if (empty_block_p (loop
->header
))
2493 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2494 fprintf (vect_dump
, "not vectorized: empty loop.");
2498 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
2501 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2502 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
2506 if (!number_of_iterations
)
2508 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2510 "not vectorized: number of iterations cannot be computed.");
2514 if (chrec_contains_undetermined (number_of_iterations
))
2516 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
, loop_loc
))
2517 fprintf (vect_dump
, "Infinite number of iterations.");
2521 loop_vinfo
= new_loop_vec_info (loop
);
2522 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
2524 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2526 if (vect_print_dump_info (REPORT_DETAILS
, loop_loc
))
2528 fprintf (vect_dump
, "Symbolic number of iterations is ");
2529 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
2533 if (LOOP_VINFO_INT_NITERS (loop_vinfo
) == 0)
2535 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
, loop_loc
))
2536 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
2540 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
2541 LOOP_VINFO_LOC (loop_vinfo
) = loop_loc
;
2547 /* Function vect_analyze_loop.
2549 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2550 for it. The different analyses will record information in the
2551 loop_vec_info struct. */
2553 vect_analyze_loop (struct loop
*loop
)
2556 loop_vec_info loop_vinfo
;
2558 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2559 fprintf (vect_dump
, "===== analyze_loop_nest =====");
2561 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2563 loop_vinfo
= vect_analyze_loop_form (loop
);
2566 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
2567 fprintf (vect_dump
, "bad loop form.");
2571 /* Find all data references in the loop (which correspond to vdefs/vuses)
2572 and analyze their evolution in the loop.
2574 FORNOW: Handle only simple, array references, which
2575 alignment can be forced, and aligned pointer-references. */
2577 ok
= vect_analyze_data_refs (loop_vinfo
);
2580 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2581 fprintf (vect_dump
, "bad data references.");
2582 destroy_loop_vec_info (loop_vinfo
);
2586 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2588 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
2591 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2592 fprintf (vect_dump
, "unexpected pattern.");
2593 destroy_loop_vec_info (loop_vinfo
);
2597 /* Check that all cross-iteration scalar data-flow cycles are OK.
2598 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2600 ok
= vect_analyze_scalar_cycles (loop_vinfo
);
2603 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2604 fprintf (vect_dump
, "bad scalar cycle.");
2605 destroy_loop_vec_info (loop_vinfo
);
2609 ok
= vect_determine_vectorization_factor (loop_vinfo
);
2612 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2613 fprintf (vect_dump
, "can't determine vectorization factor.");
2614 destroy_loop_vec_info (loop_vinfo
);
2618 /* Analyze data dependences between the data-refs in the loop.
2619 FORNOW: fail at the first data dependence that we encounter. */
2621 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
2624 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2625 fprintf (vect_dump
, "bad data dependence.");
2626 destroy_loop_vec_info (loop_vinfo
);
2630 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2631 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2633 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
2636 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2637 fprintf (vect_dump
, "bad data access.");
2638 destroy_loop_vec_info (loop_vinfo
);
2642 /* Analyze the alignment of the data-refs in the loop.
2643 FORNOW: Only aligned accesses are handled. */
2645 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
2648 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2649 fprintf (vect_dump
, "bad data alignment.");
2650 destroy_loop_vec_info (loop_vinfo
);
2654 /* Scan all the operations in the loop and make sure they are
2657 ok
= vect_analyze_operations (loop_vinfo
);
2660 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
2661 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
2662 destroy_loop_vec_info (loop_vinfo
);
2666 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;