/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *,
                                  struct ptr_info_def **, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,

      for i
        for (j = 3; j < N; j++)
          a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  The INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (We perform the MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function).

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
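/* A worked instance of the above (illustrative numbers, not taken from any
   particular testcase): suppose C_i = 40, C_j = 4, C = 0 and
   VECTYPE_ALIGNMENT = 16.  Substituting j = 3 gives
   INITIAL_OFFSET = 'i * 40 + 12' and STEP = 4.  MISALIGN is NULL_TREE,
   because 40 is not a multiple of 16, so the term 'i * 40' contributes an
   unknown residue modulo 16.  Had C_i been 48 (a multiple of 16), MISALIGN
   would be 12.  */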
static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with the initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - the loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET can be non-constant.  For example, for arrays of
           variable sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted
             with constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                         fold_convert (ssizetype,
                                                       vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
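/* For example (illustrative): with 16-byte vectors, a loop over 8-byte
   doubles gets VF = 2, while a loop over 4-byte ints gets VF = 4.  A loop
   that mixes the two element sizes is rejected below by the mixed
   data-types check.  */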
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            continue;

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

#ifdef ENABLE_CHECKING
          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
            }
#endif

          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL)
                || vectorizable_condition (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump,
                           "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */
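  /* For example (illustrative): for 'a[i_2] = x_1', if USE is x_1 (the
     stored value) we return true - it is used as a value, not as an index;
     if USE is i_2 it appears only inside the ARRAY_REF, OPERAND (x_1) does
     not match it, and we return false.  */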
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to variable 'sum' will be
           considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough,
           however, we also check that the cycle does not need to be
           vectorized, i.e - we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized.  This is not the case in loop2, but it
           *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet.  This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}
/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will not be necessary when alias_sets_conflict_p will be
   less conservative.  */
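/* For example (illustrative): for accesses a[i] and b[i] to two distinct
   array decls we can record *DIFFER_P = true and return true; for *p and
   *q through two arbitrary pointers with conflicting alias sets, nothing
   can be concluded at compile time and we return false.  */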
static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int dist = 0;
  unsigned int loop_depth = 0;
  struct loop *loop_nest = loop;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Find loop depth.  */
  while (loop_nest)
    {
      if (loop_nest->outer && loop_nest->outer->outer)
        {
          loop_nest = loop_nest->outer;
          loop_depth++;
        }
      else
        break;
    }

  /* Compute distance vector.  */
  compute_subscript_distance (ddr);
  build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);

  if (!DDR_DIST_VECT (ddr))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  dist = DDR_DIST_VECT (ddr)[loop_depth];

  /* Same loop iteration.  */
  if (dist == 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "dependence distance 0.");
      return false;
    }

  if (dist >= vectorization_factor)
    /* Dependence distance does not create dependence, as far as
       vectorization is concerned, in this case.  */
    return false;
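  /* Rationale (illustrative): with VF = 4, iterations i, i+1, i+2, i+3
     execute together.  A distance >= 4 means the dependent access occurs
     in a later vector iteration, so the original order is preserved; a
     smaller positive distance would place both accesses inside one vector
     iteration and could reorder them.  */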
  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes: */
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
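  /* E.g. (illustrative): a reference 20 bytes past a base that is aligned
     to a 16-byte vector alignment gets misalign = 20 % 16 = 4.  */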
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
     bootstrap.  Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
     "division by zero" error.  This error would be issued because we
     do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
     defaults to 0 if it is not defined by the target.  */
  int units_per_simd_word = UNITS_PER_SIMD_WORD;
  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by doing
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian,
      International Journal of Parallel Programming, Vol. 30, No. 2,
      April 2002.)
     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
  */
  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }
  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }
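      /* E.g. (illustrative): for 4-byte elements, VF = 4 and
         DR_MISALIGNMENT (dr0) = 8 bytes: mis = 2 elements, so peeling
         npeel = 4 - 2 = 2 iterations advances dr0 by 8 bytes and makes it
         16-byte aligned.  */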
      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= units_per_simd_word;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      DR_MISALIGNMENT (dr0) = 0;
    }
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will be
   relaxed.  */
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);

  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT,
                                    LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT,
                                    LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
      && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
    fprintf (vect_dump, "Alignment of access forced using peeling.");

  return true;
}
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */
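/* E.g. (illustrative): for 4-byte elements, an access with STEP = 4 (a[i])
   is accepted; STEP = 8 (a[2*i]) or an unknown STEP is rejected below.  */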
static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}
/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise - return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.
*/

static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                                  fold_convert (ssizetype,
                                                TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}
/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  struct ptr_info_def *dummy1;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset
         is a number, we can add it to the misalignment value calculated for
         base, otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy1, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}
/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with the initial_conditions of the access_functions of
   variables, modulo alignment, and STEP is the evolution of the DR_REF in
   this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF
   and COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access
              function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference
                                   MEMREF.  E.g, if MEMREF is a.b[k].c[i][j]
                                   the returned base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE
              if the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes
   PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
   SUBVAR - Sub-variables of the variable

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
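/* For example (illustrative): for MEMREF a.b[i] where 'a' is a global
   struct, Part 1 Case 1 strips the component reference via
   get_inner_reference, leaving base 'a'; Case 2 then checks DECL_ALIGN (a),
   and the returned BASE_ADDRESS is &a.  */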
1681 static tree
1682 vect_object_analysis (tree memref, tree stmt, bool is_read,
1683 tree vectype, struct data_reference **dr,
1684 tree *offset, tree *misalign, tree *step,
1685 bool *base_aligned, tree *memtag,
1686 struct ptr_info_def **ptr_info, subvar_t *subvars)
1688 tree base = NULL_TREE, base_address = NULL_TREE;
1689 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1690 tree object_step = ssize_int (0), address_step = ssize_int (0);
1691 bool object_base_aligned = true, address_base_aligned = true;
1692 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1693 HOST_WIDE_INT pbitsize, pbitpos;
1694 tree poffset, bit_pos_in_bytes;
1695 enum machine_mode pmode;
1696 int punsignedp, pvolatilep;
1697 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1699 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1700 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1701 struct data_reference *ptr_dr = NULL;
1702 tree access_fn, evolution_part, address_to_analyze;
1704 *ptr_info = NULL;
1706 /* Part 1: */
1707 /* Case 1. handled_component_p refs. */
1708 if (handled_component_p (memref))
1710 /* 1.1 call get_inner_reference. */
1711 /* Find the base and the offset from it. */
1712 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1713 &pmode, &punsignedp, &pvolatilep, false);
1714 if (!base)
1715 return NULL_TREE;
1717 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1718 if (poffset
1719 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1720 &object_offset, &object_misalign, &object_step))
1722 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1724 fprintf (vect_dump, "failed to compute offset or step for ");
1725 print_generic_expr (vect_dump, memref, TDF_SLIM);
1727 return NULL_TREE;
1730 /* Add bit position to OFFSET and MISALIGN. */
1732 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1733 /* Check that there is no remainder in bits. */
1734 if (pbitpos%BITS_PER_UNIT)
1736 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1737 fprintf (vect_dump, "bit offset alignment.");
1738 return NULL_TREE;
1740 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1741 if (object_misalign)
1742 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1743 bit_pos_in_bytes);
1745 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1746 if (!(*dr))
1748 if (TREE_CODE (memref) == ARRAY_REF)
1749 *dr = analyze_array (stmt, memref, is_read);
1750 else
1751 /* FORNOW. */
1752 return NULL_TREE;
1754 memref = base; /* To continue analysis of BASE. */
1755 /* fall through */
1758 /* Part 1: Case 2. Declarations. */
1759 if (DECL_P (memref))
1761 /* We expect to get a decl only if we already have a DR. */
1762 if (!(*dr))
1764 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1766 fprintf (vect_dump, "unhandled decl ");
1767 print_generic_expr (vect_dump, memref, TDF_SLIM);
1769 return NULL_TREE;
1772 /* 2.1 check the alignment. */
1773 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1774 object_base_aligned = true;
1775 else
1776 object_base_aligned = false;
1778 /* 2.2 update DR_BASE_NAME if necessary. */
1779 if (!DR_BASE_NAME ((*dr)))
1780 /* For alias analysis: in case the analysis of an INDIRECT_REF
1781 brought us to this object. */
1782 DR_BASE_NAME ((*dr)) = memref;
1784 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1785 *subvars = get_subvars_for_var (memref);
1786 base_address = build_fold_addr_expr (memref);
1787 *memtag = memref;
1790 /* Part 1: Case 3. INDIRECT_REFs. */
1791 else if (TREE_CODE (memref) == INDIRECT_REF)
1793 tree ptr_ref = TREE_OPERAND (memref, 0);
1794 if (TREE_CODE (ptr_ref) == SSA_NAME)
1795 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1797 /* 3.1 get the access function. */
1798 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1799 if (!access_fn)
1801 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1802 LOOP_LOC (loop_vinfo)))
1803 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1804 return NULL_TREE;
1806 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1808 fprintf (vect_dump, "Access function of ptr: ");
1809 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1812 /* 3.2 analyze evolution of MEMREF. */
1813 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1814 if (evolution_part)
1816 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1817 access_fn, &ptr_init, &ptr_step);
1818 if (!(ptr_dr))
1819 return NULL_TREE;
1821 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1822 address_to_analyze = ptr_init;
1824 else
1826 if (!(*dr))
1828 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1829 LOOP_LOC (loop_vinfo)))
1830 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1831 return NULL_TREE;
1833 /* Since a DR already exists for MEMREF, we are analyzing the init of
1834 the access function, which does not necessarily have an evolution
1835 in the loop. */
1836 address_to_analyze = initial_condition_in_loop_num (access_fn,
1837 loop->num);
1840 /* 3.3 set data-reference structure for MEMREF. */
1841 *dr = (*dr) ? *dr : ptr_dr;
1843 /* 3.4 call vect_address_analysis to analyze INIT of the access
1844 function. */
1845 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1846 vectype, *dr, &address_offset, &address_misalign,
1847 &address_step, &address_base_aligned);
1848 if (!base_address)
1849 return NULL_TREE;
1851 switch (TREE_CODE (base_address))
1853 case SSA_NAME:
1854 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1855 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1856 *memtag = get_var_ann (
1857 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1858 break;
1859 case ADDR_EXPR:
1860 *memtag = TREE_OPERAND (base_address, 0);
1861 break;
1862 default:
1863 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1864 LOOP_LOC (loop_vinfo)))
1866 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1867 print_generic_expr (vect_dump, memref, TDF_SLIM);
1869 return NULL_TREE;
1873 if (!base_address)
1874 /* MEMREF cannot be analyzed. */
1875 return NULL_TREE;
1877 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1878 *subvars = get_subvars_for_var (*memtag);
1880 /* Part 2: Combine the results of object and address analysis to calculate
1881 INITIAL_OFFSET, STEP and misalignment info. */
1882 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1883 if (object_misalign && address_misalign)
1884 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1885 else
1886 *misalign = NULL_TREE;
1887 *step = size_binop (PLUS_EXPR, object_step, address_step);
1888 *base_aligned = object_base_aligned && address_base_aligned;
1890 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1892 fprintf (vect_dump, "Results of object analysis for: ");
1893 print_generic_expr (vect_dump, memref, TDF_SLIM);
1894 fprintf (vect_dump, "\n\tbase_address: ");
1895 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1896 fprintf (vect_dump, "\n\toffset: ");
1897 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1898 fprintf (vect_dump, "\n\tstep: ");
1899 print_generic_expr (vect_dump, *step, TDF_SLIM);
1900 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1901 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1903 return base_address;
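/* For illustration, a hypothetical input loop (the names foo, p, q and n
   are made up for this sketch) and the decomposition this function is
   expected to compute for its data-refs:

       int q[256];

       void foo (int *p, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           p[i] = q[i];
       }

   For the read q[i], an ARRAY_REF (a handled_component_p ref over a decl):
   base_address = &q, initial offset = 0, step = sizeof (int), and the base
   is aligned if the alignment of the array q is at least the vector
   alignment (which can be forced, since q is a decl).
   For the write p[i], in GIMPLE an INDIRECT_REF whose pointer has the
   access function {p_init, +, sizeof (int)} in the loop: base_address is
   the initial value of the pointer, initial offset = 0, step =
   sizeof (int), and the base alignment is unknown unless it can be
   derived from p. */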
1907 /* Function vect_analyze_data_refs.
1909 Find all the data references in the loop.
1911 The general structure of the analysis of data refs in the vectorizer is as
1912 follows:
1913 1- vect_analyze_data_refs(loop):
1914 Find and analyze all data-refs in the loop:
1915 foreach ref
1916 base_address = vect_object_analysis(ref)
1917 1.1- vect_object_analysis(ref):
1918 Analyze ref, and build a DR (data_reference struct) for it;
1919 compute base, initial_offset, step and alignment.
1920 Call get_inner_reference for refs handled in this function.
1921 Call vect_address_analysis(addr) to analyze pointer-type expressions.
1922 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1923 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1924 2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
1925 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
1926 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1928 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1929 whose base is really an array (not a pointer) and whose alignment
1930 can be forced. This restriction will be relaxed. */
1932 static bool
1933 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1935 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1936 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1937 int nbbs = loop->num_nodes;
1938 block_stmt_iterator si;
1939 int j;
1940 struct data_reference *dr;
1942 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1943 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1945 for (j = 0; j < nbbs; j++)
1947 basic_block bb = bbs[j];
1948 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1950 bool is_read = false;
1951 tree stmt = bsi_stmt (si);
1952 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1953 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1954 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1955 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1956 varray_type *datarefs = NULL;
1957 int nvuses, nv_may_defs, nv_must_defs;
1958 tree memref = NULL;
1959 tree scalar_type, vectype;
1960 tree base, offset, misalign, step, tag;
1961 struct ptr_info_def *ptr_info;
1962 bool base_aligned;
1963 subvar_t subvars = NULL;
1965 /* Assumption: there exists a data-ref in stmt if and only if
1966 it has vuses/vdefs. */
1968 if (!vuses && !v_may_defs && !v_must_defs)
1969 continue;
1971 nvuses = NUM_VUSES (vuses);
1972 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1973 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1975 if (nvuses && (nv_may_defs || nv_must_defs))
1977 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1979 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1980 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1982 return false;
1985 if (TREE_CODE (stmt) != MODIFY_EXPR)
1987 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1989 fprintf (vect_dump, "unexpected vops in stmt: ");
1990 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1992 return false;
1995 if (vuses)
1997 memref = TREE_OPERAND (stmt, 1);
1998 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1999 is_read = true;
2001 else /* vdefs */
2003 memref = TREE_OPERAND (stmt, 0);
2004 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2005 is_read = false;
2008 scalar_type = TREE_TYPE (memref);
2009 vectype = get_vectype_for_scalar_type (scalar_type);
2010 if (!vectype)
2012 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2014 fprintf (vect_dump, "no vectype for stmt: ");
2015 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2016 fprintf (vect_dump, " scalar_type: ");
2017 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2019 /* It is not possible to vectorize this data reference. */
2020 return false;
2022 /* Analyze MEMREF. If it is of a supported form, build data_reference
2023 struct for it (DR). */
2024 dr = NULL;
2025 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2026 &offset, &misalign, &step,
2027 &base_aligned, &tag, &ptr_info,
2028 &subvars);
2029 if (!base)
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2032 LOOP_LOC (loop_vinfo)))
2034 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2035 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2037 return false;
2039 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2040 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2041 STMT_VINFO_VECT_STEP (stmt_info) = step;
2042 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2043 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2044 STMT_VINFO_MEMTAG (stmt_info) = tag;
2045 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
2046 STMT_VINFO_SUBVARS (stmt_info) = subvars;
2047 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2048 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2049 STMT_VINFO_DATA_REF (stmt_info) = dr;
2053 return true;
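/* For illustration, how the vuse/vdef assumption above plays out on
   made-up GIMPLE (the SSA names and tags are invented for this sketch):

       # VUSE <QTAG_6>
       x_5 = q[i_3];        vuses only: the data-ref is the read on the
                            RHS, TREE_OPERAND (stmt, 1).

       # PTAG_8 = V_MAY_DEF <PTAG_7>
       p[i_3] = x_5;        vdefs only: the data-ref is the write on the
                            LHS, TREE_OPERAND (stmt, 0).

   A stmt with both vuses and vdefs (e.g. an aggregate copy that was not
   split into a separate load and store) is rejected above as
   "unexpected vdefs and vuses in stmt". */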
2057 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2059 /* Function vect_mark_relevant.
2061 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2063 static void
2064 vect_mark_relevant (varray_type *worklist, tree stmt)
2066 stmt_vec_info stmt_info;
2068 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2069 fprintf (vect_dump, "mark relevant.");
2071 if (TREE_CODE (stmt) == PHI_NODE)
2073 VARRAY_PUSH_TREE (*worklist, stmt);
2074 return;
2077 stmt_info = vinfo_for_stmt (stmt);
2079 if (!stmt_info)
2081 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2083 fprintf (vect_dump, "mark relevant: no stmt info!");
2084 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2086 return;
2089 if (STMT_VINFO_RELEVANT_P (stmt_info))
2091 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2092 fprintf (vect_dump, "already marked relevant.");
2093 return;
2096 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2097 VARRAY_PUSH_TREE (*worklist, stmt);
2101 /* Function vect_stmt_relevant_p.
2103 Return true if STMT, in the loop represented by LOOP_VINFO, is
2104 "relevant for vectorization".
2106 A stmt is considered "relevant for vectorization" if:
2107 - it has uses outside the loop.
2108 - it has vdefs (it alters memory).
2109 - it is a control stmt in the loop (other than the exit condition).
2111 CHECKME: what other side effects would the vectorizer allow? */
2113 static bool
2114 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2116 v_may_def_optype v_may_defs;
2117 v_must_def_optype v_must_defs;
2118 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2119 ssa_op_iter op_iter;
2120 imm_use_iterator imm_iter;
2121 use_operand_p use_p;
2122 tree var;
2124 /* cond stmt other than loop exit cond. */
2125 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2126 return true;
2128 /* phi nodes: relevant only if their result is used outside the loop. */
2129 if (TREE_CODE (stmt) == PHI_NODE)
2131 if (!is_gimple_reg (PHI_RESULT (stmt)))
2132 return false;
2133 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (stmt))
2135 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2136 if (!flow_bb_inside_loop_p (loop, bb))
2138 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2139 fprintf (vect_dump, "vect_stmt_relevant_p: used out of loop.");
2140 return true;
2143 return false;
2145 /* changing memory. */
2146 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2147 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2148 if (v_may_defs || v_must_defs)
2150 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2151 fprintf (vect_dump, "vect_stmt_relevant_p: stmt has vdefs.");
2152 return true;
2155 /* uses outside the loop. */
2156 FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_DEF)
2158 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2160 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2161 if (!flow_bb_inside_loop_p (loop, bb))
2163 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2164 fprintf (vect_dump, "vect_stmt_relevant_p: used out of loop.");
2165 return true;
2170 return false;
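/* For illustration, on a made-up loop body (SSA details elided):

       t_2 = i_7 * 4;      no vdefs and no uses outside the loop:
                           not relevant by itself (it is marked later
                           only if a relevant stmt uses t_2).
       a[i_7] = b_3;       has vdefs (it alters memory): relevant.
       s_8 = s_5 + c_4;    if s_8 is used after the loop, the def has
                           a use outside the loop: relevant. */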
2174 /* Function vect_mark_stmts_to_be_vectorized.
2176 Not all stmts in the loop need to be vectorized. For example:
2178 for i...
2179 for j...
2180 1. T0 = i + j
2181 2. T1 = a[T0]
2183 3. j = j + 1
2185 Stmts 1 and 3 do not need to be vectorized, because loop control and
2186 addressing of vectorized data-refs are handled differently.
2188 This pass detects such stmts. */
2190 static bool
2191 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2193 varray_type worklist;
2194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2195 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2196 unsigned int nbbs = loop->num_nodes;
2197 block_stmt_iterator si;
2198 tree stmt;
2199 stmt_ann_t ann;
2200 unsigned int i;
2201 int j;
2202 use_optype use_ops;
2203 stmt_vec_info stmt_info;
2204 basic_block bb;
2205 tree phi;
2207 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2208 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2210 bb = loop->header;
2211 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2213 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2215 fprintf (vect_dump, "init: phi relevant? ");
2216 print_generic_expr (vect_dump, phi, TDF_SLIM);
2219 if (vect_stmt_relevant_p (phi, loop_vinfo))
2221 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2222 LOOP_LOC (loop_vinfo)))
2223 fprintf (vect_dump, "unsupported reduction/induction.");
2224 return false;
2228 VARRAY_TREE_INIT (worklist, 64, "work list");
2230 /* 1. Init worklist. */
2232 for (i = 0; i < nbbs; i++)
2234 bb = bbs[i];
2235 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2237 stmt = bsi_stmt (si);
2239 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2241 fprintf (vect_dump, "init: stmt relevant? ");
2242 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2245 stmt_info = vinfo_for_stmt (stmt);
2246 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2248 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2249 vect_mark_relevant (&worklist, stmt);
2254 /* 2. Process worklist. */
2256 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2258 stmt = VARRAY_TOP_TREE (worklist);
2259 VARRAY_POP (worklist);
2261 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2263 fprintf (vect_dump, "worklist: examine stmt: ");
2264 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2267 /* Examine the USES in this statement. Mark all the statements which
2268 feed this statement's uses as "relevant", unless the USE is used as
2269 an array index. */
2271 if (TREE_CODE (stmt) == PHI_NODE)
2273 /* follow the def-use chain inside the loop. */
2274 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2276 tree arg = PHI_ARG_DEF (stmt, j);
2277 tree def_stmt = NULL_TREE;
2278 basic_block bb;
2279 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2281 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2282 LOOP_LOC (loop_vinfo)))
2283 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2284 varray_clear (worklist);
2285 return false;
2287 if (!def_stmt)
2288 continue;
2290 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2292 fprintf (vect_dump, "worklist: def_stmt: ");
2293 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2296 bb = bb_for_stmt (def_stmt);
2297 if (flow_bb_inside_loop_p (loop, bb))
2298 vect_mark_relevant (&worklist, def_stmt);
2302 ann = stmt_ann (stmt);
2303 use_ops = USE_OPS (ann);
2305 for (i = 0; i < NUM_USES (use_ops); i++)
2307 tree use = USE_OP (use_ops, i);
2309 /* We are only interested in uses that need to be vectorized. Uses
2310 that are used for address computation are not considered relevant. */
2312 if (exist_non_indexing_operands_for_use_p (use, stmt))
2314 tree def_stmt = NULL_TREE;
2315 basic_block bb;
2316 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2318 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2319 LOOP_LOC (loop_vinfo)))
2320 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2321 varray_clear (worklist);
2322 return false;
2325 if (!def_stmt)
2326 continue;
2328 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2330 fprintf (vect_dump, "worklist: examine use %d: ", i);
2331 print_generic_expr (vect_dump, use, TDF_SLIM);
2334 bb = bb_for_stmt (def_stmt);
2335 if (flow_bb_inside_loop_p (loop, bb))
2336 vect_mark_relevant (&worklist, def_stmt);
2339 } /* while worklist */
2341 varray_clear (worklist);
2342 return true;
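/* For illustration, the propagation on a made-up loop body:

       1. T0 = i + j
       2. T1 = b[T0]
       3. a[i] = T1

   Stmt 3 is seeded into the worklist (it has vdefs).  Processing 3
   finds the use T1, which is a real operand (not just an array index),
   so its def stmt 2 is marked relevant and pushed.  Processing 2 finds
   the use T0, which appears only as an array index, so
   exist_non_indexing_operands_for_use_p returns false and stmt 1 is
   never marked: addressing computations stay unvectorized, as intended. */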
2346 /* Function vect_can_advance_ivs_p
2348 In case the number of iterations that LOOP iterates is unknown at compile
2349 time, an epilog loop will be generated, and the loop induction variables
2350 (IVs) will be "advanced" to the value they are supposed to take just before
2351 the epilog loop. Here we check that the access function of the loop IVs
2352 and the expression that represents the loop bound are simple enough.
2353 These restrictions will be relaxed in the future. */
2355 static bool
2356 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2358 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2359 basic_block bb = loop->header;
2360 tree phi;
2362 /* Analyze phi functions of the loop header. */
2364 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2366 tree access_fn = NULL;
2367 tree evolution_part;
2369 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2371 fprintf (vect_dump, "Analyze phi: ");
2372 print_generic_expr (vect_dump, phi, TDF_SLIM);
2375 /* Skip virtual phi's. The data dependences that are associated with
2376 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2378 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2380 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2381 fprintf (vect_dump, "virtual phi. skip.");
2382 continue;
2385 /* Analyze the evolution function. */
2387 access_fn = instantiate_parameters
2388 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2390 if (!access_fn)
2392 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2393 fprintf (vect_dump, "No Access function.");
2394 return false;
2397 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2399 fprintf (vect_dump, "Access function of PHI: ");
2400 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2403 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2405 if (evolution_part == NULL_TREE)
2406 return false;
2408 /* FORNOW: We do not transform initial conditions of IVs
2409 whose evolution functions are polynomials of degree >= 2. */
2411 if (tree_is_chrec (evolution_part))
2412 return false;
2415 return true;
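/* For illustration (a sketch; the names ratio and vf are made up): if
   the vector loop executes ratio = (niters / vf) * vf scalar
   iterations, an IV with access function {init, +, step} is advanced
   before the epilog loop to

       iv = init + ratio * step;

   This is only straightforward for such simple affine evolutions,
   which is why an evolution_part that is itself a chrec (a polynomial
   of degree >= 2) is rejected above. */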
2419 /* Function vect_get_loop_niters.
2421 Determine how many iterations the loop executes.
2422 If an expression that represents the number of iterations
2423 can be constructed, place it in NUMBER_OF_ITERATIONS.
2424 Return the loop exit condition. */
2426 static tree
2427 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2429 tree niters;
2431 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2432 fprintf (vect_dump, "=== get_loop_niters ===");
2434 niters = number_of_iterations_in_loop (loop);
2436 if (niters != NULL_TREE
2437 && niters != chrec_dont_know)
2439 *number_of_iterations = niters;
2441 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2443 fprintf (vect_dump, "==> get_loop_niters:");
2444 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2448 return get_loop_exit_condition (loop);
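/* For illustration, on a made-up countable loop:

       for (i = 0; i < n; i++)
         ...

   number_of_iterations_in_loop is expected to return an expression for
   the iteration count in terms of n (and chrec_dont_know for a bound
   it cannot analyze, in which case NUMBER_OF_ITERATIONS is left unset
   here and the caller rejects the loop), while the function returns
   the COND_EXPR that controls the loop exit. */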
2452 /* Function vect_analyze_loop_form.
2454 Verify the following restrictions (some may be relaxed in the future):
2455 - it's an inner-most loop
2456 - number of BBs = 2 (which are the loop header and the latch)
2457 - the loop has a pre-header
2458 - the loop has a single entry and exit
2459 - the loop exit condition is simple enough, and the number of iterations
2460 can be analyzed (a countable loop). */
2462 static loop_vec_info
2463 vect_analyze_loop_form (struct loop *loop)
2465 loop_vec_info loop_vinfo;
2466 tree loop_cond;
2467 tree number_of_iterations = NULL;
2468 LOC loop_loc;
2470 loop_loc = find_loop_location (loop);
2472 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2473 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2475 if (loop->inner)
2477 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2478 fprintf (vect_dump, "not vectorized: nested loop.");
2479 return NULL;
2482 if (!loop->single_exit
2483 || loop->num_nodes != 2
2484 || EDGE_COUNT (loop->header->preds) != 2)
2486 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2488 if (!loop->single_exit)
2489 fprintf (vect_dump, "not vectorized: multiple exits.");
2490 else if (loop->num_nodes != 2)
2491 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2492 else if (EDGE_COUNT (loop->header->preds) != 2)
2493 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2496 return NULL;
2499 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2500 that the loop is represented as a do-while (with a proper if-guard
2501 before the loop if needed), where the loop header contains all the
2502 executable statements, and the latch is empty. */
2503 if (!empty_block_p (loop->latch))
2505 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2506 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2507 return NULL;
2510 /* Make sure there exists a single-predecessor exit bb: */
2511 if (!single_pred_p (loop->single_exit->dest))
2513 edge e = loop->single_exit;
2514 if (!(e->flags & EDGE_ABNORMAL))
2516 split_loop_exit_edge (e);
2517 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2518 fprintf (vect_dump, "split exit edge.");
2520 else
2522 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2523 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2524 return NULL;
2528 if (empty_block_p (loop->header))
2530 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2531 fprintf (vect_dump, "not vectorized: empty loop.");
2532 return NULL;
2535 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2536 if (!loop_cond)
2538 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2539 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2540 return NULL;
2543 if (!number_of_iterations)
2545 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2546 fprintf (vect_dump,
2547 "not vectorized: number of iterations cannot be computed.");
2548 return NULL;
2551 if (chrec_contains_undetermined (number_of_iterations))
2553 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2554 fprintf (vect_dump, "Infinite number of iterations.");
2555 return NULL;
2558 loop_vinfo = new_loop_vec_info (loop);
2559 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2561 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2563 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2565 fprintf (vect_dump, "Symbolic number of iterations is ");
2566 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2569 else
2570 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2572 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2573 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2574 return NULL;
2577 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2578 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2580 return loop_vinfo;
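/* For illustration, the loop form accepted above (a sketch):

       preheader:
         ...
       header:                  all executable stmts, with the exit
         ...                    test at the end
         if (cond) goto exit_bb; else goto latch;
       latch:                   empty
         goto header;
       exit_bb:                 single predecessor (split if needed)

   i.e. a do-while of exactly 2 basic blocks with a pre-header, a
   single exit, and a computable number of iterations. */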
2584 /* Function vect_analyze_loop.
2586 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2587 for it. The different analyses will record information in the
2588 loop_vec_info struct. */
2589 loop_vec_info
2590 vect_analyze_loop (struct loop *loop)
2592 bool ok;
2593 loop_vec_info loop_vinfo;
2595 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2596 fprintf (vect_dump, "===== analyze_loop_nest =====");
2598 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2600 loop_vinfo = vect_analyze_loop_form (loop);
2601 if (!loop_vinfo)
2603 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2604 fprintf (vect_dump, "bad loop form.");
2605 return NULL;
2608 /* Find all data references in the loop (which correspond to vdefs/vuses)
2609 and analyze their evolution in the loop.
2611 FORNOW: Handle only simple array references whose
2612 alignment can be forced, and aligned pointer references. */
2614 ok = vect_analyze_data_refs (loop_vinfo);
2615 if (!ok)
2617 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2618 fprintf (vect_dump, "bad data references.");
2619 destroy_loop_vec_info (loop_vinfo);
2620 return NULL;
2623 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2625 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2626 if (!ok)
2628 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2629 fprintf (vect_dump, "unexpected pattern.");
2630 destroy_loop_vec_info (loop_vinfo);
2631 return NULL;
2634 /* Check that all cross-iteration scalar data-flow cycles are OK.
2635 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2637 ok = vect_analyze_scalar_cycles (loop_vinfo);
2638 if (!ok)
2640 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2641 fprintf (vect_dump, "bad scalar cycle.");
2642 destroy_loop_vec_info (loop_vinfo);
2643 return NULL;
2646 ok = vect_determine_vectorization_factor (loop_vinfo);
2647 if (!ok)
2649 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2650 fprintf (vect_dump, "can't determine vectorization factor.");
2651 destroy_loop_vec_info (loop_vinfo);
2652 return NULL;
2655 /* Analyze data dependences between the data-refs in the loop.
2656 FORNOW: fail at the first data dependence that we encounter. */
2658 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2659 if (!ok)
2661 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2662 fprintf (vect_dump, "bad data dependence.");
2663 destroy_loop_vec_info (loop_vinfo);
2664 return NULL;
2667 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2668 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2670 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2671 if (!ok)
2673 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2674 fprintf (vect_dump, "bad data access.");
2675 destroy_loop_vec_info (loop_vinfo);
2676 return NULL;
2679 /* Analyze the alignment of the data-refs in the loop.
2680 FORNOW: Only aligned accesses are handled. */
2682 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2683 if (!ok)
2685 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2686 fprintf (vect_dump, "bad data alignment.");
2687 destroy_loop_vec_info (loop_vinfo);
2688 return NULL;
2691 /* Scan all the operations in the loop and make sure they are
2692 vectorizable. */
2694 ok = vect_analyze_operations (loop_vinfo);
2695 if (!ok)
2697 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2698 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2699 destroy_loop_vec_info (loop_vinfo);
2700 return NULL;
2703 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2705 return loop_vinfo;
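/* For illustration, how a caller is expected to use this entry point
   (a sketch; the actual driver lives in tree-vectorizer.c):

       loop_vec_info loop_vinfo = vect_analyze_loop (loop);

       if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
         vect_transform_loop (loop_vinfo, loops);

   i.e. the analysis either returns NULL or a loop_vec_info with
   LOOP_VINFO_VECTORIZABLE_P set, and only in the latter case is the
   transformation applied. */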