2005-06-30 J. D. Johnston <jjohnst@us.ibm.com>
[official-gcc.git] / gcc / tree-vect-analyze.c
blob f37b7311150df05cba5ecd0d3923375b226adb58
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
41 /* Main analysis functions. */
42 static loop_vec_info vect_analyze_loop_form (struct loop *);
43 static bool vect_analyze_data_refs (loop_vec_info);
44 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
45 static void vect_analyze_scalar_cycles (loop_vec_info);
46 static bool vect_analyze_data_ref_accesses (loop_vec_info);
47 static bool vect_analyze_data_ref_dependences (loop_vec_info);
48 static bool vect_analyze_data_refs_alignment (loop_vec_info);
49 static bool vect_compute_data_refs_alignment (loop_vec_info);
50 static void vect_enhance_data_refs_alignment (loop_vec_info);
51 static bool vect_analyze_operations (loop_vec_info);
52 static bool vect_determine_vectorization_factor (loop_vec_info);
54 /* Utility functions for the analyses. */
55 static bool exist_non_indexing_operands_for_use_p (tree, tree);
56 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
57 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_reference *, struct data_reference *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static struct data_reference * vect_analyze_pointer_ref_access
64 (tree, tree, bool, tree, tree *, tree *);
65 static bool vect_can_advance_ivs_p (loop_vec_info);
66 static tree vect_get_ptr_offset (tree, tree, tree *);
67 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
68 tree *, tree *);
69 static bool vect_base_addr_differ_p (struct data_reference *,
70 struct data_reference *drb, bool *);
71 static tree vect_object_analysis (tree, tree, bool, tree,
72 struct data_reference **, tree *, tree *,
73 tree *, bool *, tree *, struct ptr_info_def **,
74 subvar_t *);
75 static tree vect_address_analysis (tree, tree, bool, tree,
76 struct data_reference *, tree *, tree *,
77 tree *, bool *);
80 /* Function vect_get_ptr_offset
82 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
84 static tree
85 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
86 tree vectype ATTRIBUTE_UNUSED,
87 tree *offset ATTRIBUTE_UNUSED)
89 /* TODO: Use alignment information. */
90 return NULL_TREE;
94 /* Function vect_analyze_offset_expr
96 Given an offset expression EXPR received from get_inner_reference, analyze
97 it and create an expression for INITIAL_OFFSET by substituting the variables
98 of EXPR with initial_condition of the corresponding access_fn in the loop.
99 E.g.,
100 for i
101 for (j = 3; j < N; j++)
102 a[j].b[i][j] = 0;
104 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
105 substituted, since its access_fn in the inner loop is i. 'j' will be
106 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
107 C` = 3 * C_j + C.
109 Compute MISALIGN (the misalignment of the data reference initial access from
110 its base) if possible. Misalignment can be calculated only if all the
111 variables can be substituted with constants, or if a variable is multiplied
112 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
113 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
114 of VECTYPE_ALIGNMENT, and C` otherwise. (The MISALIGN modulo
115 VECTYPE_ALIGNMENT computation is performed in the caller of this function.)
117 STEP is an evolution of the data reference in this loop in bytes.
118 In the above example, STEP is C_j.
120 Return FALSE if the analysis fails, e.g., when there is no access_fn for a
121 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
122 are NULL_TREEs. Otherwise, return TRUE.  */
126 static bool
127 vect_analyze_offset_expr (tree expr,
128 struct loop *loop,
129 tree vectype_alignment,
130 tree *initial_offset,
131 tree *misalign,
132 tree *step)
134 tree oprnd0;
135 tree oprnd1;
136 tree left_offset = ssize_int (0);
137 tree right_offset = ssize_int (0);
138 tree left_misalign = ssize_int (0);
139 tree right_misalign = ssize_int (0);
140 tree left_step = ssize_int (0);
141 tree right_step = ssize_int (0);
142 enum tree_code code;
143 tree init, evolution;
145 *step = NULL_TREE;
146 *misalign = NULL_TREE;
147 *initial_offset = NULL_TREE;
149 /* Strip conversions that don't narrow the mode. */
150 expr = vect_strip_conversion (expr);
151 if (!expr)
152 return false;
154 /* Stop conditions:
155 1. Constant. */
156 if (TREE_CODE (expr) == INTEGER_CST)
158 *initial_offset = fold_convert (ssizetype, expr);
159 *misalign = fold_convert (ssizetype, expr);
160 *step = ssize_int (0);
161 return true;
164 /* 2. Variable. Try to substitute with initial_condition of the corresponding
165 access_fn in the current loop. */
166 if (SSA_VAR_P (expr))
168 tree access_fn = analyze_scalar_evolution (loop, expr);
170 if (access_fn == chrec_dont_know)
171 /* No access_fn. */
172 return false;
174 init = initial_condition_in_loop_num (access_fn, loop->num);
175 if (init == expr && !expr_invariant_in_loop_p (loop, init))
176 /* Not enough information: may not be loop invariant.
177 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
178 initial_condition is D, but it depends on i - loop's induction
179 variable. */
180 return false;
182 evolution = evolution_part_in_loop_num (access_fn, loop->num);
183 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
184 /* Evolution is not constant. */
185 return false;
187 if (TREE_CODE (init) == INTEGER_CST)
188 *misalign = fold_convert (ssizetype, init);
189 else
190 /* Not constant, misalignment cannot be calculated. */
191 *misalign = NULL_TREE;
193 *initial_offset = fold_convert (ssizetype, init);
195 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
196 return true;
199 /* Recursive computation. */
200 if (!BINARY_CLASS_P (expr))
202 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
203 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
205 fprintf (vect_dump, "Not binary expression ");
206 print_generic_expr (vect_dump, expr, TDF_SLIM);
208 return false;
210 oprnd0 = TREE_OPERAND (expr, 0);
211 oprnd1 = TREE_OPERAND (expr, 1);
213 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
214 &left_misalign, &left_step)
215 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
216 &right_offset, &right_misalign, &right_step))
217 return false;
219 /* The type of the operation: plus, minus or mult. */
220 code = TREE_CODE (expr);
221 switch (code)
223 case MULT_EXPR:
224 if (TREE_CODE (right_offset) != INTEGER_CST)
225 /* RIGHT_OFFSET need not be constant, e.g., for arrays of
226 variable-sized types.
227 FORNOW: We don't support such cases. */
228 return false;
230 /* Strip conversions that don't narrow the mode. */
231 left_offset = vect_strip_conversion (left_offset);
232 if (!left_offset)
233 return false;
234 /* Misalignment computation. */
235 if (SSA_VAR_P (left_offset))
237 /* If the left side contains variables that can't be substituted with
238 constants, we check if the right side is a multiple of ALIGNMENT.
240 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
241 fold_convert (ssizetype, vectype_alignment))))
242 *misalign = ssize_int (0);
243 else
244 /* If the remainder is not zero or the right side isn't constant,
245 we can't compute misalignment. */
246 *misalign = NULL_TREE;
248 else
250 /* The left operand was successfully substituted with constant. */
251 if (left_misalign)
252 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
253 NULL_TREE. */
254 *misalign = size_binop (code, left_misalign, right_misalign);
255 else
256 *misalign = NULL_TREE;
259 /* Step calculation. */
260 /* Multiply the step by the right operand. */
261 *step = size_binop (MULT_EXPR, left_step, right_offset);
262 break;
264 case PLUS_EXPR:
265 case MINUS_EXPR:
266 /* Combine the recursive calculations for step and misalignment. */
267 *step = size_binop (code, left_step, right_step);
269 if (left_misalign && right_misalign)
270 *misalign = size_binop (code, left_misalign, right_misalign);
271 else
272 *misalign = NULL_TREE;
274 break;
276 default:
277 gcc_unreachable ();
280 /* Compute offset. */
281 *initial_offset = fold_convert (ssizetype,
282 fold_build2 (code, TREE_TYPE (left_offset),
283 left_offset,
284 right_offset));
285 return true;
289 /* Function vect_determine_vectorization_factor
291 Determine the vectorization factor (VF). VF is the number of data elements
292 that are operated upon in parallel in a single iteration of the vectorized
293 loop. For example, when vectorizing a loop that operates on 4-byte elements,
294 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
295 elements can fit in a single vector register.
297 We currently support vectorization of loops in which all types operated upon
298 are of the same size. Therefore this function currently sets VF according to
299 the size of the types operated upon, and fails if there are multiple sizes
300 in the loop.
302 VF is also the factor by which the loop iterations are strip-mined, e.g.:
303 original loop:
304 for (i=0; i<N; i++){
305 a[i] = b[i] + c[i];  }
308 vectorized loop:
309 for (i=0; i<N; i+=VF){
310 a[i:VF] = b[i:VF] + c[i:VF];  }  */
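/* Editor's numeric sketch (target parameters assumed, not from the
   original source): with a 16-byte SIMD word (UNITS_PER_SIMD_WORD ==
   16), a loop over 2-byte 'short's gets nunits == 8, while a loop
   over 8-byte 'double's gets nunits == 2. A loop mixing the two
   would see 8 != 2 and be rejected by the "mixed data-types" check
   below.  */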
314 static bool
315 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
318 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
319 int nbbs = loop->num_nodes;
320 block_stmt_iterator si;
321 unsigned int vectorization_factor = 0;
322 int i;
323 tree scalar_type;
325 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
326 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
328 for (i = 0; i < nbbs; i++)
330 basic_block bb = bbs[i];
332 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
334 tree stmt = bsi_stmt (si);
335 unsigned int nunits;
336 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
337 tree vectype;
339 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
341 fprintf (vect_dump, "==> examining statement: ");
342 print_generic_expr (vect_dump, stmt, TDF_SLIM);
345 gcc_assert (stmt_info);
346 /* skip stmts which do not need to be vectorized. */
347 if (!STMT_VINFO_RELEVANT_P (stmt_info)
348 && !STMT_VINFO_LIVE_P (stmt_info))
350 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
351 fprintf (vect_dump, "skip.");
352 continue;
355 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
357 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
358 LOOP_LOC (loop_vinfo)))
360 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
361 print_generic_expr (vect_dump, stmt, TDF_SLIM);
363 return false;
366 if (STMT_VINFO_DATA_REF (stmt_info))
367 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
368 else if (TREE_CODE (stmt) == MODIFY_EXPR)
369 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
370 else
371 scalar_type = TREE_TYPE (stmt);
373 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
375 fprintf (vect_dump, "get vectype for scalar type: ");
376 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
379 vectype = get_vectype_for_scalar_type (scalar_type);
380 if (!vectype)
382 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
383 LOOP_LOC (loop_vinfo)))
385 fprintf (vect_dump, "not vectorized: unsupported data-type ");
386 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
388 return false;
390 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
392 fprintf (vect_dump, "vectype: ");
393 print_generic_expr (vect_dump, vectype, TDF_SLIM);
395 STMT_VINFO_VECTYPE (stmt_info) = vectype;
397 nunits = TYPE_VECTOR_SUBPARTS (vectype);
398 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
399 fprintf (vect_dump, "nunits = %d", nunits);
401 if (vectorization_factor)
403 /* FORNOW: don't allow mixed units.
404 This restriction will be relaxed in the future. */
405 if (nunits != vectorization_factor)
407 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
408 LOOP_LOC (loop_vinfo)))
409 fprintf (vect_dump, "not vectorized: mixed data-types");
410 return false;
413 else
414 vectorization_factor = nunits;
416 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
417 * vectorization_factor == UNITS_PER_SIMD_WORD);
421 /* TODO: Analyze cost. Decide if worth while to vectorize. */
423 if (vectorization_factor <= 1)
425 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
426 LOOP_LOC (loop_vinfo)))
427 fprintf (vect_dump, "not vectorized: unsupported data-type");
428 return false;
430 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
432 return true;
436 /* Function vect_analyze_operations.
438 Scan the loop stmts and make sure they are all vectorizable. */
440 static bool
441 vect_analyze_operations (loop_vec_info loop_vinfo)
443 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
444 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
445 int nbbs = loop->num_nodes;
446 block_stmt_iterator si;
447 unsigned int vectorization_factor = 0;
448 int i;
449 bool ok;
450 tree phi;
451 stmt_vec_info stmt_info;
452 bool need_to_vectorize = false;
454 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
455 fprintf (vect_dump, "=== vect_analyze_operations ===");
457 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
458 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
460 for (i = 0; i < nbbs; i++)
462 basic_block bb = bbs[i];
464 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
466 stmt_info = vinfo_for_stmt (phi);
467 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
469 fprintf (vect_dump, "examining phi: ");
470 print_generic_expr (vect_dump, phi, TDF_SLIM);
473 gcc_assert (stmt_info);
475 if (STMT_VINFO_LIVE_P (stmt_info))
477 /* FORNOW: not yet supported. */
478 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
479 LOOP_LOC (loop_vinfo)))
480 fprintf (vect_dump, "not vectorized: value used after loop.");
481 return false;
484 if (STMT_VINFO_RELEVANT_P (stmt_info))
486 /* Most likely a reduction-like computation that is used
487 in the loop. */
488 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
489 LOOP_LOC (loop_vinfo)))
490 fprintf (vect_dump, "not vectorized: unsupported pattern.");
491 return false;
495 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
497 tree stmt = bsi_stmt (si);
498 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
500 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
502 fprintf (vect_dump, "==> examining statement: ");
503 print_generic_expr (vect_dump, stmt, TDF_SLIM);
506 gcc_assert (stmt_info);
508 /* skip stmts which do not need to be vectorized.
509 this is expected to include:
510 - the COND_EXPR which is the loop exit condition
511 - any LABEL_EXPRs in the loop
512 - computations that are used only for array indexing or loop
513 control */
515 if (!STMT_VINFO_RELEVANT_P (stmt_info)
516 && !STMT_VINFO_LIVE_P (stmt_info))
518 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
519 fprintf (vect_dump, "irrelevant.");
520 continue;
523 if (STMT_VINFO_RELEVANT_P (stmt_info))
525 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
526 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
528 ok = (vectorizable_operation (stmt, NULL, NULL)
529 || vectorizable_assignment (stmt, NULL, NULL)
530 || vectorizable_load (stmt, NULL, NULL)
531 || vectorizable_store (stmt, NULL, NULL)
532 || vectorizable_condition (stmt, NULL, NULL));
534 if (!ok)
536 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
537 LOOP_LOC (loop_vinfo)))
539 fprintf (vect_dump,
540 "not vectorized: relevant stmt not supported: ");
541 print_generic_expr (vect_dump, stmt, TDF_SLIM);
543 return false;
545 need_to_vectorize = true;
548 if (STMT_VINFO_LIVE_P (stmt_info))
550 ok = vectorizable_reduction (stmt, NULL, NULL);
552 if (ok)
553 need_to_vectorize = true;
554 else
555 ok = vectorizable_live_operation (stmt, NULL, NULL);
557 if (!ok)
559 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
560 LOOP_LOC (loop_vinfo)))
562 fprintf (vect_dump,
563 "not vectorized: live stmt not supported: ");
564 print_generic_expr (vect_dump, stmt, TDF_SLIM);
566 return false;
569 } /* stmts in bb */
570 } /* bbs */
572 /* TODO: Analyze cost. Decide if worth while to vectorize. */
574 /* All operations in the loop are either irrelevant (deal with loop
575 control, or dead), or only used outside the loop and can be moved
576 out of the loop (e.g. invariants, inductions). The loop can be
577 optimized away by scalar optimizations. We're better off not
578 touching this loop. */
579 if (!need_to_vectorize)
581 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
582 fprintf (vect_dump,
583 "All the computation can be taken out of the loop.");
584 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
585 LOOP_LOC (loop_vinfo)))
586 fprintf (vect_dump,
587 "not vectorized: redundant loop. no profit to vectorize.");
588 return false;
591 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
592 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
593 fprintf (vect_dump,
594 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
595 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
597 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
598 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
601 LOOP_LOC (loop_vinfo)))
602 fprintf (vect_dump, "not vectorized: iteration count too small.");
603 return false;
606 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
607 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
609 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
610 fprintf (vect_dump, "epilog loop required.");
611 if (!vect_can_advance_ivs_p (loop_vinfo))
613 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
614 LOOP_LOC (loop_vinfo)))
615 fprintf (vect_dump,
616 "not vectorized: can't create epilog loop 1.");
617 return false;
619 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
621 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
622 LOOP_LOC (loop_vinfo)))
623 fprintf (vect_dump,
624 "not vectorized: can't create epilog loop 2.");
625 return false;
629 return true;
633 /* Function exist_non_indexing_operands_for_use_p
635 USE is one of the uses attached to STMT. Check if USE is
636 used in STMT for anything other than indexing an array. */
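/* Editor's example (sketch, not from the original source): in
   'a[i] = x_1', operand 'x_1' is the stored value itself, so this
   returns true for USE == x_1; a use that appears only inside the
   ARRAY_REF index, such as 'i', yields false.  */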
638 static bool
639 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
641 tree operand;
642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
644 /* USE corresponds to some operand in STMT. If there is no data
645 reference in STMT, then any operand that corresponds to USE
646 is not indexing an array. */
647 if (!STMT_VINFO_DATA_REF (stmt_info))
648 return true;
650 /* STMT has a data_ref. FORNOW this means that it is one of
651 the following forms:
652 -1- ARRAY_REF = var
653 -2- var = ARRAY_REF
654 (This should have been verified in analyze_data_refs).
656 'var' in the second case corresponds to a def, not a use,
657 so USE cannot correspond to any operands that are not used
658 for array indexing.
660 Therefore, all we need to check is if STMT falls into the
661 first case, and whether var corresponds to USE. */
663 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
664 return false;
666 operand = TREE_OPERAND (stmt, 1);
668 if (TREE_CODE (operand) != SSA_NAME)
669 return false;
671 if (operand == use)
672 return true;
674 return false;
678 /* Function vect_analyze_scalar_cycles.
680 Examine the cross iteration def-use cycles of scalar variables, by
681 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
682 following: invariant, induction, reduction, unknown.
684 Some forms of scalar cycles are not yet supported.
686 Example1: reduction: (unsupported yet)
688 loop1:
689 for (i=0; i<N; i++)
690 sum += a[i];
692 Example2: induction: (unsupported yet)
694 loop2:
695 for (i=0; i<N; i++)
696 a[i] = i;
698 Note: the following loop *is* vectorizable:
700 loop3:
701 for (i=0; i<N; i++)
702 a[i] = b[i];
704 even though it has a def-use cycle caused by the induction variable i:
706 loop: i_2 = PHI (i_0, i_1)
707 a[i_2] = ...;
708 i_1 = i_2 + 1;
709 GOTO loop;
711 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
712 it does not need to be vectorized because it is only used for array
713 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
714 loop2 on the other hand is relevant (it is being written to memory).
717 static void
718 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
720 tree phi;
721 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
722 basic_block bb = loop->header;
723 tree dummy;
725 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
726 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
728 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
730 tree access_fn = NULL;
731 tree def = PHI_RESULT (phi);
732 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
733 tree reduc_stmt;
735 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
737 fprintf (vect_dump, "Analyze phi: ");
738 print_generic_expr (vect_dump, phi, TDF_SLIM);
741 /* Skip virtual phi's. The data dependences that are associated with
742 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
744 if (!is_gimple_reg (SSA_NAME_VAR (def)))
746 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
747 fprintf (vect_dump, "virtual phi. skip.");
748 continue;
751 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
753 /* Analyze the evolution function. */
755 access_fn = analyze_scalar_evolution (loop, def);
757 if (!access_fn)
758 continue;
760 if (vect_print_dump_info (REPORT_DETAILS,
761 LOOP_LOC (loop_vinfo)))
763 fprintf (vect_dump, "Access function of PHI: ");
764 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
767 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
769 if (vect_print_dump_info (REPORT_DETAILS,LOOP_LOC (loop_vinfo)))
770 fprintf (vect_dump, "Detected induction.");
771 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
772 continue;
775 /* TODO: handle invariant phis */
777 reduc_stmt = vect_is_simple_reduction (loop, phi);
778 if (reduc_stmt)
780 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
781 fprintf (vect_dump, "Detected reduction.");
782 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
783 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
784 vect_reduction_def;
786 else
787 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
788 fprintf (vect_dump, "Unknown def-use cycle pattern.");
792 return;
796 /* Function vect_base_addr_differ_p.
798 This is the simplest data dependence test: determines whether the
799 data references A and B access the same array/region. Returns
800 false when the property is not computable at compile time.
801 Otherwise return true, and DIFFER_P will record the result. This
802 utility will not be necessary when alias_sets_conflict_p will be
803 less conservative. */
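/* Editor's sketch of the restrict case handled below (hypothetical
   function, not from this file):

     void f (int *__restrict__ p, int *q, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         p[i] = q[i];     /* store through p, load through q */
     }

   The store through 'p' cannot alias the load through 'q' in the
   same scope, so *DIFFER_P is set to true even if the alias sets of
   *p and *q conflict.  */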
805 static bool
806 vect_base_addr_differ_p (struct data_reference *dra,
807 struct data_reference *drb,
808 bool *differ_p)
810 tree stmt_a = DR_STMT (dra);
811 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
812 tree stmt_b = DR_STMT (drb);
813 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
814 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
815 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
816 tree type_a = TREE_TYPE (addr_a);
817 tree type_b = TREE_TYPE (addr_b);
818 HOST_WIDE_INT alias_set_a, alias_set_b;
820 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
822 /* Both references are ADDR_EXPR, i.e., we have the objects. */
823 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
824 return array_base_name_differ_p (dra, drb, differ_p);
826 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
827 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
828 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
829 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
831 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
833 *differ_p = true;
834 return true;
837 /* An instruction writing through a restricted pointer is "independent" of any
838 instruction reading or writing through a different pointer, in the same
839 block/scope. */
840 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
841 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
843 *differ_p = true;
844 return true;
846 return false;
850 /* Function vect_analyze_data_ref_dependence.
852 Return TRUE if there (might) exist a dependence between a memory-reference
853 DRA and a memory-reference DRB. */
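/* Editor's example (VF assumed to be 4; not from the original
   source): in

     for (i = 0; i < N; i++)  a[i + 1] = a[i] + 1;

   the distance vector entry is 1; since 1 % 4 != 0 and 1 < 4, the
   loop is rejected below. With a[i + 4] = a[i] + 1 the distance is
   4, 4 % 4 == 0, and the two refs are instead recorded as having the
   same alignment.  */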
855 static bool
856 vect_analyze_data_ref_dependence (struct data_reference *dra,
857 struct data_reference *drb,
858 loop_vec_info loop_vinfo)
860 bool differ_p;
861 struct data_dependence_relation *ddr;
862 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
863 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
864 int dist = 0;
865 unsigned int loop_depth = 0;
866 struct loop *loop_nest = loop;
867 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
868 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
870 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
872 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
873 LOOP_LOC (loop_vinfo)))
875 fprintf (vect_dump,
876 "not vectorized: can't determine dependence between: ");
877 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
878 fprintf (vect_dump, " and ");
879 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
881 return true;
884 if (differ_p)
885 return false;
887 ddr = initialize_data_dependence_relation (dra, drb);
888 compute_affine_dependence (ddr);
890 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
891 return false;
893 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
896 LOOP_LOC (loop_vinfo)))
898 fprintf (vect_dump,
899 "not vectorized: can't determine dependence between ");
900 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
901 fprintf (vect_dump, " and ");
902 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
904 return true;
907 /* Find loop depth. */
908 while (loop_nest)
910 if (loop_nest->outer && loop_nest->outer->outer)
912 loop_nest = loop_nest->outer;
913 loop_depth++;
915 else
916 break;
919 /* Compute distance vector. */
920 compute_subscript_distance (ddr);
921 build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);
923 if (!DDR_DIST_VECT (ddr))
925 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
926 LOOP_LOC (loop_vinfo)))
928 fprintf (vect_dump, "not vectorized: bad dist vector for ");
929 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
930 fprintf (vect_dump, " and ");
931 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
933 return true;
936 dist = DDR_DIST_VECT (ddr)[loop_depth];
938 /* Same loop iteration. */
939 if (dist % vectorization_factor == 0)
941 /* Two references with distance zero have the same alignment. */
942 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
943 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
944 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
945 fprintf (vect_dump, "accesses have the same alignment.");
946 return false;
949 if (dist >= vectorization_factor)
950 /* Dependence distance does not create dependence, as far as vectorization
951 is concerned, in this case. */
952 return false;
954 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
955 LOOP_LOC (loop_vinfo)))
957 fprintf (vect_dump,
958 "not vectorized: possible dependence between data-refs ");
959 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
960 fprintf (vect_dump, " and ");
961 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
964 return true;
968 /* Function vect_analyze_data_ref_dependences.
970 Examine all the data references in the loop, and make sure there do not
971 exist any data dependences between them. */
973 static bool
974 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
976 unsigned int i, j;
977 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
978 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
980 /* Examine store-store (output) dependences. */
982 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
983 fprintf (vect_dump, "=== vect_analyze_dependences ===");
985 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
986 fprintf (vect_dump, "compare all store-store pairs.");
988 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
990 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
992 struct data_reference *dra =
993 VARRAY_GENERIC_PTR (loop_write_refs, i);
994 struct data_reference *drb =
995 VARRAY_GENERIC_PTR (loop_write_refs, j);
996 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
997 return false;
1001 /* Examine load-store (true/anti) dependences. */
1003 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1004 fprintf (vect_dump, "compare all load-store pairs.");
1006 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
1008 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
1010 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
1011 struct data_reference *drb =
1012 VARRAY_GENERIC_PTR (loop_write_refs, j);
1013 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
1014 return false;
1018 return true;
1022 /* Function vect_compute_data_ref_alignment
1024 Compute the misalignment of the data reference DR.
1026 Output:
1027 1. If during the misalignment computation it is found that the data reference
1028 cannot be vectorized then false is returned.
1029 2. DR_MISALIGNMENT (DR) is defined.
1031 FOR NOW: No analysis is actually performed. Misalignment is calculated
1032 only for trivial cases. TODO. */
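/* Editor's arithmetic sketch (values assumed): for a 16-byte vectype
   the ALIGNMENT computed below is 16 bytes, so a reference whose
   known offset from its (possibly forced-)aligned base is 20 bytes
   gets DR_MISALIGNMENT == 20 % 16 == 4. If MISALIGN is unknown, or
   the base cannot be forced to 16-byte alignment, DR_MISALIGNMENT
   stays -1 (unknown).  */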
1034 static bool
1035 vect_compute_data_ref_alignment (struct data_reference *dr)
1037 tree stmt = DR_STMT (dr);
1038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1039 tree ref = DR_REF (dr);
1040 tree vectype;
1041 tree base, alignment;
1042 bool base_aligned_p;
1043 tree misalign;
1045 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1046 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1048 /* Initialize misalignment to unknown. */
1049 DR_MISALIGNMENT (dr) = -1;
1051 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
1052 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
1053 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
1054 vectype = STMT_VINFO_VECTYPE (stmt_info);
1056 if (!misalign)
1058 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1060 fprintf (vect_dump, "Unknown alignment for access: ");
1061 print_generic_expr (vect_dump, base, TDF_SLIM);
1063 return true;
1066 if (!base_aligned_p)
1068 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
1070 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1072 fprintf (vect_dump, "can't force alignment of ref: ");
1073 print_generic_expr (vect_dump, ref, TDF_SLIM);
1075 return true;
1078 /* Force the alignment of the decl.
1079 NOTE: This is the only change to the code we make during
1080 the analysis phase, before deciding to vectorize the loop. */
1081 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1082 fprintf (vect_dump, "force alignment");
1083 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1084 DECL_USER_ALIGN (base) = 1;
1087 /* At this point we assume that the base is aligned. */
1088 gcc_assert (base_aligned_p
1089 || (TREE_CODE (base) == VAR_DECL
1090 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1092 /* Alignment required, in bytes: */
1093 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1095 /* Modulo alignment. */
1096 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1097 if (tree_int_cst_sgn (misalign) < 0)
1099 /* Negative misalignment value. */
1100 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1101 fprintf (vect_dump, "unexpected misalign value");
1102 return false;
1105 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
1107 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1108 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
1110 return true;
1114 /* Function vect_compute_data_refs_alignment
1116 Compute the misalignment of data references in the loop.
1117 This pass may take place at function granularity instead of at loop
1118 granularity.
1120 FOR NOW: No analysis is actually performed. Misalignment is calculated
1121 only for trivial cases. TODO. */
1123 static bool
1124 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1126 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1127 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1128 unsigned int i;
1130 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1132 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1133 if (!vect_compute_data_ref_alignment (dr))
1134 return false;
1137 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1139 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1140 if (!vect_compute_data_ref_alignment (dr))
1141 return false;
1144 return true;
1148 /* Function vect_enhance_data_refs_alignment
1150 This pass will use loop versioning and loop peeling in order to enhance
1151 the alignment of data references in the loop.
1153 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1154 original loop is to be vectorized; Any other loops that are created by
1155 the transformations performed in this pass - are not supposed to be
1156 vectorized. This restriction will be relaxed. */
1158 static void
1159 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1161 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1162 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1163 varray_type datarefs;
1164 VEC(dr_p,heap) *same_align_drs;
1165 struct data_reference *dr0 = NULL;
1166 struct data_reference *dr;
1167 unsigned int i, j;
1170 /* This pass will require a cost model to guide it on whether to apply peeling
1171 or versioning, or a combination of the two. For example, the scheme that
1172 Intel uses when given a loop with several memory accesses is as follows:
1173 choose one memory access ('p') whose alignment you want to force by doing
1174 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1175 other accesses are not necessarily aligned, or (2) use loop versioning to
1176 generate one loop in which all accesses are aligned, and another loop in
1177 which only 'p' is necessarily aligned.
1179 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1180 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1181 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1183 Devising a cost model is the most critical aspect of this work. It will
1184 guide us on which access to peel for, whether to use loop versioning, how
1185 many versions to create, etc. The cost model will probably consist of
1186 generic considerations as well as target specific considerations (on
1187 powerpc for example, misaligned stores are more painful than misaligned
1188 loads).
1190 Here is the general steps involved in alignment enhancements:
1192 -- original loop, before alignment analysis:
1193 for (i=0; i<N; i++){
1194 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1195 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1198 -- After vect_compute_data_refs_alignment:
1199 for (i=0; i<N; i++){
1200 x = q[i]; # DR_MISALIGNMENT(q) = 3
1201 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1204 -- Possibility 1: we do loop versioning:
1205 if (p is aligned) {
1206 for (i=0; i<N; i++){ # loop 1A
1207 x = q[i]; # DR_MISALIGNMENT(q) = 3
1208 p[i] = y; # DR_MISALIGNMENT(p) = 0
1211 else {
1212 for (i=0; i<N; i++){ # loop 1B
1213 x = q[i]; # DR_MISALIGNMENT(q) = 3
1214 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1218 -- Possibility 2: we do loop peeling:
1219 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1220 x = q[i];
1221 p[i] = y;
1223 for (i = 3; i < N; i++){ # loop 2A
1224 x = q[i]; # DR_MISALIGNMENT(q) = 0
1225 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1228 -- Possibility 3: combination of loop peeling and versioning:
1229 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1230 x = q[i];
1231 p[i] = y;
1233 if (p is aligned) {
1234 for (i = 3; i<N; i++){ # loop 3A
1235 x = q[i]; # DR_MISALIGNMENT(q) = 0
1236 p[i] = y; # DR_MISALIGNMENT(p) = 0
1239 else {
1240 for (i = 3; i<N; i++){ # loop 3B
1241 x = q[i]; # DR_MISALIGNMENT(q) = 0
1242 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1246 These loops are later passed to loop_transform to be vectorized. The
1247 vectorizer will use the alignment information to guide the transformation
1248 (whether to generate regular loads/stores, or with special handling for
1249 misalignment).  */
1252 /* (1) Peeling to force alignment. */
1254 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1255 Considerations:
1256 + How many accesses will become aligned due to the peeling
1257 - How many accesses will become unaligned due to the peeling,
1258 and the cost of misaligned accesses.
1259 - The cost of peeling (the extra runtime checks, the increase
1260 in code size).
1262 The scheme we use FORNOW: peel to force the alignment of the first
1263 misaligned store in the loop.
1264 Rationale: misaligned stores are not yet supported.
1266 TODO: Use a better cost model. */
1268 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1270 dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1271 if (!aligned_access_p (dr0))
1273 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1274 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1275 break;
1279 /* (1.2) Update the alignment info according to the peeling factor.
1280 If the misalignment of the DR we peel for is M, then the
1281 peeling factor is VF - M, and the misalignment of each access DR_i
1282 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
1283 If the misalignment of the DR we peel for is unknown, then the
1284 misalignment of each access DR_i in the loop is also unknown.
1286 TODO: - consider accesses that are known to have the same
1287 alignment, even if that alignment is unknown. */
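/* Editor's numeric example (element type assumed to be 4-byte): with
   VF == 4, a DR0 with DR_MISALIGNMENT == 8 bytes gives mis == 2
   elements, so npeel == 4 - 2 == 2 scalar iterations are peeled.
   Every other 4-byte access with known misalignment M is then
   updated to (M + npeel * 4) % UNITS_PER_SIMD_WORD.  */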
1289 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1291 int mis;
1292 int npeel = 0;
1294 if (known_alignment_for_access_p (dr0))
1296 /* Since it's known at compile time, compute the number of iterations
1297 in the peeled loop (the peeling factor) for use in updating
1298 DR_MISALIGNMENT values. The peeling factor is the vectorization
1299 factor minus the misalignment as an element count. */
1300 mis = DR_MISALIGNMENT (dr0);
1301 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1302 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1305 datarefs = loop_write_datarefs;
1306 for (j = 0; j < 2; j++)
1308 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1310 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1312 if (dr == dr0)
1313 continue;
1314 if (known_alignment_for_access_p (dr)
1315 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
1316 DR_MISALIGNMENT (dr) = 0;
1317 else if (known_alignment_for_access_p (dr)
1318 && known_alignment_for_access_p (dr0))
1320 int drsize =
1321 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1323 DR_MISALIGNMENT (dr) += npeel * drsize;
1324 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1326 else
1327 DR_MISALIGNMENT (dr) = -1;
1329 datarefs = loop_read_datarefs;
1332 same_align_drs =
1333 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
1334 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
1336 DR_MISALIGNMENT (dr) = 0;
1339 DR_MISALIGNMENT (dr0) = 0;
1344 /* Function vect_analyze_data_refs_alignment
1346 Analyze the alignment of the data-references in the loop.
1347 FOR NOW: Until support for misaligned accesses is in place, only if all
1348 accesses are aligned can the loop be vectorized. This restriction will be
1349 relaxed. */
1351 static bool
1352 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1354 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1355 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1356 enum dr_alignment_support supportable_dr_alignment;
1357 unsigned int i;
1359 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1360 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1363 /* This pass may take place at function granularity instead of at loop
1364 granularity. */
1366 if (!vect_compute_data_refs_alignment (loop_vinfo))
1368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1369 LOOP_LOC (loop_vinfo)))
1370 fprintf (vect_dump,
1371 "not vectorized: can't calculate alignment for data ref.");
1372 return false;
1376 /* This pass will decide on using loop versioning and/or loop peeling in
1377 order to enhance the alignment of data references in the loop. */
1379 vect_enhance_data_refs_alignment (loop_vinfo);
1382 /* Finally, check that all the data references in the loop can be
1383 handled with respect to their alignment. */
1385 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1387 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1388 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1389 if (!supportable_dr_alignment)
1391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1392 LOOP_LOC (loop_vinfo)))
1393 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
1394 return false;
1396 if (supportable_dr_alignment != dr_aligned
1397 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1398 fprintf (vect_dump, "Vectorizing an unaligned access.");
1400 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1402 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1403 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1404 if (!supportable_dr_alignment)
1406 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1407 LOOP_LOC (loop_vinfo)))
1408 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
1409 return false;
1411 if (supportable_dr_alignment != dr_aligned
1412 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1413 fprintf (vect_dump, "Vectorizing an unaligned access.");
1415 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1416 && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
1417 fprintf (vect_dump, "Alignment of access forced using peeling.");
1419 return true;
1423 /* Function vect_analyze_data_ref_access.
1425 Analyze the access pattern of the data-reference DR. For now, a data access
1426 has to be consecutive to be considered vectorizable.  */
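/* Editor's example (sketch): for 'int a[N]', the ref a[i] has
   STEP == TYPE_SIZE_UNIT (int) == 4 and is accepted; a[2 * i]
   (STEP == 8) or a loop-invariant a[k] (STEP == 0) fails the
   comparison below as "not consecutive access".  */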
1428 static bool
1429 vect_analyze_data_ref_access (struct data_reference *dr)
1431 tree stmt = DR_STMT (dr);
1432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1433 tree step = STMT_VINFO_VECT_STEP (stmt_info);
1434 tree scalar_type = TREE_TYPE (DR_REF (dr));
1436 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1438 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1439 fprintf (vect_dump, "not consecutive access");
1440 return false;
1442 return true;
1446 /* Function vect_analyze_data_ref_accesses.
1448 Analyze the access pattern of all the data references in the loop.
1450 FORNOW: the only access pattern that is considered vectorizable is a
1451 simple step 1 (consecutive) access.
1453 FORNOW: handle only arrays and pointer accesses. */
1455 static bool
1456 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1458 unsigned int i;
1459 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1460 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1462 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1463 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1465 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1467 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1468 bool ok = vect_analyze_data_ref_access (dr);
1469 if (!ok)
1471 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1472 LOOP_LOC (loop_vinfo)))
1473 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1474 return false;
1478 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1480 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1481 bool ok = vect_analyze_data_ref_access (dr);
1482 if (!ok)
1484 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1485 LOOP_LOC (loop_vinfo)))
1486 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1487 return false;
1491 return true;
1495 /* Function vect_analyze_pointer_ref_access.
1497 Input:
1498 STMT - a stmt that contains a data-ref.
1499 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1500 ACCESS_FN - the access function of MEMREF.
1502 Output:
1503 If the data-ref access is vectorizable, return a data_reference structure
1504 that represents it (DR). Otherwise - return NULL.
1505 STEP - the stride of MEMREF in the loop.
1506 INIT - the initial condition of MEMREF in the loop.  */
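/* Editor's example (not from the original source): for 'float *p'
   advanced as p = p + 1 in the source (a 4-byte step at the gimple
   level), vect_is_simple_iv_evolution yields INIT == p_0 and
   STEP == 4 == TYPE_SIZE_UNIT (float), so the modulo check below
   passes; a hypothetical STEP of 2 would be rejected as a
   "non consecutive access".  */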
1509 static struct data_reference *
1510 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
1511 tree access_fn, tree *ptr_init, tree *ptr_step)
1513 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1514 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1515 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1516 tree step, init;
1517 tree reftype, innertype;
1518 tree indx_access_fn;
1519 int loopnum = loop->num;
1520 struct data_reference *dr;
1522 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
1524 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1525 LOOP_LOC (loop_vinfo)))
1526 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
1527 return NULL;
1530 STRIP_NOPS (init);
1532 if (!expr_invariant_in_loop_p (loop, init))
1534 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1535 LOOP_LOC (loop_vinfo)))
1536 fprintf (vect_dump,
1537 "not vectorized: initial condition is not loop invariant.");
1538 return NULL;
1541 if (TREE_CODE (step) != INTEGER_CST)
1543 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1544 LOOP_LOC (loop_vinfo)))
1545 fprintf (vect_dump,
1546 "not vectorized: non constant step for pointer access.");
1547 return NULL;
1550 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
1551 if (!POINTER_TYPE_P (reftype))
1553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1554 LOOP_LOC (loop_vinfo)))
1555 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1556 return NULL;
1559 if (!POINTER_TYPE_P (TREE_TYPE (init)))
1561 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1562 LOOP_LOC (loop_vinfo)))
1563 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1564 return NULL;
1567 *ptr_step = fold_convert (ssizetype, step);
1568 innertype = TREE_TYPE (reftype);
1569 if (!COMPLETE_TYPE_P (innertype))
1571 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1572 LOOP_LOC (loop_vinfo)))
1573 fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
1574 return NULL;
1577 /* Check that STEP is a multiple of type size. */
1578 if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
1579 fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
1581 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1582 LOOP_LOC (loop_vinfo)))
1583 fprintf (vect_dump, "not vectorized: non consecutive access.");
1584 return NULL;
1587 indx_access_fn =
1588 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
1589 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1591 fprintf (vect_dump, "Access function of ptr indx: ");
1592 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
1594 dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
1595 *ptr_init = init;
1596 return dr;
1600 /* Function vect_address_analysis
1602 Return the BASE of the address expression EXPR.
1603 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1605 Input:
1606 EXPR - the address expression that is being analyzed
1607 STMT - the statement that contains EXPR or its original memory reference
1608 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1609 VECTYPE - the type that defines the alignment (i.e., we compute
1610 alignment relative to TYPE_ALIGN(VECTYPE))
1611 DR - data_reference struct for the original memory reference
1613 Output:
1614 BASE (returned value) - the base of the data reference EXPR.
1615 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1616 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1617 computation is impossible
1618 STEP - evolution of EXPR in the loop
1619 BASE_ALIGNED - indicates if BASE is aligned
1621 If something unexpected is encountered (an unsupported form of data-ref),
1622 then NULL_TREE is returned.  */
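/* Editor's example (hypothetical expression): for
   EXPR == &a + (i * 4 + 8), the PLUS_EXPR case below recurses on
   both operands; only '&a' yields a base (via the ADDR_EXPR case and
   vect_object_analysis), and 'i * 4 + 8' becomes the offset
   expression. If both operands carried addresses, as in '&a + &b',
   NULL_TREE would be returned.  */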
1625 static tree
1626 vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
1627 struct data_reference *dr, tree *offset, tree *misalign,
1628 tree *step, bool *base_aligned)
1630 tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
1631 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1632 tree dummy;
1633 struct ptr_info_def *dummy1;
1634 subvar_t dummy2;
1636 switch (TREE_CODE (expr))
1638 case PLUS_EXPR:
1639 case MINUS_EXPR:
1640 /* EXPR is of form {base +/- offset} (or {offset +/- base}). */
1641 oprnd0 = TREE_OPERAND (expr, 0);
1642 oprnd1 = TREE_OPERAND (expr, 1);
1644 STRIP_NOPS (oprnd0);
1645 STRIP_NOPS (oprnd1);
1647 /* Recursively try to find the base of the address contained in EXPR.
1648 For offset, the returned base will be NULL. */
1649 base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
1650 &address_offset, &address_misalign, step,
1651 base_aligned);
1653 base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
1654 &address_offset, &address_misalign, step,
1655 base_aligned);
1657 /* We support cases where only one of the operands contains an
1658 address. */
1659 if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
1660 return NULL_TREE;
1662 /* To revert STRIP_NOPS. */
1663 oprnd0 = TREE_OPERAND (expr, 0);
1664 oprnd1 = TREE_OPERAND (expr, 1);
1666 offset_expr = base_addr0 ?
1667 fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);
1669 /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
1670 a number, we can add it to the misalignment value calculated for base,
1671 otherwise, misalignment is NULL. */
1672 if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
1673 *misalign = size_binop (TREE_CODE (expr), address_misalign,
1674 offset_expr);
1675 else
1676 *misalign = NULL_TREE;
1678 /* Combine offset (from EXPR {base + offset}) with the offset calculated
1679 for base. */
1680 *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
1681 return base_addr0 ? base_addr0 : base_addr1;
1683 case ADDR_EXPR:
1684 base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
1685 is_read, vectype, &dr, offset,
1686 misalign, step, base_aligned,
1687 &dummy, &dummy1, &dummy2);
1688 return base_address;
1690 case SSA_NAME:
1691 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1692 return NULL_TREE;
1694 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1696 if (vect_get_ptr_offset (expr, vectype, misalign))
1697 *base_aligned = true;
1698 else
1699 *base_aligned = false;
1701 else
1703 *base_aligned = true;
1704 *misalign = ssize_int (0);
1706 *offset = ssize_int (0);
1707 *step = ssize_int (0);
1708 return expr;
1710 default:
1711 return NULL_TREE;
1716 /* Function vect_object_analysis
1718 Return the BASE of the data reference MEMREF.
1719 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1720 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1721 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1722 instantiated with initial_conditions of access_functions of variables,
1723 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1725 Function get_inner_reference is used for the above in case of ARRAY_REF and
1726 COMPONENT_REF.
1728 The structure of the function is as follows:
1729 Part 1:
1730 Case 1. For handled_component_p refs
1731 1.1 call get_inner_reference
1732 1.1.1 analyze offset expr received from get_inner_reference
1733 1.2. build data-reference structure for MEMREF
1734 (fall through with BASE)
1735 Case 2. For declarations
1736 2.1 check alignment
1737 2.2 update DR_BASE_NAME if necessary for alias
1738 Case 3. For INDIRECT_REFs
1739 3.1 get the access function
1740 3.2 analyze evolution of MEMREF
1741 3.3 set data-reference structure for MEMREF
1742 3.4 call vect_address_analysis to analyze INIT of the access function
1744 Part 2:
1745 Combine the results of object and address analysis to calculate
1746 INITIAL_OFFSET, STEP and misalignment info.
1748 Input:
1749 MEMREF - the memory reference that is being analyzed
1750 STMT - the statement that contains MEMREF
1751 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1752 VECTYPE - the type that defines the alignment (i.e, we compute
1753 alignment relative to TYPE_ALIGN(VECTYPE))
1755 Output:
1756 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1757 E.g, if MEMREF is a.b[k].c[i][j] the returned
1758 base is &a.
1759 DR - data_reference struct for MEMREF
1760 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1761 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1762 the computation is impossible
1763 STEP - evolution of the DR_REF in the loop
1764 BASE_ALIGNED - indicates if BASE is aligned
1765 MEMTAG - memory tag for aliasing purposes
1766 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
1767 SUBVAR - Sub-variables of the variable
1769 If something unexpected is encountered (an unsupported form of data-ref),
1770 then NULL_TREE is returned. */
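/* Editor's walk-through of Part 1 for MEMREF == a.b[i][j] (Case 1,
   then the fall-through to Case 2; a sketch, not from the original
   source): get_inner_reference peels the component refs down to the
   decl 'a' and returns an offset expression in i and j, which
   vect_analyze_offset_expr turns into INITIAL_OFFSET, MISALIGN and
   STEP; a data-reference is built for the ARRAY_REF, and the decl
   handling then sets BASE_ADDRESS == &a and MEMTAG == a.  */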
1772 static tree
1773 vect_object_analysis (tree memref, tree stmt, bool is_read,
1774 tree vectype, struct data_reference **dr,
1775 tree *offset, tree *misalign, tree *step,
1776 bool *base_aligned, tree *memtag,
1777 struct ptr_info_def **ptr_info, subvar_t *subvars)
1779 tree base = NULL_TREE, base_address = NULL_TREE;
1780 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1781 tree object_step = ssize_int (0), address_step = ssize_int (0);
1782 bool object_base_aligned = true, address_base_aligned = true;
1783 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1784 HOST_WIDE_INT pbitsize, pbitpos;
1785 tree poffset, bit_pos_in_bytes;
1786 enum machine_mode pmode;
1787 int punsignedp, pvolatilep;
1788 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1789 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1790 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1791 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1792 struct data_reference *ptr_dr = NULL;
1793 tree access_fn, evolution_part, address_to_analyze;
1795 *ptr_info = NULL;
1797 /* Part 1: */
1798 /* Case 1. handled_component_p refs. */
1799 if (handled_component_p (memref))
1801 /* 1.1 call get_inner_reference. */
1802 /* Find the base and the offset from it. */
1803 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1804 &pmode, &punsignedp, &pvolatilep, false);
1805 if (!base)
1806 return NULL_TREE;
1808 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1809 if (poffset
1810 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1811 &object_offset, &object_misalign, &object_step))
1813 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1815 fprintf (vect_dump, "failed to compute offset or step for ");
1816 print_generic_expr (vect_dump, memref, TDF_SLIM);
1818 return NULL_TREE;
1821 /* Add bit position to OFFSET and MISALIGN. */
1823 bit_pos_in_bytes = ssize_int (pbitpos / BITS_PER_UNIT);
1824 /* Check that there is no remainder in bits. */
1825 if (pbitpos % BITS_PER_UNIT)
1827 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1828 fprintf (vect_dump, "bit offset alignment.");
1829 return NULL_TREE;
1831 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1832 if (object_misalign)
1833 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1834 bit_pos_in_bytes);
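
/* E.g. (illustration): for a COMPONENT_REF whose field starts at bit
   32, bit_pos_in_bytes is ssize_int (4) on a target with
   BITS_PER_UNIT == 8, so both OFFSET and MISALIGN grow by 4; a true
   bit-field (pbitpos not a multiple of BITS_PER_UNIT) was rejected
   just above.  */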
1836 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1837 if (!(*dr))
1839 if (TREE_CODE (memref) == ARRAY_REF)
1840 *dr = analyze_array (stmt, memref, is_read);
1841 else
1842 /* FORNOW. */
1843 return NULL_TREE;
1845 memref = base; /* To continue analysis of BASE. */
1846 /* fall through */
1849 /* Part 1: Case 2. Declarations. */
1850 if (DECL_P (memref))
1852 /* We expect to get a decl only if we already have a DR. */
1853 if (!(*dr))
1855 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1857 fprintf (vect_dump, "unhandled decl ");
1858 print_generic_expr (vect_dump, memref, TDF_SLIM);
1860 return NULL_TREE;
1863 /* 2.1 check the alignment. */
1864 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1865 object_base_aligned = true;
1866 else
1867 object_base_aligned = false;
1869 /* 2.2 update DR_BASE_NAME if necessary. */
1870 if (!DR_BASE_NAME ((*dr)))
1871 /* For alias analysis, in case the analysis of an INDIRECT_REF
1872 brought us to an object. */
1873 DR_BASE_NAME ((*dr)) = memref;
1875 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1876 *subvars = get_subvars_for_var (memref);
1877 base_address = build_fold_addr_expr (memref);
1878 *memtag = memref;
1881 /* Part 1: Case 3. INDIRECT_REFs. */
1882 else if (TREE_CODE (memref) == INDIRECT_REF)
1884 tree ptr_ref = TREE_OPERAND (memref, 0);
1885 if (TREE_CODE (ptr_ref) == SSA_NAME)
1886 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1888 /* 3.1 get the access function. */
1889 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1890 if (!access_fn)
1892 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1893 LOOP_LOC (loop_vinfo)))
1894 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1895 return NULL_TREE;
1897 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1899 fprintf (vect_dump, "Access function of ptr: ");
1900 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1903 /* 3.2 analyze evolution of MEMREF. */
1904 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1905 if (evolution_part)
1907 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1908 access_fn, &ptr_init, &ptr_step);
1909 if (!(ptr_dr))
1910 return NULL_TREE;
1912 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1913 address_to_analyze = ptr_init;
1915 else
1917 if (!(*dr))
1919 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1920 LOOP_LOC (loop_vinfo)))
1921 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1922 return NULL_TREE;
1924 /* Since a DR already exists for MEMREF, we are analyzing the init of
1925 the access function, which does not necessarily have an evolution in
1926 the loop. */
1927 address_to_analyze = initial_condition_in_loop_num (access_fn,
1928 loop->num);
1931 /* 3.3 set data-reference structure for MEMREF. */
1932 *dr = (*dr) ? *dr : ptr_dr;
1934 /* 3.4 call vect_address_analysis to analyze INIT of the access
1935 function. */
1936 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1937 vectype, *dr, &address_offset, &address_misalign,
1938 &address_step, &address_base_aligned);
1939 if (!base_address)
1940 return NULL_TREE;
1942 switch (TREE_CODE (base_address))
1944 case SSA_NAME:
1945 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1946 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1947 *memtag = get_var_ann (
1948 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1949 break;
1950 case ADDR_EXPR:
1951 *memtag = TREE_OPERAND (base_address, 0);
1952 break;
1953 default:
1954 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1955 LOOP_LOC (loop_vinfo)))
1957 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1958 print_generic_expr (vect_dump, memref, TDF_SLIM);
1960 return NULL_TREE;
1964 if (!base_address)
1965 /* MEMREF cannot be analyzed. */
1966 return NULL_TREE;
1968 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1969 *subvars = get_subvars_for_var (*memtag);
1971 /* Part 2: Combine the results of object and address analysis to calculate
1972 INITIAL_OFFSET, STEP and misalignment info. */
1973 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1974 if (object_misalign && address_misalign)
1975 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1976 else
1977 *misalign = NULL_TREE;
1978 *step = size_binop (PLUS_EXPR, object_step, address_step);
1979 *base_aligned = object_base_aligned && address_base_aligned;
1981 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1983 fprintf (vect_dump, "Results of object analysis for: ");
1984 print_generic_expr (vect_dump, memref, TDF_SLIM);
1985 fprintf (vect_dump, "\n\tbase_address: ");
1986 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1987 fprintf (vect_dump, "\n\toffset: ");
1988 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1989 fprintf (vect_dump, "\n\tstep: ");
1990 print_generic_expr (vect_dump, *step, TDF_SLIM);
1991 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1992 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1994 return base_address;
1998 /* Function vect_analyze_data_refs.
2000 Find all the data references in the loop.
2002 The general structure of the analysis of data refs in the vectorizer is as
2003 follows:
2004 1- vect_analyze_data_refs(loop):
2005 Find and analyze all data-refs in the loop:
2006 foreach ref
2007 base_address = vect_object_analysis(ref)
2008 1.1- vect_object_analysis(ref):
2009 Analyze ref, and build a DR (data_reference struct) for it;
2010 compute base, initial_offset, step and alignment.
2011 Call get_inner_reference for refs handled in this function.
2012 Call vect_address_analysis (addr) to analyze pointer-type expressions.
2013 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
2014 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
2015 2- vect_analyze_data_ref_dependences(): apply dependence testing using ref_stmt.DR
2016 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
2017 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
2019 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
2020 whose base is really an array (not a pointer) and whose alignment
2021 can be forced. This restriction will be relaxed. */
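
/* For illustration only (a sketch, with hypothetical variables), two
   references the FORNOW restriction accepts, assuming the target
   provides a suitable vector type:

       int a[N], b[N], *p;
       for (i = 0; i < n; i++)
         a[i] = b[i];      <-- ARRAY_REFs whose base is an array and
                               whose alignment can be forced
       for (i = 0; i < n; i++)
         *(p + i) = 0;     <-- INDIRECT_REF, handled when the pointer
                               access is known to be aligned.  */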
2023 static bool
2024 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2026 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2027 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2028 int nbbs = loop->num_nodes;
2029 block_stmt_iterator si;
2030 int j;
2031 struct data_reference *dr;
2033 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2034 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
2036 for (j = 0; j < nbbs; j++)
2038 basic_block bb = bbs[j];
2039 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2041 bool is_read = false;
2042 tree stmt = bsi_stmt (si);
2043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2044 varray_type *datarefs = NULL;
2045 tree memref = NULL;
2046 tree scalar_type, vectype;
2047 tree base, offset, misalign, step, tag;
2048 struct ptr_info_def *ptr_info;
2049 bool base_aligned;
2050 subvar_t subvars = NULL;
2051 bool no_vuse, no_vmaymust;
2053 /* Assumption: a data-ref exists in STMT if and only if
2054 it has vuses/vdefs. */
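
/* E.g. (sketch, hypothetical names): a load "x_1 = a[i_2]" carries a
   VUSE of a's memory tag, a store "a[i_2] = x_1" carries a
   V_MAY_DEF/V_MUST_DEF, and a scalar stmt like "i_3 = i_2 + 1"
   carries neither, so it is skipped by the check below.  */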
2056 no_vuse = ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE);
2057 no_vmaymust = ZERO_SSA_OPERANDS (stmt,
2058 SSA_OP_VMAYDEF | SSA_OP_VMUSTDEF);
2059 if (no_vuse && no_vmaymust)
2060 continue;
2062 if (!no_vuse && !no_vmaymust)
2064 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2066 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
2067 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2069 return false;
2072 if (TREE_CODE (stmt) != MODIFY_EXPR)
2074 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2076 fprintf (vect_dump, "unexpected vops in stmt: ");
2077 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2079 return false;
2082 if (!no_vuse)
2084 memref = TREE_OPERAND (stmt, 1);
2085 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2086 is_read = true;
2088 else /* vdefs */
2090 memref = TREE_OPERAND (stmt, 0);
2091 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2092 is_read = false;
2095 scalar_type = TREE_TYPE (memref);
2096 vectype = get_vectype_for_scalar_type (scalar_type);
2097 if (!vectype)
2099 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2101 fprintf (vect_dump, "no vectype for stmt: ");
2102 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2103 fprintf (vect_dump, " scalar_type: ");
2104 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2106 /* It is not possible to vectorize this data reference. */
2107 return false;
2109 /* Analyze MEMREF. If it is of a supported form, build data_reference
2110 struct for it (DR). */
2111 dr = NULL;
2112 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2113 &offset, &misalign, &step,
2114 &base_aligned, &tag, &ptr_info,
2115 &subvars);
2116 if (!base)
2118 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2119 LOOP_LOC (loop_vinfo)))
2121 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2122 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2124 return false;
2126 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2127 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2128 STMT_VINFO_VECT_STEP (stmt_info) = step;
2129 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2130 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2131 STMT_VINFO_MEMTAG (stmt_info) = tag;
2132 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
2133 STMT_VINFO_SUBVARS (stmt_info) = subvars;
2134 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2135 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2136 STMT_VINFO_DATA_REF (stmt_info) = dr;
2140 return true;
2144 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2146 /* Function vect_mark_relevant.
2148 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2150 static void
2151 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2152 bool relevant_p, bool live_p)
2154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2155 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
2156 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2158 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2159 fprintf (vect_dump, "mark relevant %d, live %d.", relevant_p, live_p);
2161 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2162 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
2164 if (TREE_CODE (stmt) == PHI_NODE)
2165 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2166 or live will fail vectorization later on. */
2167 return;
2169 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
2170 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2172 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2173 fprintf (vect_dump, "already marked relevant/live.");
2174 return;
2177 VEC_safe_push (tree, heap, *worklist, stmt);
2181 /* Function vect_stmt_relevant_p.
2183 Return true if STMT, in the loop represented by LOOP_VINFO, is
2184 "relevant for vectorization".
2186 A stmt is considered "relevant for vectorization" if:
2187 - it has uses outside the loop.
2188 - it has vdefs (it alters memory).
2189 - it is a control stmt in the loop (except for the exit condition).
2191 CHECKME: what other side effects would the vectorizer allow? */
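
/* For example (illustration only, hypothetical SSA names):

       a[i_1] = x_2;     relevant: it has a vdef (alters memory).
       s_3 = s_2 + 1;    live if s_3 is used after the loop, through
                         a loop exit phi.
       t_4 = i_1 * 4;    neither, if t_4 is used only for address
                         computation inside the loop.  */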
2193 static bool
2194 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2195 bool *relevant_p, bool *live_p)
2197 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2198 ssa_op_iter op_iter;
2199 imm_use_iterator imm_iter;
2200 use_operand_p use_p;
2201 def_operand_p def_p;
2203 *relevant_p = false;
2204 *live_p = false;
2206 /* cond stmt other than loop exit cond. */
2207 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2208 *relevant_p = true;
2210 /* changing memory. */
2211 if (TREE_CODE (stmt) != PHI_NODE)
2212 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2214 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2215 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2216 *relevant_p = true;
2219 /* uses outside the loop. */
2220 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2222 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2224 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2225 if (!flow_bb_inside_loop_p (loop, bb))
2227 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2228 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2230 /* We expect all such uses to be in the loop exit phis
2231 (because of loop-closed SSA form). */
2232 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2233 gcc_assert (bb == loop->single_exit->dest);
2235 *live_p = true;
2240 return (*live_p || *relevant_p);
2244 /* Function vect_mark_stmts_to_be_vectorized.
2246 Not all stmts in the loop need to be vectorized. For example:
2248 for i...
2249 for j...
2250 1. T0 = i + j
2251 2. T1 = a[T0]
2253 3. j = j + 1
2255 Stmts 1 and 3 do not need to be vectorized, because loop control and
2256 addressing of vectorized data-refs are handled differently.
2258 This pass detects such stmts. */
2260 static bool
2261 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2263 VEC(tree,heap) *worklist;
2264 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2265 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2266 unsigned int nbbs = loop->num_nodes;
2267 block_stmt_iterator si;
2268 tree stmt, use;
2269 stmt_ann_t ann;
2270 ssa_op_iter iter;
2271 unsigned int i;
2272 stmt_vec_info stmt_vinfo;
2273 basic_block bb;
2274 tree phi;
2275 bool relevant_p, live_p;
2276 tree def, def_stmt;
2277 enum vect_def_type dt;
2279 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2280 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2282 worklist = VEC_alloc (tree, heap, 64);
2284 /* 1. Init worklist. */
2286 bb = loop->header;
2287 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2289 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2291 fprintf (vect_dump, "init: phi relevant? ");
2292 print_generic_expr (vect_dump, phi, TDF_SLIM);
2295 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
2296 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
2299 for (i = 0; i < nbbs; i++)
2301 bb = bbs[i];
2302 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2304 stmt = bsi_stmt (si);
2306 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2308 fprintf (vect_dump, "init: stmt relevant? ");
2309 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2312 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
2313 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
2318 /* 2. Process worklist. */
2320 while (VEC_length (tree, worklist) > 0)
2322 stmt = VEC_pop (tree, worklist);
2324 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2326 fprintf (vect_dump, "worklist: examine stmt: ");
2327 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2330 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2331 in the loop, mark the stmt that defines it (DEF_STMT) as
2332 relevant/irrelevant and live/dead according to the liveness and
2333 relevance properties of STMT. */
2336 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2338 ann = stmt_ann (stmt);
2339 stmt_vinfo = vinfo_for_stmt (stmt);
2341 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
2342 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2344 /* Generally, the liveness and relevance properties of STMT are
2345 propagated to the DEF_STMTs of its USEs:
2346 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2347 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
2349 Exceptions:
2351 (case 1)
2352 If USE is used only for address computations (e.g. array indexing),
2353 which does not need to be directly vectorized, then the
2354 liveness/relevance of the respective DEF_STMT is left unchanged.
2356 (case 2)
2357 If STMT has been identified as defining a reduction variable, then
2358 we have two cases:
2359 (case 2.1)
2360 The last use of STMT is the reduction-variable, which is defined
2361 by a loop-header-phi. We don't want to mark the phi as live or
2362 relevant (because it does not need to be vectorized, it is handled
2363 as part of the vectorization of the reduction), so in this case we
2364 skip the call to vect_mark_relevant.
2365 (case 2.2)
2366 The rest of the uses of STMT are defined in the loop body. For
2367 the def_stmt of these uses we want to set liveness/relevance
2368 as follows:
2369 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2370 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
2371 because even though STMT is classified as live (since it defines a
2372 value that is used across loop iterations) and irrelevant (since it
2373 is not used inside the loop), it will be vectorized, and therefore
2374 the corresponding DEF_STMTs need to be marked as relevant. */
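
/* A sketch of the two cases above (hypothetical GIMPLE, illustrative
   only): for a reduction

       loop header:  sum_1 = PHI <sum_0, sum_2>
       ...
       t_3 = a[i_5];
       sum_2 = t_3 + sum_1;

   the stmt "sum_2 = t_3 + sum_1" is live (sum_2 is used after the
   loop) but irrelevant.  Case 2.1 leaves the phi that defines sum_1
   unmarked; case 2.2 marks the def-stmt of t_3 (the load) as
   relevant but not live.  */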
2377 /* case 2.2: */
2378 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2380 gcc_assert (!relevant_p && live_p);
2381 relevant_p = true;
2382 live_p = false;
2385 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2387 /* case 1: we are only interested in uses that need to be vectorized.
2388 Uses that are used for address computation are not considered
2389 relevant. */
2391 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2392 continue;
2394 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2397 LOOP_LOC (loop_vinfo)))
2398 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2399 VEC_free (tree, heap, worklist);
2400 return false;
2403 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2404 continue;
2406 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2408 fprintf (vect_dump, "worklist: examine use %d: ", i);
2409 print_generic_expr (vect_dump, use, TDF_SLIM);
2412 bb = bb_for_stmt (def_stmt);
2413 if (!flow_bb_inside_loop_p (loop, bb))
2414 continue;
2416 /* case 2.1: the reduction-use does not mark the defining-phi
2417 as relevant. */
2418 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2419 && TREE_CODE (def_stmt) == PHI_NODE)
2420 continue;
2422 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
2424 } /* while worklist */
2426 VEC_free (tree, heap, worklist);
2427 return true;
2431 /* Function vect_can_advance_ivs_p
2433 In case the number of iterations that LOOP iterates is unknown at compile
2434 time, an epilog loop will be generated, and the loop induction variables
2435 (IVs) will be "advanced" to the value they are supposed to take just before
2436 the epilog loop. Here we check that the access function of the loop IVs
2437 and the expression that represents the loop bound are simple enough.
2438 These restrictions will be relaxed in the future. */
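
/* E.g. (a sketch, hypothetical names): an IV with access function
   {init_7, +, 4}_1 can be advanced to
   "init_7 + 4 * number-of-iterations-of-the-vector-loop" before the
   epilog loop is entered, whereas an IV whose evolution part is
   itself a chrec (a polynomial of degree >= 2) is rejected below.  */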
2440 static bool
2441 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2443 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2444 basic_block bb = loop->header;
2445 tree phi;
2447 /* Analyze phi functions of the loop header. */
2449 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2450 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
2452 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2454 tree access_fn = NULL;
2455 tree evolution_part;
2457 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2459 fprintf (vect_dump, "Analyze phi: ");
2460 print_generic_expr (vect_dump, phi, TDF_SLIM);
2463 /* Skip virtual phis. The data dependences that are associated with
2464 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2466 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2468 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2469 fprintf (vect_dump, "virtual phi. skip.");
2470 continue;
2473 /* Skip reduction phis. */
2475 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2477 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2478 fprintf (vect_dump, "reduc phi. skip.");
2479 continue;
2482 /* Analyze the evolution function. */
2484 access_fn = instantiate_parameters
2485 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2487 if (!access_fn)
2489 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2490 fprintf (vect_dump, "No Access function.");
2491 return false;
2494 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2496 fprintf (vect_dump, "Access function of PHI: ");
2497 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2500 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2502 if (evolution_part == NULL_TREE)
2504 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2505 fprintf (vect_dump, "No evolution.");
2506 return false;
2509 /* FORNOW: We do not transform initial conditions of IVs
2510 whose evolution functions are polynomials of degree >= 2. */
2512 if (tree_is_chrec (evolution_part))
2513 return false;
2516 return true;
2520 /* Function vect_get_loop_niters.
2522 Determine how many iterations the loop is executed.
2523 If an expression that represents the number of iterations
2524 can be constructed, place it in NUMBER_OF_ITERATIONS.
2525 Return the loop exit condition. */
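
/* E.g. (illustration): for "for (i = 0; i < n; i++)" this typically
   sets NUMBER_OF_ITERATIONS to the tree for 'n' (possibly symbolic)
   and returns the COND_EXPR that tests the exit condition.  */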
2527 static tree
2528 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2530 tree niters;
2532 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2533 fprintf (vect_dump, "=== get_loop_niters ===");
2535 niters = number_of_iterations_in_loop (loop);
2537 if (niters != NULL_TREE
2538 && niters != chrec_dont_know)
2540 *number_of_iterations = niters;
2542 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2544 fprintf (vect_dump, "==> get_loop_niters:" );
2545 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2549 return get_loop_exit_condition (loop);
2553 /* Function vect_analyze_loop_form.
2555 Verify the following restrictions (some may be relaxed in the future):
2556 - it is an innermost loop
2557 - number of BBs = 2 (which are the loop header and the latch)
2558 - the loop has a pre-header
2559 - the loop has a single entry and exit
2560 - the loop exit condition is simple enough, and the number of iterations
2561 can be analyzed (a countable loop). */
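
/* Schematically (a sketch), the accepted form is a two-block
   do-while loop:

              preheader
                  |
                  v
       +------> header ------> single exit
       |   (all stmts, ending
       |    in the exit test)
       |          |
       +------- latch (empty)

   i.e., the header's two incoming edges come from the preheader and
   the latch, as the checks below verify.  */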
2563 static loop_vec_info
2564 vect_analyze_loop_form (struct loop *loop)
2566 loop_vec_info loop_vinfo;
2567 tree loop_cond;
2568 tree number_of_iterations = NULL;
2569 LOC loop_loc;
2571 loop_loc = find_loop_location (loop);
2573 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2574 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2576 if (loop->inner)
2578 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2579 fprintf (vect_dump, "not vectorized: nested loop.");
2580 return NULL;
2583 if (!loop->single_exit
2584 || loop->num_nodes != 2
2585 || EDGE_COUNT (loop->header->preds) != 2)
2587 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2589 if (!loop->single_exit)
2590 fprintf (vect_dump, "not vectorized: multiple exits.");
2591 else if (loop->num_nodes != 2)
2592 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2593 else if (EDGE_COUNT (loop->header->preds) != 2)
2594 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2597 return NULL;
2600 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2601 that the loop is represented as a do-while (with a proper if-guard
2602 before the loop if needed), where the loop header contains all the
2603 executable statements, and the latch is empty. */
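
/* E.g. (illustration): a source-level "while (i < n) { ... }" is
   expected here in the guarded do-while form

       if (i < n)
         do { ...; i = i + 1; } while (i < n);

   so the COND_EXPR at the end of the header is the exit condition
   found by vect_get_loop_niters.  */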
2604 if (!empty_block_p (loop->latch))
2606 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2607 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2608 return NULL;
2611 /* Make sure there exists a single-predecessor exit bb: */
2612 if (!single_pred_p (loop->single_exit->dest))
2614 edge e = loop->single_exit;
2615 if (!(e->flags & EDGE_ABNORMAL))
2617 split_loop_exit_edge (e);
2618 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2619 fprintf (vect_dump, "split exit edge.");
2621 else
2623 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2624 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2625 return NULL;
2629 if (empty_block_p (loop->header))
2631 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2632 fprintf (vect_dump, "not vectorized: empty loop.");
2633 return NULL;
2636 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2637 if (!loop_cond)
2639 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2640 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2641 return NULL;
2644 if (!number_of_iterations)
2646 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2647 fprintf (vect_dump,
2648 "not vectorized: number of iterations cannot be computed.");
2649 return NULL;
2652 if (chrec_contains_undetermined (number_of_iterations))
2654 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2655 fprintf (vect_dump, "Infinite number of iterations.");
2656 return NULL;
2659 loop_vinfo = new_loop_vec_info (loop);
2660 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2662 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2664 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2666 fprintf (vect_dump, "Symbolic number of iterations is ");
2667 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2670 else
2671 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2673 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2674 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2675 return NULL;
2678 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2679 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2681 return loop_vinfo;
2685 /* Function vect_analyze_loop.
2687 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2688 for it. The different analyses will record information in the
2689 loop_vec_info struct. */
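
/* A sketch of the intended use, along the lines of the driver in
   tree-vectorizer.c (illustrative only; details of the driver may
   differ):

       loop_vec_info loop_vinfo = vect_analyze_loop (loop);
       if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
         vect_transform_loop (loop_vinfo, loops);
   */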
2690 loop_vec_info
2691 vect_analyze_loop (struct loop *loop)
2693 bool ok;
2694 loop_vec_info loop_vinfo;
2696 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2697 fprintf (vect_dump, "===== analyze_loop_nest =====");
2699 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2701 loop_vinfo = vect_analyze_loop_form (loop);
2702 if (!loop_vinfo)
2704 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2705 fprintf (vect_dump, "bad loop form.");
2706 return NULL;
2709 /* Find all data references in the loop (which correspond to vdefs/vuses)
2710 and analyze their evolution in the loop.
2712 FORNOW: Handle only simple array references whose
2713 alignment can be forced, and aligned pointer references. */
2715 ok = vect_analyze_data_refs (loop_vinfo);
2716 if (!ok)
2718 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2719 fprintf (vect_dump, "bad data references.");
2720 destroy_loop_vec_info (loop_vinfo);
2721 return NULL;
2724 /* Classify all cross-iteration scalar data-flow cycles.
2725 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2727 vect_analyze_scalar_cycles (loop_vinfo);
2729 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2731 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2732 if (!ok)
2734 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2735 fprintf (vect_dump, "unexpected pattern.");
2736 destroy_loop_vec_info (loop_vinfo);
2737 return NULL;
2740 ok = vect_determine_vectorization_factor (loop_vinfo);
2741 if (!ok)
2743 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2744 fprintf (vect_dump, "can't determine vectorization factor.");
2745 destroy_loop_vec_info (loop_vinfo);
2746 return NULL;
2749 /* Analyze data dependences between the data-refs in the loop.
2750 FORNOW: fail at the first data dependence that we encounter. */
2752 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2753 if (!ok)
2755 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2756 fprintf (vect_dump, "bad data dependence.");
2757 destroy_loop_vec_info (loop_vinfo);
2758 return NULL;
2761 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2762 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2764 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2765 if (!ok)
2767 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2768 fprintf (vect_dump, "bad data access.");
2769 destroy_loop_vec_info (loop_vinfo);
2770 return NULL;
2773 /* Analyze the alignment of the data-refs in the loop.
2774 FORNOW: Only aligned accesses are handled. */
2776 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2777 if (!ok)
2779 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2780 fprintf (vect_dump, "bad data alignment.");
2781 destroy_loop_vec_info (loop_vinfo);
2782 return NULL;
2785 /* Scan all the operations in the loop and make sure they are
2786 vectorizable. */
2788 ok = vect_analyze_operations (loop_vinfo);
2789 if (!ok)
2791 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2792 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2793 destroy_loop_vec_info (loop_vinfo);
2794 return NULL;
2797 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2799 return loop_vinfo;