/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);
/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *,
                                  struct ptr_info_def **, subvar_t *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,

     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  INITIAL_OFFSET will then be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE if C_i is not a multiple of
   VECTYPE_ALIGNMENT, and C` otherwise.  (The MISALIGN modulo VECTYPE_ALIGNMENT
   computation is performed by the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., if there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
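/* For illustration (the constants below are invented for the example above):
   if C_j = 40 and C = 12, then C` = 3 * 40 + 12 = 132, so INITIAL_OFFSET is
   'i * C_i + 132' and STEP is 40.  If C_i is a multiple of the vector
   alignment, MISALIGN is 132; otherwise it is NULL_TREE.  */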
static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with the initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - the loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET may not be constant, e.g., for arrays of variable
           sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted
             with constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                              fold_convert (ssizetype, vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:

   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          gcc_assert (!STMT_VINFO_RELEVANT_P (stmt_info));
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
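/* E.g., in the statement 'a[i] = x', the use 'i' serves only as an array
   index, while the use 'x' is a non-indexing operand: this predicate
   returns false for 'i' and true for 'x'.  */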
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction:  (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:  (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

         even though it has a def-use cycle caused by the induction
         variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

         because the def-use cycle in loop3 is considered "not relevant" -
         i.e., it does not need to be vectorized because it is only used for
         array indexing (see 'mark_stmts_to_be_vectorized').  The def-use
         cycle in loop2 on the other hand is relevant (it is being written
         to memory).  */
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual PHIs.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  return;
}
/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references A and B access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will no longer be necessary once alias_sets_conflict_p
   becomes less conservative.  */
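/* For illustration, given

     void f (int *restrict p, int *q, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         p[i] = q[i] + 1;
     }

   the store through the restrict-qualified pointer P cannot alias the load
   through Q within the same scope, so DIFFER_P is set to true even though
   the two base addresses themselves cannot be compared at compile time.  */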
static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of
     any instruction reading or writing through a different pointer, in the
     same block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */
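/* E.g., consider a loop with dependence distance 3:

     for (i = 0; i < n; i++)
       a[i+3] = a[i] + 1;

   With a vectorization factor of 4, iterations i..i+3 execute in parallel,
   so the value stored to a[i+3] in iteration i would not be seen by the load
   of a[i+3] in iteration i+3; the dependence is reported (3 < VF).  With
   VF <= 3 the distance spans at least one full vector iteration and the
   dependence is harmless.  */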
static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int dist = 0;
  unsigned int loop_depth = 0;
  struct loop *loop_nest = loop;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Find loop depth.  */
  while (loop_nest)
    {
      if (loop_nest->outer && loop_nest->outer->outer)
        {
          loop_nest = loop_nest->outer;
          loop_depth++;
        }
      else
        break;
    }

  /* Compute distance vector.  */
  compute_subscript_distance (ddr);
  build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);

  if (!DDR_DIST_VECT (ddr))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  dist = DDR_DIST_VECT (ddr)[loop_depth];

  /* Same loop iteration.  */
  if (dist == 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "dependence distance 0.");
      return false;
    }

  if (dist >= vectorization_factor)
    /* Dependence distance does not create dependence, as far as
       vectorization is concerned, in this case.  */
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */
static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
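/* E.g., with a 16-byte vector type, an access whose constant offset from its
   (aligned) base works out to 20 bytes gets DR_MISALIGNMENT = 20 % 16 = 4;
   if the offset cannot be reduced to a compile-time constant,
   DR_MISALIGNMENT stays at -1 (unknown).  */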
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.  */
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by doing
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and
     all other accesses are not necessarily aligned, or (2) use loop
     versioning to generate one loop in which all accesses are aligned, and
     another loop in which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian,
      International Journal of Parallel Programming, Vol. 30, No. 2,
      April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning,
     how many versions to create, etc.  The cost model will probably consist
     of generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the
     transformation (whether to generate regular loads/stores, or with
     special handling for misalignment).
  */
  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          break;
        }
    }
  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     TODO: - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */
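  /* For example (numbers invented for illustration): with VF = 4, 4-byte
     elements, and DR_MISALIGNMENT (dr0) = 8 bytes, the misalignment as an
     element count is 8 / 4 = 2, so npeel = 4 - 2 = 2 scalar iterations are
     peeled; afterwards dr0 is aligned and every other known-alignment DR_i
     is advanced by npeel * 4 = 8 bytes, modulo the vector size.  */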
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
        {
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

              if (dr == dr0)
                continue;
              if (known_alignment_for_access_p (dr)
                  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
                DR_MISALIGNMENT (dr) = 0;
              else if (known_alignment_for_access_p (dr)
                       && known_alignment_for_access_p (dr0))
                {
                  int drsize =
                    GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

                  DR_MISALIGNMENT (dr) += npeel * drsize;
                  DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
            }
          datarefs = loop_read_datarefs;
        }

      DR_MISALIGNMENT (dr0) = 0;
    }
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will
   be relaxed.  */
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);

  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
      && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
    fprintf (vect_dump, "Alignment of access forced using peeling.");

  return true;
}
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */
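/* E.g., with 4-byte elements, the access 'a[i]' has a step of 4 bytes (equal
   to TYPE_SIZE_UNIT of the scalar type) and is accepted, while the strided
   access 'a[2*i]' has a step of 8 bytes and is rejected as not
   consecutive.  */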
static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */
static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}
/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise return NULL.
   PTR_STEP - the stride of MEMREF in the loop.
   PTR_INIT - the initial condition of MEMREF in the loop.  */
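/* For illustration, given a 4-byte element type in

     while (n--)
       *p++ = 0;

   the access function of P is {p_0, +, 4}_1: PTR_INIT is p_0 (loop
   invariant), and PTR_STEP is 4, a constant multiple of the type size, so
   the access is consecutive and a data_reference is built for '*p'.  */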
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init,
                                 tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                        fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}
/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if it writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
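/* E.g., for the address '&a[2] + 8' with 4-byte array elements, the
   ADDR_EXPR operand resolves (through vect_object_analysis) to base &a with
   offset 8, and the enclosing PLUS_EXPR then adds the constant 8, giving
   INITIAL_OFFSET 16, MISALIGN 16 (when the base is aligned) and STEP 0.  */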
static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset,
                       tree *misalign, tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  struct ptr_info_def *dummy1;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset
         is a number, we can add it to the misalignment value calculated for
         base, otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset
         calculated for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy, &dummy1, &dummy2);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}
/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR 'a.b[i] + 4B', BASE is 'a', and OFFSET is the overall
   offset of 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an
   OFFSET instantiated with initial_conditions of access_functions of
   variables, modulo alignment, and STEP is the evolution of the DR_REF in
   this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF
   and COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access
              function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if it writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference
                                   MEMREF.  E.g., if MEMREF is a.b[k].c[i][j]
                                   the returned base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE
              if the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes
   PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
   SUBVAR - Sub-variables of the variable

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
1747 static tree
1748 vect_object_analysis (tree memref, tree stmt, bool is_read,
1749 tree vectype, struct data_reference **dr,
1750 tree *offset, tree *misalign, tree *step,
1751 bool *base_aligned, tree *memtag,
1752 struct ptr_info_def **ptr_info, subvar_t *subvars)
1754 tree base = NULL_TREE, base_address = NULL_TREE;
1755 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1756 tree object_step = ssize_int (0), address_step = ssize_int (0);
1757 bool object_base_aligned = true, address_base_aligned = true;
1758 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1759 HOST_WIDE_INT pbitsize, pbitpos;
1760 tree poffset, bit_pos_in_bytes;
1761 enum machine_mode pmode;
1762 int punsignedp, pvolatilep;
1763 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1766 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1767 struct data_reference *ptr_dr = NULL;
1768 tree access_fn, evolution_part, address_to_analyze;
1770 *ptr_info = NULL;
1772 /* Part 1: */
1773 /* Case 1. handled_component_p refs. */
1774 if (handled_component_p (memref))
1776 /* 1.1 call get_inner_reference. */
1777 /* Find the base and the offset from it. */
1778 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1779 &pmode, &punsignedp, &pvolatilep, false);
1780 if (!base)
1781 return NULL_TREE;
1783 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1784 if (poffset
1785 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1786 &object_offset, &object_misalign, &object_step))
1788 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1790 fprintf (vect_dump, "failed to compute offset or step for ");
1791 print_generic_expr (vect_dump, memref, TDF_SLIM);
1793 return NULL_TREE;
1796 /* Add bit position to OFFSET and MISALIGN. */
1798 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1799 /* Check that there is no remainder in bits. */
1800 if (pbitpos%BITS_PER_UNIT)
1802 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1803 fprintf (vect_dump, "bit offset alignment.");
1804 return NULL_TREE;
1806 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1807 if (object_misalign)
1808 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1809 bit_pos_in_bytes);
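/* E.g.: on a target with BITS_PER_UNIT == 8, a field at bit position
   64 contributes 64 / BITS_PER_UNIT == 8 bytes to OFFSET (and to
   MISALIGN, when it is known), whereas a bit-field at bit position 3
   fails the check above, since 3 % BITS_PER_UNIT != 0.  */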
1811 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1812 if (!(*dr))
1814 if (TREE_CODE (memref) == ARRAY_REF)
1815 *dr = analyze_array (stmt, memref, is_read);
1816 else
1817 /* FORNOW. */
1818 return NULL_TREE;
1820 memref = base; /* To continue analysis of BASE. */
1821 /* fall through */
1824 /* Part 1: Case 2. Declarations. */
1825 if (DECL_P (memref))
1827 /* We expect to get a decl only if we already have a DR. */
1828 if (!(*dr))
1830 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1832 fprintf (vect_dump, "unhandled decl ");
1833 print_generic_expr (vect_dump, memref, TDF_SLIM);
1835 return NULL_TREE;
1838 /* 2.1 check the alignment. */
1839 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1840 object_base_aligned = true;
1841 else
1842 object_base_aligned = false;
1844 /* 2.2 update DR_BASE_NAME if necessary. */
1845 if (!DR_BASE_NAME ((*dr)))
1846 /* For alias analysis: in case the analysis of an INDIRECT_REF brought
1847 us to an object. */
1848 DR_BASE_NAME ((*dr)) = memref;
1850 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1851 *subvars = get_subvars_for_var (memref);
1852 base_address = build_fold_addr_expr (memref);
1853 *memtag = memref;
1856 /* Part 1: Case 3. INDIRECT_REFs. */
1857 else if (TREE_CODE (memref) == INDIRECT_REF)
1859 tree ptr_ref = TREE_OPERAND (memref, 0);
1860 if (TREE_CODE (ptr_ref) == SSA_NAME)
1861 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1863 /* 3.1 get the access function. */
1864 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1865 if (!access_fn)
1867 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1868 LOOP_LOC (loop_vinfo)))
1869 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1870 return NULL_TREE;
1872 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1874 fprintf (vect_dump, "Access function of ptr: ");
1875 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1878 /* 3.2 analyze evolution of MEMREF. */
1879 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1880 if (evolution_part)
1882 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1883 access_fn, &ptr_init, &ptr_step);
1884 if (!(ptr_dr))
1885 return NULL_TREE;
1887 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1888 address_to_analyze = ptr_init;
1890 else
1892 if (!(*dr))
1894 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1895 LOOP_LOC (loop_vinfo)))
1896 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1897 return NULL_TREE;
1899 /* Since a DR already exists for MEMREF, we are analyzing the init of
1900 the access function, which does not necessarily have an evolution
1901 in the loop. */
1902 address_to_analyze = initial_condition_in_loop_num (access_fn,
1903 loop->num);
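/* E.g. (hypothetical): for "*p" where p has access function
   {p_0, +, 4}_1, the evolution part 4 goes into STEP and the initial
   value p_0 becomes the address to analyze; if instead p is loop
   invariant and we got here from a component ref that already built a
   DR, only the initial condition p is analyzed.  */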
1906 /* 3.3 set data-reference structure for MEMREF. */
1907 *dr = (*dr) ? *dr : ptr_dr;
1909 /* 3.4 call vect_address_analysis to analyze INIT of the access
1910 function. */
1911 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1912 vectype, *dr, &address_offset, &address_misalign,
1913 &address_step, &address_base_aligned);
1914 if (!base_address)
1915 return NULL_TREE;
1917 switch (TREE_CODE (base_address))
1919 case SSA_NAME:
1920 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1921 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1922 *memtag = get_var_ann (
1923 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1924 break;
1925 case ADDR_EXPR:
1926 *memtag = TREE_OPERAND (base_address, 0);
1927 break;
1928 default:
1929 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1930 LOOP_LOC (loop_vinfo)))
1932 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1933 print_generic_expr (vect_dump, memref, TDF_SLIM);
1935 return NULL_TREE;
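/* In other words: a base of the form &decl uses the object itself as
   the memory tag, while an SSA_NAME pointer base uses the type memory
   tag of its underlying variable, falling back to the tag of MEMREF's
   original pointer operand.  */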
1939 if (!base_address)
1940 /* MEMREF cannot be analyzed. */
1941 return NULL_TREE;
1943 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1944 *subvars = get_subvars_for_var (*memtag);
1946 /* Part 2: Combine the results of object and address analysis to calculate
1947 INITIAL_OFFSET, STEP and misalignment info. */
1948 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1949 if (object_misalign && address_misalign)
1950 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1951 else
1952 *misalign = NULL_TREE;
1953 *step = size_binop (PLUS_EXPR, object_step, address_step);
1954 *base_aligned = object_base_aligned && address_base_aligned;
1956 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1958 fprintf (vect_dump, "Results of object analysis for: ");
1959 print_generic_expr (vect_dump, memref, TDF_SLIM);
1960 fprintf (vect_dump, "\n\tbase_address: ");
1961 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1962 fprintf (vect_dump, "\n\toffset: ");
1963 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1964 fprintf (vect_dump, "\n\tstep: ");
1965 print_generic_expr (vect_dump, *step, TDF_SLIM);
1966 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1967 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1969 return base_address;
1973 /* Function vect_analyze_data_refs.
1975 Find all the data references in the loop.
1977 The general structure of the analysis of data refs in the vectorizer is as
1978 follows:
1979 1- vect_analyze_data_refs(loop):
1980 Find and analyze all data-refs in the loop:
1981 foreach ref
1982 base_address = vect_object_analysis(ref)
1983 1.1- vect_object_analysis(ref):
1984 Analyze ref, and build a DR (data_reference struct) for it;
1985 compute base, initial_offset, step and alignment.
1986 Call get_inner_reference for refs handled in this function.
1987 Call vect_address_analysis(addr) to analyze pointer-type expressions.
1988 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1989 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1990 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1991 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1992 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1994 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1995 whose base is really an array (not a pointer) and whose alignment
1996 can be forced. These restrictions will be relaxed. */
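/* E.g. (hypothetical): with "int a[N]", the ref in "a[i] = x" is an
   ARRAY_REF whose base is an array and whose alignment can be forced,
   so it is handled; a store through a pointer, "*p = x", is handled
   only if the pointer access is (or can be shown to be) aligned.  */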
1998 static bool
1999 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2001 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2002 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2003 int nbbs = loop->num_nodes;
2004 block_stmt_iterator si;
2005 int j;
2006 struct data_reference *dr;
2008 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2009 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
2011 for (j = 0; j < nbbs; j++)
2013 basic_block bb = bbs[j];
2014 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2016 bool is_read = false;
2017 tree stmt = bsi_stmt (si);
2018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2019 varray_type *datarefs = NULL;
2020 tree memref = NULL;
2021 tree scalar_type, vectype;
2022 tree base, offset, misalign, step, tag;
2023 struct ptr_info_def *ptr_info;
2024 bool base_aligned;
2025 subvar_t subvars = NULL;
2026 bool no_vuse, no_vmaymust;
2028 /* Assumption: a data-ref exists in STMT if and only if
2029 STMT has vuses/vdefs. */
2031 no_vuse = ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE);
2032 no_vmaymust = ZERO_SSA_OPERANDS (stmt,
2033 SSA_OP_VMAYDEF | SSA_OP_VMUSTDEF);
2034 if (no_vuse && no_vmaymust)
2035 continue;
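/* E.g.: "x_1 = y_2 + z_3" has no virtual operands and is skipped;
   "a[i] = x_1" has a vdef and "x_1 = a[i]" has a vuse, so both reach
   the data-ref analysis below.  */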
2037 if (!no_vuse && !no_vmaymust)
2039 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2041 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
2042 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2044 return false;
2047 if (TREE_CODE (stmt) != MODIFY_EXPR)
2049 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2051 fprintf (vect_dump, "unexpected vops in stmt: ");
2052 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2054 return false;
2057 if (!no_vuse)
2059 memref = TREE_OPERAND (stmt, 1);
2060 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2061 is_read = true;
2063 else /* vdefs */
2065 memref = TREE_OPERAND (stmt, 0);
2066 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2067 is_read = false;
2070 scalar_type = TREE_TYPE (memref);
2071 vectype = get_vectype_for_scalar_type (scalar_type);
2072 if (!vectype)
2074 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2076 fprintf (vect_dump, "no vectype for stmt: ");
2077 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2078 fprintf (vect_dump, " scalar_type: ");
2079 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2081 /* It is not possible to vectorize this data reference. */
2082 return false;
2084 /* Analyze MEMREF. If it is of a supported form, build data_reference
2085 struct for it (DR). */
2086 dr = NULL;
2087 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2088 &offset, &misalign, &step,
2089 &base_aligned, &tag, &ptr_info,
2090 &subvars);
2091 if (!base)
2093 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2094 LOOP_LOC (loop_vinfo)))
2096 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2097 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2099 return false;
2101 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2102 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2103 STMT_VINFO_VECT_STEP (stmt_info) = step;
2104 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2105 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2106 STMT_VINFO_MEMTAG (stmt_info) = tag;
2107 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
2108 STMT_VINFO_SUBVARS (stmt_info) = subvars;
2109 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2110 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2111 STMT_VINFO_DATA_REF (stmt_info) = dr;
2115 return true;
2119 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2121 /* Function vect_mark_relevant.
2123 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2125 static void
2126 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2127 bool relevant_p, bool live_p)
2129 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2130 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
2131 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2133 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2134 fprintf (vect_dump, "mark relevant %d, live %d.", relevant_p, live_p);
2136 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2138 if (TREE_CODE (stmt) == PHI_NODE)
2139 /* Don't mark as relevant because it's not going to be vectorized. */
2140 return;
2142 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
2144 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
2145 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2147 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2148 fprintf (vect_dump, "already marked relevant/live.");
2149 return;
2152 VEC_safe_push (tree, heap, *worklist, stmt);
2156 /* Function vect_stmt_relevant_p.
2158 Return true if STMT, in the loop represented by LOOP_VINFO, is
2159 "relevant for vectorization".
2161 A stmt is considered "relevant for vectorization" if:
2162 - it has uses outside the loop, or
2163 - it has vdefs (it alters memory), or
2164 - it is a control stmt in the loop (other than the loop exit condition).
2166 CHECKME: what other side effects would the vectorizer allow? */
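/* For instance (hypothetical):

        for (i = 0; i < N; i++)
          {
            a[i] = b[i];      <-- relevant: alters memory
            s = s + b[i];     <-- live, if s is used after the loop
          }

   The store is marked relevant; the def of s is marked live because
   its value escapes the loop through a loop-exit phi.  */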
2168 static bool
2169 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2170 bool *relevant_p, bool *live_p)
2172 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2173 ssa_op_iter op_iter;
2174 imm_use_iterator imm_iter;
2175 use_operand_p use_p;
2176 def_operand_p def_p;
2178 *relevant_p = false;
2179 *live_p = false;
2181 /* cond stmt other than loop exit cond. */
2182 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2183 *relevant_p = true;
2185 /* changing memory. */
2186 if (TREE_CODE (stmt) != PHI_NODE)
2187 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2189 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2190 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2191 *relevant_p = true;
2194 /* uses outside the loop. */
2195 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2197 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2199 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2200 if (!flow_bb_inside_loop_p (loop, bb))
2202 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2203 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2205 /* We expect all such uses to be in the loop exit phis
2206 (because of loop-closed SSA form). */
2207 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2208 gcc_assert (bb == loop->single_exit->dest);
2210 *live_p = true;
2215 return (*live_p || *relevant_p);
2219 /* Function vect_mark_stmts_to_be_vectorized.
2221 Not all stmts in the loop need to be vectorized. For example:
2223 for i...
2224 for j...
2225 1. T0 = i + j
2226 2. T1 = a[T0]
2228 3. j = j + 1
2230 Stmts 1 and 3 do not need to be vectorized, because loop control and
2231 addressing of vectorized data-refs are handled differently.
2233 This pass detects such stmts. */
2235 static bool
2236 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2238 VEC(tree,heap) *worklist;
2239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2240 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2241 unsigned int nbbs = loop->num_nodes;
2242 block_stmt_iterator si;
2243 tree stmt, use;
2244 stmt_ann_t ann;
2245 ssa_op_iter iter;
2246 unsigned int i;
2247 stmt_vec_info stmt_vinfo;
2248 basic_block bb;
2249 tree phi;
2250 bool relevant_p, live_p;
2251 tree def, def_stmt;
2252 enum vect_def_type dt;
2254 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2255 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2257 worklist = VEC_alloc (tree, heap, 64);
2259 /* 1. Init worklist. */
2261 bb = loop->header;
2262 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2264 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2266 fprintf (vect_dump, "init: phi relevant? ");
2267 print_generic_expr (vect_dump, phi, TDF_SLIM);
2270 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
2271 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
2274 for (i = 0; i < nbbs; i++)
2276 bb = bbs[i];
2277 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2279 stmt = bsi_stmt (si);
2281 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2283 fprintf (vect_dump, "init: stmt relevant? ");
2284 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2287 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
2288 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
2293 /* 2. Process_worklist */
2295 while (VEC_length (tree, worklist) > 0)
2297 stmt = VEC_pop (tree, worklist);
2299 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2301 fprintf (vect_dump, "worklist: examine stmt: ");
2302 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2305 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2306 in the loop, mark the stmt that defines it (DEF_STMT) as
2307 relevant/irrelevant and live/dead according to the liveness and
2308 relevance properties of STMT.
2311 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2313 ann = stmt_ann (stmt);
2314 stmt_vinfo = vinfo_for_stmt (stmt);
2316 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
2317 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2319 /* Generally, the liveness and relevance properties of STMT are
2320 propagated to the DEF_STMTs of its USEs:
2321 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2322 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
2324 Exceptions:
2326 - if USE is used only for address computations (e.g. array indexing),
2327 which does not need to be directly vectorized, then the
2328 liveness/relevance of the respective DEF_STMT is left unchanged.
2330 - if STMT has been identified as defining a reduction variable, then:
2331 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2332 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
2333 because even though STMT is classified as live (since it defines a
2334 value that is used across loop iterations) and irrelevant (since it
2335 is not used inside the loop), it will be vectorized, and therefore
2336 the corresponding DEF_STMTs need to be marked as relevant.
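/* E.g. (hypothetical): for the reduction

        s_1 = phi <s_init, s_2>
        s_2 = s_1 + a[i]      <-- live (its value is used after the
                                  loop) but not relevant

   this exception marks the stmts that feed s_2 (e.g. the load from
   a[i]) as relevant, so that they are vectorized as well.  */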
2339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2341 gcc_assert (!relevant_p && live_p);
2342 relevant_p = true;
2343 live_p = false;
2346 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2348 /* We are only interested in uses that need to be vectorized. Uses
2349 that are used for address computation are not considered relevant.
2351 if (exist_non_indexing_operands_for_use_p (use, stmt))
2353 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2355 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2356 LOOP_LOC (loop_vinfo)))
2357 fprintf (vect_dump,
2358 "not vectorized: unsupported use in stmt.");
2359 VEC_free (tree, heap, worklist);
2360 return false;
2363 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2364 continue;
2366 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2368 fprintf (vect_dump, "worklist: examine use %d: ", i);
2369 print_generic_expr (vect_dump, use, TDF_SLIM);
2372 bb = bb_for_stmt (def_stmt);
2373 if (!flow_bb_inside_loop_p (loop, bb))
2374 continue;
2376 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2378 fprintf (vect_dump, "def_stmt: ");
2379 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2382 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
2385 } /* while worklist */
2387 VEC_free (tree, heap, worklist);
2388 return true;
2392 /* Function vect_can_advance_ivs_p
2394 In case the number of iterations that LOOP iterates is unknown at compile
2395 time, an epilog loop will be generated, and the loop induction variables
2396 (IVs) will be "advanced" to the value they are supposed to take just before
2397 the epilog loop. Here we check that the access function of the loop IVs
2398 and the expression that represents the loop bound are simple enough.
2399 These restrictions will be relaxed in the future. */
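/* E.g. (hypothetical): an IV with access function {init, +, step}_1
   can be advanced to (roughly) "init + step * niters" before the
   epilog loop; an IV whose evolution part is itself a chrec, e.g.
   {0, +, {1, +, 1}}_1 (a second-degree polynomial), is rejected
   below.  */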
2401 static bool
2402 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2404 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2405 basic_block bb = loop->header;
2406 tree phi;
2408 /* Analyze phi functions of the loop header. */
2410 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2411 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
2413 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2415 tree access_fn = NULL;
2416 tree evolution_part;
2418 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2420 fprintf (vect_dump, "Analyze phi: ");
2421 print_generic_expr (vect_dump, phi, TDF_SLIM);
2424 /* Skip virtual phi's. The data dependences that are associated with
2425 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2427 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2429 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2430 fprintf (vect_dump, "virtual phi. skip.");
2431 continue;
2434 /* Analyze the evolution function. */
2436 access_fn = instantiate_parameters
2437 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2439 if (!access_fn)
2441 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2442 fprintf (vect_dump, "No Access function.");
2443 return false;
2446 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2448 fprintf (vect_dump, "Access function of PHI: ");
2449 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2452 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2454 if (evolution_part == NULL_TREE)
2456 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2457 fprintf (vect_dump, "No evolution.");
2458 return false;
2461 /* FORNOW: We do not transform initial conditions of IVs
2462 whose evolution functions are polynomials of degree >= 2.
2464 if (tree_is_chrec (evolution_part))
2465 return false;
2468 return true;
2472 /* Function vect_get_loop_niters.
2474 Determine how many iterations the loop executes.
2475 If an expression that represents the number of iterations
2476 can be constructed, place it in NUMBER_OF_ITERATIONS.
2477 Return the loop exit condition. */
2479 static tree
2480 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2482 tree niters;
2484 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2485 fprintf (vect_dump, "=== get_loop_niters ===");
2487 niters = number_of_iterations_in_loop (loop);
2489 if (niters != NULL_TREE
2490 && niters != chrec_dont_know)
2492 *number_of_iterations = niters;
2494 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2496 fprintf (vect_dump, "==> get_loop_niters:");
2497 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2501 return get_loop_exit_condition (loop);
2505 /* Function vect_analyze_loop_form.
2507 Verify the following restrictions (some may be relaxed in the future):
2508 - it is an innermost loop
2509 - number of BBs = 2 (which are the loop header and the latch)
2510 - the loop has a pre-header
2511 - the loop has a single entry and exit
2512 - the loop exit condition is simple enough, and the number of iterations
2513 can be analyzed (a countable loop). */
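/* A loop of the expected form (hypothetical):

        i = 0;
        do
          {
            a[i] = b[i];      <-- header holds all executable stmts,
            i = i + 1;            with the exit test at its end
          }
        while (i < N);        <-- single, analyzable exit

   with an empty latch, and an if-guard before the loop when it could
   otherwise execute zero times.  */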
2515 static loop_vec_info
2516 vect_analyze_loop_form (struct loop *loop)
2518 loop_vec_info loop_vinfo;
2519 tree loop_cond;
2520 tree number_of_iterations = NULL;
2521 LOC loop_loc;
2523 loop_loc = find_loop_location (loop);
2525 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2526 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2528 if (loop->inner)
2530 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2531 fprintf (vect_dump, "not vectorized: nested loop.");
2532 return NULL;
2535 if (!loop->single_exit
2536 || loop->num_nodes != 2
2537 || EDGE_COUNT (loop->header->preds) != 2)
2539 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2541 if (!loop->single_exit)
2542 fprintf (vect_dump, "not vectorized: multiple exits.");
2543 else if (loop->num_nodes != 2)
2544 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2545 else if (EDGE_COUNT (loop->header->preds) != 2)
2546 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2549 return NULL;
2552 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2553 that the loop is represented as a do-while (with a proper if-guard
2554 before the loop if needed), where the loop header contains all the
2555 executable statements, and the latch is empty. */
2556 if (!empty_block_p (loop->latch))
2558 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2559 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2560 return NULL;
2563 /* Make sure there exists a single-predecessor exit bb: */
2564 if (!single_pred_p (loop->single_exit->dest))
2566 edge e = loop->single_exit;
2567 if (!(e->flags & EDGE_ABNORMAL))
2569 split_loop_exit_edge (e);
2570 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2571 fprintf (vect_dump, "split exit edge.");
2573 else
2575 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2576 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2577 return NULL;
2581 if (empty_block_p (loop->header))
2583 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2584 fprintf (vect_dump, "not vectorized: empty loop.");
2585 return NULL;
2588 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2589 if (!loop_cond)
2591 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2592 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2593 return NULL;
2596 if (!number_of_iterations)
2598 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2599 fprintf (vect_dump,
2600 "not vectorized: number of iterations cannot be computed.");
2601 return NULL;
2604 if (chrec_contains_undetermined (number_of_iterations))
2606 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2607 fprintf (vect_dump, "Infinite number of iterations.");
2608 return NULL;
2611 loop_vinfo = new_loop_vec_info (loop);
2612 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2614 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2616 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2618 fprintf (vect_dump, "Symbolic number of iterations is ");
2619 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2622 else
2623 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2625 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2626 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2627 return NULL;
2630 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2631 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2633 return loop_vinfo;
2637 /* Function vect_analyze_loop.
2639 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2640 for it. The different analyses will record information in the
2641 loop_vec_info struct. */
2642 loop_vec_info
2643 vect_analyze_loop (struct loop *loop)
2645 bool ok;
2646 loop_vec_info loop_vinfo;
2648 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2649 fprintf (vect_dump, "===== analyze_loop_nest =====");
2651 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2653 loop_vinfo = vect_analyze_loop_form (loop);
2654 if (!loop_vinfo)
2656 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2657 fprintf (vect_dump, "bad loop form.");
2658 return NULL;
2661 /* Find all data references in the loop (which correspond to vdefs/vuses)
2662 and analyze their evolution in the loop.
2664 FORNOW: Handle only simple array references whose
2665 alignment can be forced, and aligned pointer references. */
2667 ok = vect_analyze_data_refs (loop_vinfo);
2668 if (!ok)
2670 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2671 fprintf (vect_dump, "bad data references.");
2672 destroy_loop_vec_info (loop_vinfo);
2673 return NULL;
2676 /* Classify all cross-iteration scalar data-flow cycles.
2677 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2679 vect_analyze_scalar_cycles (loop_vinfo);
2681 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2683 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2684 if (!ok)
2686 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2687 fprintf (vect_dump, "unexpected pattern.");
2688 destroy_loop_vec_info (loop_vinfo);
2689 return NULL;
2692 ok = vect_determine_vectorization_factor (loop_vinfo);
2693 if (!ok)
2695 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2696 fprintf (vect_dump, "can't determine vectorization factor.");
2697 destroy_loop_vec_info (loop_vinfo);
2698 return NULL;
2701 /* Analyze data dependences between the data-refs in the loop.
2702 FORNOW: fail at the first data dependence that we encounter. */
2704 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2705 if (!ok)
2707 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2708 fprintf (vect_dump, "bad data dependence.");
2709 destroy_loop_vec_info (loop_vinfo);
2710 return NULL;
2713 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2714 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2716 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2717 if (!ok)
2719 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2720 fprintf (vect_dump, "bad data access.");
2721 destroy_loop_vec_info (loop_vinfo);
2722 return NULL;
2725 /* Analyze the alignment of the data-refs in the loop.
2726 FORNOW: Only aligned accesses are handled. */
2728 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2729 if (!ok)
2731 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2732 fprintf (vect_dump, "bad data alignment.");
2733 destroy_loop_vec_info (loop_vinfo);
2734 return NULL;
2737 /* Scan all the operations in the loop and make sure they are
2738 vectorizable. */
2740 ok = vect_analyze_operations (loop_vinfo);
2741 if (!ok)
2743 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2744 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2745 destroy_loop_vec_info (loop_vinfo);
2746 return NULL;
2749 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2751 return loop_vinfo;