/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_reference *, struct data_reference *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool, tree, tree *, tree *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static tree vect_get_ptr_offset (tree, tree, tree *);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static bool vect_base_addr_differ_p (struct data_reference *,
                                     struct data_reference *drb, bool *);
static tree vect_object_analysis (tree, tree, bool, tree,
                                  struct data_reference **, tree *, tree *,
                                  tree *, bool *, tree *);
static tree vect_address_analysis (tree, tree, bool, tree,
                                   struct data_reference *, tree *, tree *,
                                   tree *, bool *);
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with initial_condition of the corresponding access_fn in the loop.
   E.g.,
      for i
         for (j = 3; j < N; j++)
            a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  An INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (We perform the MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function.)

   STEP is an evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;
  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may not be loop invariant.
           E.g., for a[b[i]], we get a[D], where D = b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - the loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }
  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }

  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET may not be constant, e.g., for arrays of variable
           sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted with
             constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                 fold_convert (ssizetype, vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;
#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
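          /* E.g., on a target where UNITS_PER_SIMD_WORD is 16, a loop over
             4-byte ints gets a 4-unit vectype from
             get_vectype_for_scalar_type, so vectorization_factor becomes 4
             and the identity asserted above (4 * 4 == 16) holds.  */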
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to variable 'sum' will be
           considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough,
           however, we also check that the cycle does not need to be
           vectorized, i.e - we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized.  This is not the case in loop2, but it
           *is* the case in loop3.  */
static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet.  This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}
/* Function vect_base_addr_differ_p.

   This is the simplest data dependence test: determines whether the
   data references DRA and DRB access the same array/region.  Returns
   false when the property is not computable at compile time.
   Otherwise return true, and DIFFER_P will record the result.  This
   utility will not be necessary when alias_sets_conflict_p becomes
   less conservative.  */
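/* E.g., given 'void f (int *restrict p, int *q)', a store through 'p' and a
   load through 'q' hit the TYPE_RESTRICT case below, so DIFFER_P is set to
   true; if the two pointers merely have conflicting alias sets, false is
   returned and the caller must assume a possible dependence.  */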
static bool
vect_base_addr_differ_p (struct data_reference *dra,
                         struct data_reference *drb,
                         bool *differ_p)
{
  tree stmt_a = DR_STMT (dra);
  stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
  tree stmt_b = DR_STMT (drb);
  stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
  tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
  tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
  tree type_a = TREE_TYPE (addr_a);
  tree type_b = TREE_TYPE (addr_b);
  HOST_WIDE_INT alias_set_a, alias_set_b;

  gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));

  /* Both references are ADDR_EXPR, i.e., we have the objects.  */
  if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
    return array_base_name_differ_p (dra, drb, differ_p);

  alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
  alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
    get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);

  if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
    {
      *differ_p = true;
      return true;
    }

  /* An instruction writing through a restricted pointer is "independent" of any
     instruction reading or writing through a different pointer, in the same
     block/scope.  */
  else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
           || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
    {
      *differ_p = true;
      return true;
    }
  return false;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */
static bool
vect_analyze_data_ref_dependence (struct data_reference *dra,
                                  struct data_reference *drb,
                                  loop_vec_info loop_vinfo)
{
  bool differ_p;
  struct data_dependence_relation *ddr;

  if (!vect_base_addr_differ_p (dra, drb, &differ_p))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between: ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (differ_p)
    return false;

  ddr = initialize_data_dependence_relation (dra, drb);
  compute_affine_dependence (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                            LOOP_LOC (loop_vinfo)))
    {
      fprintf (vect_dump,
               "not vectorized: possible dependence between data-refs ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }

  return true;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.

   TODO: dependences whose distance is greater than the vectorization factor
         can be ignored.  */
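/* E.g., with a vectorization factor of 4, the self-dependence in
   'a[i] = a[i-8] + x' has distance 8, so the 4 iterations executed together
   can never overlap it; FORNOW such dependences are still rejected, as the
   TODO above notes.  */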
static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i, j;
  varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  /* Examine store-store (output) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all store-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
    {
      for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra =
            VARRAY_GENERIC_PTR (loop_write_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  /* Examine load-store (true/anti) dependences.  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "compare all load-store pairs.");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
    {
      for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
        {
          struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
          struct data_reference *drb =
            VARRAY_GENERIC_PTR (loop_write_refs, j);
          if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
            return false;
        }
    }

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
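/* E.g., for a vectype with TYPE_ALIGN of 128 bits (an alignment of 16 bytes)
   and a known byte misalignment of 20 from the base, the TRUNC_MOD_EXPR
   computation below yields DR_MISALIGNMENT (DR) == 4.  */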
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, alignment;
  bool base_aligned_p;
  tree misalign;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
  base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
  base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Alignment required, in bytes:  */
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
  if (tree_int_cst_sgn (misalign) < 0)
    {
      /* Negative misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   This pass may take place at function granularity instead of at loop
   granularity.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.  */
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  unsigned int i;
  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two.  For example, the scheme that
     Intel uses when given a loop with several memory accesses is as follows:
     choose one memory access ('p') whose alignment you want to force by doing
     peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
      Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work.  It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc.  The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr))
        {
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
          LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
          break;
        }
    }

  if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Peeling for alignment will not be applied.");
      return;
    }
  else
    if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
      fprintf (vect_dump, "Peeling for alignment will be applied.");
  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     FORNOW: set the misalignment of the accesses to unknown even
     if the peeling factor is known at compile time.

     TODO: - if the peeling factor is known at compile time, use that
             when updating the misalignment info of the loop DRs.
           - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */
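  /* E.g., instantiating the formula above: with VF == 4 and M == 3 for the
     peeled-for DR, 4 - 3 == 1 iteration is peeled, and an access DR_i with
     misalignment 1 would end up with 1 + 4 - 3 == 2; FORNOW the loops below
     instead record 0 for the peeled-for DR and unknown (-1) for the rest.  */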
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
        {
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Alignment of access forced using peeling.");
        }
      else
        DR_MISALIGNMENT (dr) = -1;
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
        {
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "Alignment of access forced using peeling.");
        }
      else
        DR_MISALIGNMENT (dr) = -1;
    }
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized.  This restriction will be
   relaxed.  */
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* This pass may take place at function granularity instead of at loop
     granularity.  */

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  vect_enhance_data_refs_alignment (loop_vinfo);

  /* Finally, check that all the data references in the loop can be
     handled with respect to their alignment.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */
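/* E.g., for 4-byte elements, 'a[i]' has STEP == 4 == TYPE_SIZE_UNIT
   (scalar_type) and is accepted, whereas 'a[2*i]' has STEP == 8 and is
   rejected below as "not consecutive access".  */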
static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree step = STMT_VINFO_VECT_STEP (stmt_info);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */
static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      bool ok = vect_analyze_data_ref_access (dr);
      if (!ok)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}
/* Function vect_analyze_pointer_ref_access.

   Input:
   STMT - a stmt that contains a data-ref.
   MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
   ACCESS_FN - the access function of MEMREF.

   Output:
   If the data-ref access is vectorizable, return a data_reference structure
   that represents it (DR).  Otherwise - return NULL.
   STEP - the stride of MEMREF in the loop.
   INIT - the initial condition of MEMREF in the loop.  */
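/* E.g., for a pointer 'p' over 4-byte floats that is advanced by 'p = p + 1'
   every iteration, ACCESS_FN is the chrec {p_0, +, 4}, so INIT is p_0 and
   STEP is 4, a multiple of the pointed-to type size as checked below.  */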
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
                                 tree access_fn, tree *ptr_init, tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
                                  fold_convert (ssizetype,
                                                TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}
/* Function vect_address_analysis

   Return the BASE of the address expression EXPR.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.

   Input:
   EXPR - the address expression that is being analyzed
   STMT - the statement that contains EXPR or its original memory reference
   IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))
   DR - data_reference struct for the original memory reference

   Output:
   BASE (returned value) - the base of the data reference EXPR.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
              computation is impossible
   STEP - evolution of EXPR in the loop
   BASE_ALIGNED - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
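/* E.g., for EXPR 'p_1 + 8' where 'p_1' is a pointer SSA_NAME, the PLUS_EXPR
   case below recurses on the operands, returns 'p_1' as BASE, and folds the
   constant 8 into INITIAL_OFFSET and MISALIGN; for EXPR '&a', the ADDR_EXPR
   case hands the object 'a' over to vect_object_analysis.  */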
static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
                       struct data_reference *dr, tree *offset, tree *misalign,
                       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
         For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
                                          &address_offset, &address_misalign,
                                          step, base_aligned);

      /* We support cases where only one of the operands contains an
         address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
        return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      offset_expr = base_addr0 ?
        fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  If offset is
         a number, we can add it to the misalignment value calculated for base,
         otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
        *misalign = size_binop (TREE_CODE (expr), address_misalign,
                                offset_expr);
      else
        *misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
         for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
                                           is_read, vectype, &dr, offset,
                                           misalign, step, base_aligned,
                                           &dummy);
      return base_address;

    case SSA_NAME:
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          if (vect_get_ptr_offset (expr, vectype, misalign))
            *base_aligned = true;
          else
            *base_aligned = false;
        }
      else
        {
          *base_aligned = true;
          *misalign = ssize_int (0);
        }
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}
/* Function vect_object_analysis

   Return the BASE of the data reference MEMREF.
   Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   The structure of the function is as follows:
   Part 1:
   Case 1. For handled_component_p refs
          1.1 call get_inner_reference
            1.1.1 analyze offset expr received from get_inner_reference
          1.2. build data-reference structure for MEMREF
        (fall through with BASE)
   Case 2. For declarations
          2.1 check alignment
          2.2 update DR_BASE_NAME if necessary for alias
   Case 3. For INDIRECT_REFs
          3.1 get the access function
          3.2 analyze evolution of MEMREF
          3.3 set data-reference structure for MEMREF
          3.4 call vect_address_analysis to analyze INIT of the access function

   Part 2:
   Combine the results of object and address analysis to calculate
   INITIAL_OFFSET, STEP and misalignment info.

   Input:
   MEMREF - the memory reference that is being analyzed
   STMT - the statement that contains MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
                        E.g, if MEMREF is a.b[k].c[i][j] the returned
                        base is &a.
   DR - data_reference struct for MEMREF
   INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
   MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED - indicates if BASE is aligned
   MEMTAG - memory tag for aliasing purposes

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
static tree
vect_object_analysis (tree memref, tree stmt, bool is_read,
                      tree vectype, struct data_reference **dr,
                      tree *offset, tree *misalign, tree *step,
                      bool *base_aligned, tree *memtag)
{
  tree base = NULL_TREE, base_address = NULL_TREE;
  tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
  tree object_step = ssize_int (0), address_step = ssize_int (0);
  bool object_base_aligned = true, address_base_aligned = true;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  HOST_WIDE_INT pbitsize, pbitpos;
  tree poffset, bit_pos_in_bytes;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;
  tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *ptr_dr = NULL;
  tree access_fn, evolution_part, address_to_analyze;

  /* Part 1: */
  /* Case 1. handled_component_p refs.  */
  if (handled_component_p (memref))
    {
      /* 1.1 call get_inner_reference.  */
      /* Find the base and the offset from it.  */
      base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
                                  &pmode, &punsignedp, &pvolatilep, false);
      if (!base)
        return NULL_TREE;

      /* 1.1.1 analyze offset expr received from get_inner_reference.  */
      if (poffset
          && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
                                        &object_offset, &object_misalign,
                                        &object_step))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "failed to compute offset or step for ");
              print_generic_expr (vect_dump, memref, TDF_SLIM);
            }
          return NULL_TREE;
        }

      /* Add bit position to OFFSET and MISALIGN.  */

      bit_pos_in_bytes = ssize_int (pbitpos / BITS_PER_UNIT);
      /* Check that there is no remainder in bits.  */
      if (pbitpos % BITS_PER_UNIT)
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "bit offset alignment.");
          return NULL_TREE;
        }
      object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
      if (object_misalign)
        object_misalign = size_binop (PLUS_EXPR, object_misalign,
                                      bit_pos_in_bytes);

      /* Create data-reference for MEMREF.  TODO: handle COMPONENT_REFs.  */
      if (!(*dr))
        {
          if (TREE_CODE (memref) == ARRAY_REF)
            *dr = analyze_array (stmt, memref, is_read);
          else
            /* FORNOW.  */
            return NULL_TREE;
        }
      memref = base; /* To continue analysis of BASE.  */
      /* fall through  */
    }
  /* Part 1: Case 2. Declarations.  */
  if (DECL_P (memref))
    {
      /* We expect to get a decl only if we already have a DR.  */
      if (!(*dr))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "unhandled decl ");
              print_generic_expr (vect_dump, memref, TDF_SLIM);
            }
          return NULL_TREE;
        }

      /* 2.1 check the alignment.  */
      if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
        object_base_aligned = true;
      else
        object_base_aligned = false;

      /* 2.2 update DR_BASE_NAME if necessary.  */
      if (!DR_BASE_NAME ((*dr)))
        /* For alias analysis.  In case the analysis of INDIRECT_REF brought
           us to object.  */
        DR_BASE_NAME ((*dr)) = memref;

      base_address = build_fold_addr_expr (memref);
      *memtag = memref;
    }
  /* Part 1: Case 3. INDIRECT_REFs.  */
  else if (TREE_CODE (memref) == INDIRECT_REF)
    {
      /* 3.1 get the access function.  */
      access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump, "not vectorized: complicated pointer access.");
          return NULL_TREE;
        }
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Access function of ptr: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      /* 3.2 analyze evolution of MEMREF.  */
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution_part)
        {
          ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
                                                    access_fn, &ptr_init,
                                                    &ptr_step);
          if (!(ptr_dr))
            return NULL_TREE;

          object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
          address_to_analyze = ptr_init;
        }
      else
        {
          if (!(*dr))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
              return NULL_TREE;
            }
          /* Since there exists a DR for MEMREF, we are analyzing the init of
             the access function, which does not necessarily have an evolution
             in the loop.  */
          address_to_analyze = initial_condition_in_loop_num (access_fn,
                                                              loop->num);
        }

      /* 3.3 set data-reference structure for MEMREF.  */
      *dr = (*dr) ? *dr : ptr_dr;

      /* 3.4 call vect_address_analysis to analyze INIT of the access
         function.  */
      base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
                                            vectype, *dr, &address_offset,
                                            &address_misalign, &address_step,
                                            &address_base_aligned);
      if (!base_address)
        return NULL_TREE;

      switch (TREE_CODE (base_address))
        {
        case SSA_NAME:
          *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
          if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
            *memtag = get_var_ann (
                SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
          break;
        case ADDR_EXPR:
          *memtag = TREE_OPERAND (base_address, 0);
          break;
        default:
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            {
              fprintf (vect_dump, "not vectorized: no memtag ref: ");
              print_generic_expr (vect_dump, memref, TDF_SLIM);
            }
          return NULL_TREE;
        }
    }
  if (!base_address)
    /* MEMREF cannot be analyzed.  */
    return NULL_TREE;

  /* Part 2: Combine the results of object and address analysis to calculate
     INITIAL_OFFSET, STEP and misalignment info.  */
  *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
  if (object_misalign && address_misalign)
    *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
  else
    *misalign = NULL_TREE;
  *step = size_binop (PLUS_EXPR, object_step, address_step);
  *base_aligned = object_base_aligned && address_base_aligned;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Results of object analysis for: ");
      print_generic_expr (vect_dump, memref, TDF_SLIM);
      fprintf (vect_dump, "\n\tbase_address: ");
      print_generic_expr (vect_dump, base_address, TDF_SLIM);
      fprintf (vect_dump, "\n\toffset: ");
      print_generic_expr (vect_dump, *offset, TDF_SLIM);
      fprintf (vect_dump, "\n\tstep: ");
      print_generic_expr (vect_dump, *step, TDF_SLIM);
      fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
      print_generic_expr (vect_dump, *misalign, TDF_SLIM);
    }
  return base_address;
}
1735 /* Function vect_analyze_data_refs.
1737 Find all the data references in the loop.
1739 The general structure of the analysis of data refs in the vectorizer is as
1740 follows:
1741 1- vect_analyze_data_refs(loop):
1742 Find and analyze all data-refs in the loop:
1743 foreach ref
1744 base_address = vect_object_analysis(ref)
1745 1.1- vect_object_analysis(ref):
1746 Analyze ref, and build a DR (data_referece struct) for it;
1747 compute base, initial_offset, step and alignment.
1748 Call get_inner_reference for refs handled in this function.
1749 Call vect_addr_analysis(addr) to analyze pointer type expressions.
1750 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1751 ref_stmt.memtag and ref_stmt.step accordingly.
1752 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1753 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1754 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1756 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1757 whose base is really an array (not a pointer) and whose alignment
1758 can be forced. This restriction will be relaxed. */
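/* E.g. (illustrative only), both references below are expected to be
handled under the FORNOW restrictions above, assuming the pointer access
is (or can be proven) aligned:

int a[256];
void f (int *__restrict p)
{
int i;
for (i = 0; i < 256; i++)
a[i] = *p++;
}

Here 'a[i]' is an ARRAY_REF whose base is an array and whose alignment can
be forced, and '*p' is an (assumed aligned) INDIRECT_REF. */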
1760 static bool
1761 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1763 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1764 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1765 int nbbs = loop->num_nodes;
1766 block_stmt_iterator si;
1767 int j;
1768 struct data_reference *dr;
1770 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1771 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1773 for (j = 0; j < nbbs; j++)
1775 basic_block bb = bbs[j];
1776 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1778 bool is_read = false;
1779 tree stmt = bsi_stmt (si);
1780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1781 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1782 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1783 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1784 varray_type *datarefs = NULL;
1785 int nvuses, nv_may_defs, nv_must_defs;
1786 tree memref = NULL;
1787 tree scalar_type, vectype;
1788 tree base, offset, misalign, step, tag;
1789 bool base_aligned;
1791 /* Assumption: there exists a data-ref in STMT if and only if
1792 it has vuses/vdefs. */
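/* E.g. (illustrative): in 'x = a[i]' the load shows up as a VUSE of 'a',
while in 'a[i] = x' the store shows up as a V_MAY_DEF or V_MUST_DEF;
the exact virtual operand kind depends on the aliasing information. */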
1794 if (!vuses && !v_may_defs && !v_must_defs)
1795 continue;
1797 nvuses = NUM_VUSES (vuses);
1798 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1799 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1801 if (nvuses && (nv_may_defs || nv_must_defs))
1803 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1805 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1806 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1808 return false;
1811 if (TREE_CODE (stmt) != MODIFY_EXPR)
1813 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1815 fprintf (vect_dump, "unexpected vops in stmt: ");
1816 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1818 return false;
1821 if (vuses)
1823 memref = TREE_OPERAND (stmt, 1);
1824 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1825 is_read = true;
1827 else /* vdefs */
1829 memref = TREE_OPERAND (stmt, 0);
1830 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1831 is_read = false;
1834 scalar_type = TREE_TYPE (memref);
1835 vectype = get_vectype_for_scalar_type (scalar_type);
1836 if (!vectype)
1838 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1840 fprintf (vect_dump, "no vectype for stmt: ");
1841 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1842 fprintf (vect_dump, " scalar_type: ");
1843 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1845 /* It is not possible to vectorize this data reference. */
1846 return false;
1848 /* Analyze MEMREF. If it is of a supported form, build data_reference
1849 struct for it (DR). */
1850 dr = NULL;
1851 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1852 &offset, &misalign, &step,
1853 &base_aligned, &tag);
1854 if (!base)
1856 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1857 LOOP_LOC (loop_vinfo)))
1859 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1860 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1862 return false;
1864 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1865 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1866 STMT_VINFO_VECT_STEP (stmt_info) = step;
1867 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1868 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1869 STMT_VINFO_MEMTAG (stmt_info) = tag;
1870 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1871 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1872 STMT_VINFO_DATA_REF (stmt_info) = dr;
1876 return true;
1880 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1882 /* Function vect_mark_relevant.
1884 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1886 static void
1887 vect_mark_relevant (varray_type *worklist, tree stmt)
1889 stmt_vec_info stmt_info;
1891 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1892 fprintf (vect_dump, "mark relevant.");
1894 if (TREE_CODE (stmt) == PHI_NODE)
1896 VARRAY_PUSH_TREE (*worklist, stmt);
1897 return;
1900 stmt_info = vinfo_for_stmt (stmt);
1902 if (!stmt_info)
1904 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1906 fprintf (vect_dump, "mark relevant: no stmt info!!.");
1907 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1909 return;
1912 if (STMT_VINFO_RELEVANT_P (stmt_info))
1914 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1915 fprintf (vect_dump, "already marked relevant.");
1916 return;
1919 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
1920 VARRAY_PUSH_TREE (*worklist, stmt);
1924 /* Function vect_stmt_relevant_p.
1926 Return true if STMT, in the loop represented by LOOP_VINFO, is
1927 "relevant for vectorization".
1929 A stmt is considered "relevant for vectorization" if:
1930 - it has uses outside the loop.
1931 - it has vdefs (it alters memory).
1932 - it is a control stmt in the loop (other than the exit condition).
1934 CHECKME: what other side effects would the vectorizer allow? */
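/* E.g. (illustrative): in

for (i = 0; i < N; i++)
{
S1: t_1 = b[i];
S2: a[i] = t_1;
}

only S2 is flagged by this predicate (it has vdefs); S1 becomes relevant
later, when vect_mark_stmts_to_be_vectorized follows the uses of S2. */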
1936 static bool
1937 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
1939 v_may_def_optype v_may_defs;
1940 v_must_def_optype v_must_defs;
1941 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1942 int i;
1943 dataflow_t df;
1944 int num_uses;
1946 /* cond stmt other than loop exit cond. */
1947 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1948 return true;
1950 /* changing memory. */
1951 if (TREE_CODE (stmt) != PHI_NODE)
1953 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1954 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1955 if (v_may_defs || v_must_defs)
1957 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1958 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1959 return true;
1963 /* uses outside the loop. */
1964 df = get_immediate_uses (stmt);
1965 num_uses = num_immediate_uses (df);
1966 for (i = 0; i < num_uses; i++)
1968 tree use = immediate_use (df, i);
1969 basic_block bb = bb_for_stmt (use);
1970 if (!flow_bb_inside_loop_p (loop, bb))
1972 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1973 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1974 return true;
1978 return false;
1982 /* Function vect_mark_stmts_to_be_vectorized.
1984 Not all stmts in the loop need to be vectorized. For example:
1986 for i...
1987 for j...
1988 1. T0 = i + j
1989 2. T1 = a[T0]
1991 3. j = j + 1
1993 Stmts 1 and 3 do not need to be vectorized, because loop control and
1994 addressing of vectorized data-refs are handled differently.
1996 This pass detects such stmts. */
1998 static bool
1999 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2001 varray_type worklist;
2002 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2003 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2004 unsigned int nbbs = loop->num_nodes;
2005 block_stmt_iterator si;
2006 tree stmt;
2007 stmt_ann_t ann;
2008 unsigned int i;
2009 int j;
2010 use_optype use_ops;
2011 stmt_vec_info stmt_info;
2012 basic_block bb;
2013 tree phi;
2015 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2016 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2018 bb = loop->header;
2019 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2021 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2023 fprintf (vect_dump, "init: phi relevant? ");
2024 print_generic_expr (vect_dump, phi, TDF_SLIM);
2027 if (vect_stmt_relevant_p (phi, loop_vinfo))
2029 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2030 LOOP_LOC (loop_vinfo)))
2031 fprintf (vect_dump, "unsupported reduction/induction.");
2032 return false;
2036 VARRAY_TREE_INIT (worklist, 64, "work list");
2038 /* 1. Init worklist. */
2040 for (i = 0; i < nbbs; i++)
2042 bb = bbs[i];
2043 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2045 stmt = bsi_stmt (si);
2047 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2049 fprintf (vect_dump, "init: stmt relevant? ");
2050 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2053 stmt_info = vinfo_for_stmt (stmt);
2054 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2056 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2057 vect_mark_relevant (&worklist, stmt);
2062 /* 2. Process worklist. */
2064 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2066 stmt = VARRAY_TOP_TREE (worklist);
2067 VARRAY_POP (worklist);
2069 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2071 fprintf (vect_dump, "worklist: examine stmt: ");
2072 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2075 /* Examine the USES in this statement. Mark all the statements which
2076 feed this statement's uses as "relevant", unless the USE is used as
2077 an array index. */
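/* E.g. (illustrative): for 'a[T0] = T1' the def of T1 is followed and
marked relevant, whereas the def of T0, which is used only for address
computation, is not. */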
2079 if (TREE_CODE (stmt) == PHI_NODE)
2081 /* follow the def-use chain inside the loop. */
2082 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2084 tree arg = PHI_ARG_DEF (stmt, j);
2085 tree def_stmt = NULL_TREE;
2086 basic_block bb;
2087 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2089 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2090 LOOP_LOC (loop_vinfo)))
2091 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2092 varray_clear (worklist);
2093 return false;
2095 if (!def_stmt)
2096 continue;
2098 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2100 fprintf (vect_dump, "worklist: def_stmt: ");
2101 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2104 bb = bb_for_stmt (def_stmt);
2105 if (flow_bb_inside_loop_p (loop, bb))
2106 vect_mark_relevant (&worklist, def_stmt);
2110 ann = stmt_ann (stmt);
2111 use_ops = USE_OPS (ann);
2113 for (i = 0; i < NUM_USES (use_ops); i++)
2115 tree use = USE_OP (use_ops, i);
2117 /* We are only interested in uses that need to be vectorized. Uses
2118 that are used for address computation are not considered relevant. */
2120 if (exist_non_indexing_operands_for_use_p (use, stmt))
2122 tree def_stmt = NULL_TREE;
2123 basic_block bb;
2124 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2126 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2127 LOOP_LOC (loop_vinfo)))
2128 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2129 varray_clear (worklist);
2130 return false;
2133 if (!def_stmt)
2134 continue;
2136 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2138 fprintf (vect_dump, "worklist: examine use %d: ", i);
2139 print_generic_expr (vect_dump, use, TDF_SLIM);
2142 bb = bb_for_stmt (def_stmt);
2143 if (flow_bb_inside_loop_p (loop, bb))
2144 vect_mark_relevant (&worklist, def_stmt);
2147 } /* while worklist */
2149 varray_clear (worklist);
2150 return true;
2154 /* Function vect_can_advance_ivs_p
2156 In case the number of iterations that LOOP executes is unknown at compile
2157 time, an epilog loop will be generated, and the loop induction variables
2158 (IVs) will be "advanced" to the value they are supposed to take just before
2159 the epilog loop. Here we check that the access function of the loop IVs
2160 and the expression that represents the loop bound are simple enough.
2161 These restrictions will be relaxed in the future. */
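/* E.g. (illustrative): an IV with access function {init, +, step}_loop can
be advanced to 'init + niters * step' before the epilog loop; this is
expressible precisely because the evolution part is a simple (non-chrec)
step. */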
2163 static bool
2164 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2166 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2167 basic_block bb = loop->header;
2168 tree phi;
2170 /* Analyze phi functions of the loop header. */
2172 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2174 tree access_fn = NULL;
2175 tree evolution_part;
2177 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2179 fprintf (vect_dump, "Analyze phi: ");
2180 print_generic_expr (vect_dump, phi, TDF_SLIM);
2183 /* Skip virtual phis. The data dependences that are associated with
2184 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2186 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2188 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2189 fprintf (vect_dump, "virtual phi. skip.");
2190 continue;
2193 /* Analyze the evolution function. */
2195 access_fn = instantiate_parameters
2196 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2198 if (!access_fn)
2200 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2201 fprintf (vect_dump, "No Access function.");
2202 return false;
2205 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2207 fprintf (vect_dump, "Access function of PHI: ");
2208 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2211 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2213 if (evolution_part == NULL_TREE)
2214 return false;
2216 /* FORNOW: We do not transform initial conditions of IVs
2217 whose evolution functions are a polynomial of degree >= 2. */
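/* E.g. (illustrative): an IV updated as 'i += j; j++;' has the
second-degree evolution {0, +, {1, +, 1}_1}_1; its evolution part is
itself a chrec, so it is rejected below. */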
2219 if (tree_is_chrec (evolution_part))
2220 return false;
2223 return true;
2227 /* Function vect_get_loop_niters.
2229 Determine the number of iterations the loop executes.
2230 If an expression that represents the number of iterations
2231 can be constructed, place it in NUMBER_OF_ITERATIONS.
2232 Return the loop exit condition. */
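/* E.g. (illustrative): for 'for (i = 0; i < n; i++)' the expression placed
in NUMBER_OF_ITERATIONS would be (an expression equivalent to) 'n', and
the returned exit condition is the COND_EXPR that guards the loop exit. */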
2234 static tree
2235 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2237 tree niters;
2239 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2240 fprintf (vect_dump, "=== get_loop_niters ===");
2242 niters = number_of_iterations_in_loop (loop);
2244 if (niters != NULL_TREE
2245 && niters != chrec_dont_know)
2247 *number_of_iterations = niters;
2249 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2251 fprintf (vect_dump, "==> get_loop_niters:" );
2252 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2256 return get_loop_exit_condition (loop);
2260 /* Function vect_analyze_loop_form.
2262 Verify the following restrictions (some may be relaxed in the future):
2263 - it's an inner-most loop
2264 - number of BBs = 2 (which are the loop header and the latch)
2265 - the loop has a pre-header
2266 - the loop has a single entry and exit
2267 - the loop exit condition is simple enough, and the number of iterations
2268 can be analyzed (a countable loop). */
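/* E.g. (illustrative): a conforming loop, after canonicalization, looks
like

preheader:
header:
i_1 = PHI <0 (preheader), i_2 (latch)>
... all executable stmts ...
i_2 = i_1 + 1;
if (i_2 < n) goto latch; else goto exit;
latch:
goto header;
exit:

i.e., two basic blocks (header plus empty latch), a single entry edge, a
single exit edge, and a countable bound. */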
2270 static loop_vec_info
2271 vect_analyze_loop_form (struct loop *loop)
2273 loop_vec_info loop_vinfo;
2274 tree loop_cond;
2275 tree number_of_iterations = NULL;
2276 bool rescan = false;
2277 LOC loop_loc;
2279 loop_loc = find_loop_location (loop);
2281 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2282 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2284 if (loop->inner)
2286 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2287 fprintf (vect_dump, "not vectorized: nested loop.");
2288 return NULL;
2291 if (!loop->single_exit
2292 || loop->num_nodes != 2
2293 || EDGE_COUNT (loop->header->preds) != 2
2294 || loop->num_entries != 1)
2296 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2298 if (!loop->single_exit)
2299 fprintf (vect_dump, "not vectorized: multiple exits.");
2300 else if (loop->num_nodes != 2)
2301 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2302 else if (EDGE_COUNT (loop->header->preds) != 2)
2303 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2304 else if (loop->num_entries != 1)
2305 fprintf (vect_dump, "not vectorized: too many entries.");
2308 return NULL;
2311 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2312 that the loop is represented as a do-while (with a proper if-guard
2313 before the loop if needed), where the loop header contains all the
2314 executable statements, and the latch is empty. */
2315 if (!empty_block_p (loop->latch))
2317 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2318 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
2319 return NULL;
2322 /* Make sure we have a preheader basic block. */
2323 if (!loop->pre_header || EDGE_COUNT (loop->pre_header->succs) != 1)
2325 edge e = loop_preheader_edge (loop);
2326 loop_split_edge_with (e, NULL);
2327 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2328 fprintf (vect_dump, "split preheader edge.");
2329 rescan = true;
2332 /* Make sure there exists a single-predecessor exit bb: */
2333 if (EDGE_COUNT (loop->single_exit->dest->preds) != 1)
2335 edge e = loop->single_exit;
2336 if (!(e->flags & EDGE_ABNORMAL))
2338 loop_split_edge_with (e, NULL);
2339 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2340 fprintf (vect_dump, "split exit edge.");
2341 rescan = true;
2343 else
2345 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2346 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2347 return NULL;
2351 if (rescan)
2353 flow_loop_scan (loop, LOOP_ALL);
2354 /* Flow loop scan does not update loop->single_exit field. */
2355 loop->single_exit = loop->exit_edges[0];
2358 if (empty_block_p (loop->header))
2360 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2361 fprintf (vect_dump, "not vectorized: empty loop.");
2362 return NULL;
2365 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2366 if (!loop_cond)
2368 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2369 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2370 return NULL;
2373 if (!number_of_iterations)
2375 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2376 fprintf (vect_dump,
2377 "not vectorized: number of iterations cannot be computed.");
2378 return NULL;
2381 if (chrec_contains_undetermined (number_of_iterations))
2383 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2384 fprintf (vect_dump, "Infinite number of iterations.");
2385 return NULL;
2388 loop_vinfo = new_loop_vec_info (loop);
2389 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2391 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2393 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2395 fprintf (vect_dump, "Symbolic number of iterations is ");
2396 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2399 else
2400 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2403 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2404 return NULL;
2407 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2408 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2410 return loop_vinfo;
2414 /* Function vect_analyze_loop.
2416 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2417 for it. The different analyses will record information in the
2418 loop_vec_info struct. */
2419 loop_vec_info
2420 vect_analyze_loop (struct loop *loop)
2422 bool ok;
2423 loop_vec_info loop_vinfo;
2425 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2426 fprintf (vect_dump, "===== analyze_loop_nest =====");
2428 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2430 loop_vinfo = vect_analyze_loop_form (loop);
2431 if (!loop_vinfo)
2433 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2434 fprintf (vect_dump, "bad loop form.");
2435 return NULL;
2438 /* Find all data references in the loop (which correspond to vdefs/vuses)
2439 and analyze their evolution in the loop.
2441 FORNOW: Handle only simple array references whose
2442 alignment can be forced, and aligned pointer-references. */
2444 ok = vect_analyze_data_refs (loop_vinfo);
2445 if (!ok)
2447 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2448 fprintf (vect_dump, "bad data references.");
2449 destroy_loop_vec_info (loop_vinfo);
2450 return NULL;
2453 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2455 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2456 if (!ok)
2458 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2459 fprintf (vect_dump, "unexpected pattern.");
2460 destroy_loop_vec_info (loop_vinfo);
2461 return NULL;
2464 /* Check that all cross-iteration scalar data-flow cycles are OK.
2465 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2467 ok = vect_analyze_scalar_cycles (loop_vinfo);
2468 if (!ok)
2470 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2471 fprintf (vect_dump, "bad scalar cycle.");
2472 destroy_loop_vec_info (loop_vinfo);
2473 return NULL;
2476 /* Analyze data dependences between the data-refs in the loop.
2477 FORNOW: fail at the first data dependence that we encounter. */
2479 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2480 if (!ok)
2482 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2483 fprintf (vect_dump, "bad data dependence.");
2484 destroy_loop_vec_info (loop_vinfo);
2485 return NULL;
2488 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2489 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2491 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2492 if (!ok)
2494 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2495 fprintf (vect_dump, "bad data access.");
2496 destroy_loop_vec_info (loop_vinfo);
2497 return NULL;
2500 /* Analyze the alignment of the data-refs in the loop.
2501 FORNOW: Only aligned accesses are handled. */
2503 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2504 if (!ok)
2506 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2507 fprintf (vect_dump, "bad data alignment.");
2508 destroy_loop_vec_info (loop_vinfo);
2509 return NULL;
2512 /* Scan all the operations in the loop and make sure they are
2513 vectorizable. */
2515 ok = vect_analyze_operations (loop_vinfo);
2516 if (!ok)
2518 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2519 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2520 destroy_loop_vec_info (loop_vinfo);
2521 return NULL;
2524 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2526 return loop_vinfo;