/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
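/* An illustrative sketch (not part of the original sources): on a target
   with 16-byte vectors, a loop operating only on 4-byte ints gets
   nunits = 4 for the V4SI vectype, hence VF = 4.  If the same loop also
   operated on 2-byte shorts (nunits = 8), two different sizes would
   appear and, per the restriction above, the loop would currently be
   rejected.  */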
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We set the vectype according to the type of the result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the
                 smallest datatype in this function, because that determines
                 the VF.  If the smallest datatype in the loop is present only
                 as the rhs of a promotion operation - we'd miss it here.
                 However, in such a case, since a variable of this datatype
                 does not appear in the lhs anywhere in the loop, it shouldn't
                 affect the vectorization factor.  */
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case would
                 require us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump,
                         "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                || vectorizable_type_demotion (stmt, NULL, NULL)
                || vectorizable_conversion (stmt, NULL, NULL)
                || vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_call (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL)
                || vectorizable_condition (stmt, NULL, NULL)
                || vectorizable_reduction (stmt, NULL, NULL));

          /* Stmts that are (also) "live" (i.e. - that are used out of the
             loop) need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok |= vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worth while to vectorize.  */

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
                          * vectorization_factor;

  /* Use the cost model only if it is more conservative than the user
     specified threshold.  */
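  /* A worked example with illustrative numbers (not from the original
     sources): with PARAM_MIN_VECT_LOOP_BOUND = 0 the scalar loop bound
     is 0, so if the cost model returned min_profitable_iters = 10, th
     becomes 10 below, and a loop whose iteration count is known to be
     smaller than 10 is left unvectorized.  */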
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
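/* An illustrative example (not from the original sources): in

       a[i] = x;

   the use 'i' occurs only inside the ARRAY_REF index, so the predicate
   below returns false for 'i', while for the use 'x' (the stored rhs,
   an SSA_NAME matching the operand) it returns true.  */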
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dumy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT(FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
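/* An illustrative example (not part of the original sources): for

       for (i = 0; i < N; i++)
         {
           a[3*i]   = ...;      <-- INIT 0
           a[3*i+2] = ...;      <-- INIT 2
           a[3*i+1] = ...;      <-- INIT 1
         }

   all three stores end up in one chain whose FIRST_DR is the access with
   the smallest INIT (a[3*i]), with the NEXT_DR links sorted by increasing
   INIT: a[3*i] -> a[3*i+1] -> a[3*i+2].  */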
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore,
             we update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */
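/* An illustrative example (not from the original sources): in

       for (i = 0; i < N; i++)
         a[i+3] = a[i] + 1;

   the dependence distance between the write a[i+3] and the read a[i] is 3.
   With a vectorization factor of 4 the distance is neither a multiple of
   VF nor >= VF, so the dependence is a problem; with VF = 2 the distance
   is >= VF and, as the code below notes, it does not create a dependence
   as far as vectorization is concerned.  */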
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence "
                   "between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump,
                       "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Return TRUE if DDR_NEW is already found in the MAY_ALIAS_DDRS list.  */

static bool
vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
{
  unsigned i;
  ddr_p ddr;

  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
    {
      tree dref_A_i, dref_B_i, dref_A_j, dref_B_j;

      dref_A_i = DR_REF (DDR_A (ddr));
      dref_B_i = DR_REF (DDR_B (ddr));
      dref_A_j = DR_REF (DDR_A (ddr_new));
      dref_B_j = DR_REF (DDR_B (ddr_new));

      if ((operand_equal_p (dref_A_i, dref_A_j, 0)
           && operand_equal_p (dref_B_i, dref_B_j, 0))
          || (operand_equal_p (dref_A_i, dref_B_j, 0)
              && operand_equal_p (dref_B_i, dref_A_j, 0)))
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "found same pair of data references ");
              print_generic_expr (vect_dump, dref_A_i, TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, dref_B_i, TDF_SLIM);
            }
          return true;
        }
    }
  return false;
}

/* Save DDR in the LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Returns false if the number of run-time checks
   inserted by the vectorizer is greater than the maximum defined by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */
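/* A hedged sketch of the idea (the actual guard is built elsewhere by the
   versioning code; names here are illustrative): each recorded ddr later
   contributes one pairwise overlap test guarding the vectorized version,
   conceptually

       if (a + len <= b || b + len <= a)
         ... vectorized loop ...
       else
         ... scalar loop ...

   which is why the number of ddrs collected here is capped by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */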
static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  /* Do not add duplicate ddrs to the list.  */
  if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
    return true;

  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "disable versioning for alias - max number of generated "
                   "checks exceeded.");
        }

      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);

      return false;
    }
  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      {
        /* Add to the list of ddrs that need to be tested at run-time.  */
        if (!vect_mark_for_runtime_alias_test (ddr, loop_vinfo))
          return false;
      }

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
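/* An illustrative example (not from the original sources): for a 16-byte
   vector type and a data-ref whose address is a 16-byte aligned base plus
   a constant init of 4, the modulo computation below yields
   DR_MISALIGNMENT = 4.  If the base alignment cannot be established, the
   misalignment simply keeps its initial value of -1 (unknown).  */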
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % UNITS_PER_SIMD_WORD == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ",
               DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
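/* An illustrative example with assumed numbers (not from the original
   sources): with a 16-byte vector size, if DR_PEEL is a 4-byte access
   misaligned by 8 bytes (2 elements), the peel runs NPEEL = 4 - 2 = 2
   iterations; a 4-byte DR misaligned by 4 bytes then becomes misaligned
   by (4 + 2*4) % 16 = 12 bytes, as computed below.  */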
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (DR_GROUP_FIRST_DR (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      misal += npeel * dr_size;
      misal %= UNITS_PER_SIMD_WORD;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (DR_GROUP_FIRST_DR (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = UNITS_PER_SIMD_WORD / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }
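  /* A numeric illustration (assumed values, not from the original sources):
     with nelements = 8 and a group of 3 interleaved accesses, a
     misalignment of 2 elements needs a prolog of 8 - 2 = 6 iterations,
     and 6 % 3 == 0, so alignment is reachable; a misalignment of 1 element
     would need 7 iterations, and 7 % 3 != 0, so we give up.  */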
  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump,
                     "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",
                 is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
1689 static bool
1690 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1692 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1693 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1694 enum dr_alignment_support supportable_dr_alignment;
1695 struct data_reference *dr0 = NULL;
1696 struct data_reference *dr;
1697 unsigned int i;
1698 bool do_peeling = false;
1699 bool do_versioning = false;
1700 bool stat;
1701 tree stmt;
1702 stmt_vec_info stmt_info;
1703 int vect_versioning_for_alias_required;
1705 if (vect_print_dump_info (REPORT_DETAILS))
1706 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1708 /* While cost model enhancements are expected in the future, the high level
1709 view of the code at this time is as follows:
1711 A) If there is a misaligned write then see if peeling to align this write
1712 can make all data references satisfy vect_supportable_dr_alignment.
1713 If so, update data structures as needed and return true. Note that
1714 at this time vect_supportable_dr_alignment is known to return false
1715 for a misaligned write.
1717 B) If peeling wasn't possible and there is a data reference with an
1718 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1719 then see if loop versioning checks can be used to make all data
1720 references satisfy vect_supportable_dr_alignment. If so, update
1721 data structures as needed and return true.
1723 C) If neither peeling nor versioning were successful then return false if
1724 any data reference does not satisfy vect_supportable_dr_alignment.
1726 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1728 Note, Possibility 3 above (which is peeling and versioning together) is not
1729 being done at this time. */
1731 /* (1) Peeling to force alignment. */
1733 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1734 Considerations:
1735 + How many accesses will become aligned due to the peeling
1736 - How many accesses will become unaligned due to the peeling,
1737 and the cost of misaligned accesses.
1738 - The cost of peeling (the extra runtime checks, the increase
1739 in code size).
1741 The scheme we use FORNOW: peel to force the alignment of the first
1742 misaligned store in the loop.
1743 Rationale: misaligned stores are not yet supported.
1745 TODO: Use a cost model. */
1747 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1749 stmt = DR_STMT (dr);
1750 stmt_info = vinfo_for_stmt (stmt);
1752 /* For interleaving, only the alignment of the first access
1753 matters. */
1754 if (DR_GROUP_FIRST_DR (stmt_info)
1755 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1756 continue;
1758 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1760 do_peeling = vector_alignment_reachable_p (dr);
1761 if (do_peeling)
1762 dr0 = dr;
1763 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1764 fprintf (vect_dump, "vector alignment may not be reachable");
1765 break;
1769 vect_versioning_for_alias_required =
1770 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1772 /* Temporarily, if versioning for alias is required, we disable peeling
1773 until we support peeling and versioning. Often peeling for alignment
1774 will require peeling for loop-bound, which in turn requires that we
1775 know how to adjust the loop ivs after the loop. */
1776 if (vect_versioning_for_alias_required
1777 || !vect_can_advance_ivs_p (loop_vinfo)
1778 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1779 do_peeling = false;
1781 if (do_peeling)
1783 int mis;
1784 int npeel = 0;
1785 tree stmt = DR_STMT (dr0);
1786 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1787 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1788 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1790 if (known_alignment_for_access_p (dr0))
1792 /* Since the misalignment of DR0 is known at compile time, compute the
1793 number of iterations in the peeled loop (the peeling factor) for use
1794 in updating DR_MISALIGNMENT values. The peeling factor is the
1795 vectorization factor minus the misalignment as an element count. */
1796 mis = DR_MISALIGNMENT (dr0);
1797 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1798 npeel = nelements - mis;
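/* E.g. (illustrative): for V4SI vectors (4 elements of 4 bytes) and
   DR_MISALIGNMENT (dr0) == 8, mis = 8/4 = 2 elements, and
   npeel = 4 - 2 = 2.  */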
1800 /* For interleaved data access every iteration accesses all the
1801 members of the group, therefore we divide the number of iterations
1802 by the group size. */
1803 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1804 if (DR_GROUP_FIRST_DR (stmt_info))
1805 npeel /= DR_GROUP_SIZE (stmt_info);
1807 if (vect_print_dump_info (REPORT_DETAILS))
1808 fprintf (vect_dump, "Try peeling by %d", npeel);
1811 /* Ensure that all data refs can be vectorized after the peel. */
1812 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1814 int save_misalignment;
1816 if (dr == dr0)
1817 continue;
1819 stmt = DR_STMT (dr);
1820 stmt_info = vinfo_for_stmt (stmt);
1821 /* For interleaving, only the alignment of the first access
1822 matters. */
1823 if (DR_GROUP_FIRST_DR (stmt_info)
1824 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1825 continue;
1827 save_misalignment = DR_MISALIGNMENT (dr);
1828 vect_update_misalignment_for_peel (dr, dr0, npeel);
1829 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1830 SET_DR_MISALIGNMENT (dr, save_misalignment);
1832 if (!supportable_dr_alignment)
1834 do_peeling = false;
1835 break;
1839 if (do_peeling)
1841 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1842 If the misalignment of DR_i is identical to that of dr0 then set
1843 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1844 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1845 by the peeling factor times the element size of DR_i (modulo the
1846 vectorization factor times the element size). Otherwise, the
1847 misalignment of DR_i must be set to unknown. */
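/* E.g. (illustrative, continuing the V4SI example): with npeel == 2, a
   DR_i with known misalignment 4 becomes (4 + 2*4) mod 16 == 12, while
   dr0 itself becomes (8 + 2*4) mod 16 == 0.  */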
1848 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1849 if (dr != dr0)
1850 vect_update_misalignment_for_peel (dr, dr0, npeel);
1852 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1853 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1854 SET_DR_MISALIGNMENT (dr0, 0);
1855 if (vect_print_dump_info (REPORT_ALIGNMENT))
1856 fprintf (vect_dump, "Alignment of access forced using peeling.");
1858 if (vect_print_dump_info (REPORT_DETAILS))
1859 fprintf (vect_dump, "Peeling for alignment will be applied.");
1861 stat = vect_verify_datarefs_alignment (loop_vinfo);
1862 gcc_assert (stat);
1863 return stat;
1868 /* (2) Versioning to force alignment. */
1870 /* Try versioning if:
1871 1) flag_tree_vect_loop_version is TRUE
1872 2) optimize_size is FALSE
1873 3) there is at least one unsupported misaligned data ref with an unknown
1874 misalignment, and
1875 4) all misaligned data refs with a known misalignment are supported, and
1876 5) the number of runtime alignment checks is within reason. */
1878 do_versioning =
1879 flag_tree_vect_loop_version
1880 && (!optimize_size)
1881 && (!loop->inner); /* FORNOW */
1883 if (do_versioning)
1885 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1887 stmt = DR_STMT (dr);
1888 stmt_info = vinfo_for_stmt (stmt);
1890 /* For interleaving, only the alignment of the first access
1891 matters. */
1892 if (aligned_access_p (dr)
1893 || (DR_GROUP_FIRST_DR (stmt_info)
1894 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1895 continue;
1897 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1899 if (!supportable_dr_alignment)
1901 tree stmt;
1902 int mask;
1903 tree vectype;
1905 if (known_alignment_for_access_p (dr)
1906 || VEC_length (tree,
1907 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1908 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1910 do_versioning = false;
1911 break;
1914 stmt = DR_STMT (dr);
1915 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1916 gcc_assert (vectype);
1918 /* The rightmost bits of an aligned address must be zeros.
1919 Construct the mask needed for this test. For example,
1920 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1921 mask must be 15 = 0xf. */
1922 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1924 /* FORNOW: use the same mask to test all potentially unaligned
1925 references in the loop. The vectorizer currently supports
1926 a single vector size, see the reference to
1927 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1928 vectorization factor is computed. */
1929 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1930 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1931 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1932 VEC_safe_push (tree, heap,
1933 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1934 DR_STMT (dr));
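/* The stmts collected here later drive the runtime guard of the
   versioned loop; conceptually (an illustrative sketch, not the exact
   code that is generated):

       if (((addr_1 | addr_2 | ... | addr_n) & mask) == 0)
         <vectorized loop>
       else
         <scalar loop>  */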
1938 /* Versioning requires at least one misaligned data reference. */
1939 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1940 do_versioning = false;
1941 else if (!do_versioning)
1942 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1945 if (do_versioning)
1947 VEC(tree,heap) *may_misalign_stmts
1948 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1949 tree stmt;
1951 /* It can now be assumed that the data references in the statements
1952 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1953 of the loop being vectorized. */
1954 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1956 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1957 dr = STMT_VINFO_DATA_REF (stmt_info);
1958 SET_DR_MISALIGNMENT (dr, 0);
1959 if (vect_print_dump_info (REPORT_ALIGNMENT))
1960 fprintf (vect_dump, "Alignment of access forced using versioning.");
1963 if (vect_print_dump_info (REPORT_DETAILS))
1964 fprintf (vect_dump, "Versioning for alignment will be applied.");
1966 /* Peeling and versioning can't be done together at this time. */
1967 gcc_assert (! (do_peeling && do_versioning));
1969 stat = vect_verify_datarefs_alignment (loop_vinfo);
1970 gcc_assert (stat);
1971 return stat;
1974 /* This point is reached if neither peeling nor versioning is being done. */
1975 gcc_assert (! (do_peeling || do_versioning));
1977 stat = vect_verify_datarefs_alignment (loop_vinfo);
1978 return stat;
1982 /* Function vect_analyze_data_refs_alignment
1984 Analyze the alignment of the data-references in the loop.
1985 Return FALSE if a data reference is found that cannot be vectorized. */
1987 static bool
1988 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1990 if (vect_print_dump_info (REPORT_DETAILS))
1991 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1993 if (!vect_compute_data_refs_alignment (loop_vinfo))
1995 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1996 fprintf (vect_dump,
1997 "not vectorized: can't calculate alignment for data ref.");
1998 return false;
2001 return true;
2005 /* Function vect_analyze_data_ref_access.
2007 Analyze the access pattern of the data-reference DR. For now, a data access
2008 has to be consecutive to be considered vectorizable. */
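/* For example (illustrative): in "for (i=0; i<N; i++) a[i] = b[i];" both
   accesses have DR_STEP equal to the element size and are consecutive;
   "... = a[2*i]" has twice that step and can be handled only as part of
   an interleaving group.  */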
2010 static bool
2011 vect_analyze_data_ref_access (struct data_reference *dr)
2013 tree step = DR_STEP (dr);
2014 tree scalar_type = TREE_TYPE (DR_REF (dr));
2015 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2016 tree stmt = DR_STMT (dr);
2017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2018 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2019 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2020 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2021 HOST_WIDE_INT stride;
2023 /* Don't allow invariant accesses. */
2024 if (dr_step == 0)
2025 return false;
2027 if (nested_in_vect_loop_p (loop, stmt))
2029 /* For the rest of the analysis we use the outer-loop step. */
2030 step = STMT_VINFO_DR_STEP (stmt_info);
2031 dr_step = TREE_INT_CST_LOW (step);
2033 if (dr_step == 0)
2035 if (vect_print_dump_info (REPORT_ALIGNMENT))
2036 fprintf (vect_dump, "zero step in outer loop.");
2037 if (DR_IS_READ (dr))
2038 return true;
2039 else
2040 return false;
2044 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2045 interleaving group (including gaps). */
2046 stride = dr_step / type_size;
2048 /* Consecutive? */
2049 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2051 /* Mark that it is not interleaving. */
2052 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2053 return true;
2056 if (nested_in_vect_loop_p (loop, stmt))
2058 if (vect_print_dump_info (REPORT_ALIGNMENT))
2059 fprintf (vect_dump, "strided access in outer loop.");
2060 return false;
2063 /* A non-consecutive access is possible only as part of an interleaving group. */
2064 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2066 /* Check whether this DR is part of an interleaving group of which it is
2067 the only element accessed in the loop. */
2069 /* Gaps are supported only for loads. STEP must be a multiple of the type
2070 size. The size of the group must be a power of 2. */
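/* E.g. (illustrative): a load "... = a[2*i]" has DR_STEP equal to
   2 * type_size, hence stride == 2; it forms a group of size 2 in which
   the odd elements are gaps.  */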
2071 if (DR_IS_READ (dr)
2072 && (dr_step % type_size) == 0
2073 && stride > 0
2074 && exact_log2 (stride) != -1)
2076 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2077 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2078 if (vect_print_dump_info (REPORT_DR_DETAILS))
2080 fprintf (vect_dump, "Detected single element interleaving %d ",
2081 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2082 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2083 fprintf (vect_dump, " step ");
2084 print_generic_expr (vect_dump, step, TDF_SLIM);
2086 return true;
2088 if (vect_print_dump_info (REPORT_DETAILS))
2089 fprintf (vect_dump, "not consecutive access");
2090 return false;
2093 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2095 /* First stmt in the interleaving chain. Check the chain. */
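/* E.g. (illustrative): in

       for (i=0; i<N; i++)
         { a[2*i] = b[i]; a[2*i+1] = c[i]; }

   the two stores form one chain of size 2; the walk below verifies
   equal steps and a distance of one element between the members.  */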
2096 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2097 struct data_reference *data_ref = dr;
2098 unsigned int count = 1;
2099 tree next_step;
2100 tree prev_init = DR_INIT (data_ref);
2101 tree prev = stmt;
2102 HOST_WIDE_INT diff, count_in_bytes;
2104 while (next)
2106 /* Skip identical data-refs. If two or more stmts share a data-ref
2107 (supported only for loads), we vectorize only the first stmt, and
2108 the rest get their vectorized loads from the first one. */
2109 if (!tree_int_cst_compare (DR_INIT (data_ref),
2110 DR_INIT (STMT_VINFO_DATA_REF (
2111 vinfo_for_stmt (next)))))
2113 if (!DR_IS_READ (data_ref))
2115 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "Two store stmts share the same dr.");
2117 return false;
2120 /* Check that there are no load-store dependences for these loads,
2121 to prevent a load-store-load sequence to the same location. */
2122 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2123 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2125 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump,
2127 "READ_WRITE dependence in interleaving.");
2128 return false;
2131 /* For loads, record that NEXT reuses the data-ref load of PREV. */
2132 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2134 prev = next;
2135 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2136 continue;
2138 prev = next;
2140 /* Check that all the accesses have the same STEP. */
2141 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2142 if (tree_int_cst_compare (step, next_step))
2144 if (vect_print_dump_info (REPORT_DETAILS))
2145 fprintf (vect_dump, "not consecutive access in interleaving");
2146 return false;
2149 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2150 /* Check that the distance between two accesses is equal to the type
2151 size. Otherwise, we have gaps. */
2152 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2153 - TREE_INT_CST_LOW (prev_init)) / type_size;
2154 if (!DR_IS_READ (data_ref) && diff != 1)
2156 if (vect_print_dump_info (REPORT_DETAILS))
2157 fprintf (vect_dump, "interleaved store with gaps");
2158 return false;
2160 /* Store the gap from the previous member of the group. If there is no
2161 gap in the access, DR_GROUP_GAP is 1. */
2162 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2164 prev_init = DR_INIT (data_ref);
2165 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2166 /* Count the number of data-refs in the chain. */
2167 count++;
2170 /* COUNT is the number of accesses found; multiply it by the size of
2171 the type to get COUNT_IN_BYTES. */
2172 count_in_bytes = type_size * count;
2174 /* Check that the size of the interleaving is not greater than STEP. */
2175 if (dr_step < count_in_bytes)
2177 if (vect_print_dump_info (REPORT_DETAILS))
2179 fprintf (vect_dump, "interleaving size is greater than step for ");
2180 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2182 return false;
2185 /* Check that the size of the interleaving is equal to STEP for stores,
2186 i.e., that there are no gaps. */
2187 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2189 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "interleaved store with gaps");
2191 return false;
2194 /* Check that STEP is a multiple of type size. */
2195 if ((dr_step % type_size) != 0)
2197 if (vect_print_dump_info (REPORT_DETAILS))
2199 fprintf (vect_dump, "step is not a multiple of type size: step ");
2200 print_generic_expr (vect_dump, step, TDF_SLIM);
2201 fprintf (vect_dump, " size ");
2202 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2203 TDF_SLIM);
2205 return false;
2208 /* FORNOW: we handle only interleaving whose group size is a power of 2. */
2209 if (exact_log2 (stride) == -1)
2211 if (vect_print_dump_info (REPORT_DETAILS))
2212 fprintf (vect_dump, "interleaving is not a power of 2");
2213 return false;
2215 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2217 return true;
2221 /* Function vect_analyze_data_ref_accesses.
2223 Analyze the access pattern of all the data references in the loop.
2225 FORNOW: the only access pattern that is considered vectorizable is a
2226 simple step 1 (consecutive) access.
2228 FORNOW: handle only arrays and pointer accesses. */
2230 static bool
2231 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2233 unsigned int i;
2234 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2235 struct data_reference *dr;
2237 if (vect_print_dump_info (REPORT_DETAILS))
2238 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2240 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2241 if (!vect_analyze_data_ref_access (dr))
2243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2244 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2245 return false;
2248 return true;
2252 /* Function vect_analyze_data_refs.
2254 Find all the data references in the loop.
2256 The general structure of the analysis of data refs in the vectorizer is as
2257 follows:
2258 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
2259 find and analyze all data-refs in the loop and their dependences.
2260 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
2261 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
2262 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
2266 static bool
2267 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2269 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2270 unsigned int i;
2271 VEC (data_reference_p, heap) *datarefs;
2272 struct data_reference *dr;
2273 tree scalar_type;
2275 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2278 compute_data_dependences_for_loop (loop, true,
2279 &LOOP_VINFO_DATAREFS (loop_vinfo),
2280 &LOOP_VINFO_DDRS (loop_vinfo));
2282 /* Go through the data-refs and check that the analysis succeeded. Update
2283 the pointer from the stmt_vec_info struct to the DR, and set the vectype. */
2284 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2286 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2288 tree stmt;
2289 stmt_vec_info stmt_info;
2290 basic_block bb;
2291 tree base, offset, init;
2293 if (!dr || !DR_REF (dr))
2295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2296 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2297 return false;
2300 stmt = DR_STMT (dr);
2301 stmt_info = vinfo_for_stmt (stmt);
2303 /* Check that analysis of the data-ref succeeded. */
2304 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2305 || !DR_STEP (dr))
2307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2309 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2310 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2312 return false;
2315 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2317 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2318 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2319 "constant");
2320 return false;
2323 if (!DR_SYMBOL_TAG (dr))
2325 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2327 fprintf (vect_dump, "not vectorized: no memory tag for ");
2328 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2330 return false;
2333 base = unshare_expr (DR_BASE_ADDRESS (dr));
2334 offset = unshare_expr (DR_OFFSET (dr));
2335 init = unshare_expr (DR_INIT (dr));
2337 /* Update DR field in stmt_vec_info struct. */
2338 bb = bb_for_stmt (stmt);
2340 /* If the dataref is in an inner-loop of the loop that is considered
2341 for vectorization, we also want to analyze the access relative to
2342 the outer-loop (DR contains information only relative to the
2343 inner-most enclosing loop). We do that by building a reference to the
2344 first location accessed by the inner-loop, and analyze it relative to
2345 the outer-loop. */
2346 if (nested_in_vect_loop_p (loop, stmt))
2348 tree outer_step, outer_base, outer_init;
2349 HOST_WIDE_INT pbitsize, pbitpos;
2350 tree poffset;
2351 enum machine_mode pmode;
2352 int punsignedp, pvolatilep;
2353 affine_iv base_iv, offset_iv;
2354 tree dinit;
2356 /* Build a reference to the first location accessed by the
2357 inner-loop: *(BASE+INIT). (The first location is actually
2358 BASE+INIT+OFFSET, but we add OFFSET separately later.) */
2359 tree inner_base = build_fold_indirect_ref
2360 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
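/* E.g. (illustrative): for an access a[i][j] in a j-loop nested in an
   i-loop, DR describes the evolution in the j-loop; here we build a
   reference to the first location it accesses, a[i][0], and analyze the
   evolution of its address in the i-loop.  */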
2362 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "analyze in outer-loop: ");
2365 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2368 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2369 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2370 gcc_assert (outer_base != NULL_TREE);
2372 if (pbitpos % BITS_PER_UNIT != 0)
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "failed: bit offset alignment.\n");
2376 return false;
2379 outer_base = build_fold_addr_expr (outer_base);
2380 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2384 return false;
2387 if (offset)
2389 if (poffset)
2390 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
2391 else
2392 poffset = offset;
2395 if (!poffset)
2397 offset_iv.base = ssize_int (0);
2398 offset_iv.step = ssize_int (0);
2400 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
2402 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "evolution of offset is not affine.\n");
2404 return false;
2407 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2408 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2409 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2410 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2411 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2413 outer_step = size_binop (PLUS_EXPR,
2414 fold_convert (ssizetype, base_iv.step),
2415 fold_convert (ssizetype, offset_iv.step));
2417 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2418 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2419 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2420 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2421 STMT_VINFO_DR_OFFSET (stmt_info) =
2422 fold_convert (ssizetype, offset_iv.base);
2423 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2424 size_int (highest_pow2_factor (offset_iv.base));
2426 if (dump_file && (dump_flags & TDF_DETAILS))
2428 fprintf (dump_file, "\touter base_address: ");
2429 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2430 fprintf (dump_file, "\n\touter offset from base address: ");
2431 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2432 fprintf (dump_file, "\n\touter constant offset from base address: ");
2433 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2434 fprintf (dump_file, "\n\touter step: ");
2435 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2436 fprintf (dump_file, "\n\touter aligned to: ");
2437 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2441 if (STMT_VINFO_DATA_REF (stmt_info))
2443 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2445 fprintf (vect_dump,
2446 "not vectorized: more than one data ref in stmt: ");
2447 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2449 return false;
2451 STMT_VINFO_DATA_REF (stmt_info) = dr;
2453 /* Set vectype for STMT. */
2454 scalar_type = TREE_TYPE (DR_REF (dr));
2455 STMT_VINFO_VECTYPE (stmt_info) =
2456 get_vectype_for_scalar_type (scalar_type);
2457 if (!STMT_VINFO_VECTYPE (stmt_info))
2459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2461 fprintf (vect_dump,
2462 "not vectorized: no vectype for stmt: ");
2463 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2464 fprintf (vect_dump, " scalar_type: ");
2465 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2467 return false;
2471 return true;
2475 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2477 /* Function vect_mark_relevant.
2479 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2481 static void
2482 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2483 enum vect_relevant relevant, bool live_p)
2485 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2486 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2487 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2489 if (vect_print_dump_info (REPORT_DETAILS))
2490 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2492 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2494 tree pattern_stmt;
2496 /* This is the last stmt in a sequence that was detected as a
2497 pattern that can potentially be vectorized. Don't mark the stmt
2498 as relevant/live because it's not going to be vectorized.
2499 Instead mark the pattern-stmt that replaces it. */
2501 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2505 stmt_info = vinfo_for_stmt (pattern_stmt);
2506 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2507 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2508 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2509 stmt = pattern_stmt;
2512 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2513 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2514 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2516 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2517 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2519 if (vect_print_dump_info (REPORT_DETAILS))
2520 fprintf (vect_dump, "already marked relevant/live.");
2521 return;
2524 VEC_safe_push (tree, heap, *worklist, stmt);
2528 /* Function vect_stmt_relevant_p.
2530 Return true if STMT in loop that is represented by LOOP_VINFO is
2531 "relevant for vectorization".
2533 A stmt is considered "relevant for vectorization" if:
2534 - it has uses outside the loop.
2535 - it has vdefs (it alters memory).
2536 - it is a control stmt in the loop (other than the exit condition).
2538 CHECKME: what other side effects would the vectorizer allow? */
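/* E.g. (illustrative): a store "a[i] = x" is relevant (it alters memory);
   a stmt whose result, such as a summation total, is read after the loop
   is live.  */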
2540 static bool
2541 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2542 enum vect_relevant *relevant, bool *live_p)
2544 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2545 ssa_op_iter op_iter;
2546 imm_use_iterator imm_iter;
2547 use_operand_p use_p;
2548 def_operand_p def_p;
2550 *relevant = vect_unused_in_loop;
2551 *live_p = false;
2553 /* cond stmt other than loop exit cond. */
2554 if (is_ctrl_stmt (stmt)
2555 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
2556 *relevant = vect_used_in_loop;
2558 /* changing memory. */
2559 if (TREE_CODE (stmt) != PHI_NODE)
2560 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2564 *relevant = vect_used_in_loop;
2567 /* uses outside the loop. */
2568 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2570 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2572 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2573 if (!flow_bb_inside_loop_p (loop, bb))
2575 if (vect_print_dump_info (REPORT_DETAILS))
2576 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2578 /* We expect all such uses to be in the loop exit phis
2579 (because of loop-closed SSA form). */
2580 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2581 gcc_assert (bb == single_exit (loop)->dest);
2583 *live_p = true;
2588 return (*live_p || *relevant);
2593 Function process_use.
2595 Inputs:
2596 - a USE in STMT in a loop represented by LOOP_VINFO
2597 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2598 that defined USE. This is done by calling mark_relevant and passing it
2599 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
2601 Outputs:
2602 Generally, LIVE_P and RELEVANT are used to define the liveness and
2603 relevance info of the DEF_STMT of this USE:
2604 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2605 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2606 Exceptions:
2607 - case 1: If USE is used only for address computations (e.g. array indexing),
2608 which do not need to be directly vectorized, then the liveness/relevance
2609 of the respective DEF_STMT is left unchanged.
2610 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2611 skip DEF_STMT because it has already been processed.
2612 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
2613 be modified accordingly.
2615 Return true if everything is as expected. Return false otherwise. */
2617 static bool
2618 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2619 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2621 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2622 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2623 stmt_vec_info dstmt_vinfo;
2624 basic_block bb, def_bb;
2625 tree def, def_stmt;
2626 enum vect_def_type dt;
2628 /* case 1: we are only interested in uses that need to be vectorized. Uses
2629 that are used for address computation are not considered relevant. */
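/* E.g. (illustrative): in "x = a[i]" the use of i only feeds the address
   computation of a[i], so the def-stmt of i need not be vectorized
   itself.  */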
2630 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2631 return true;
2633 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2635 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2636 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2637 return false;
2640 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2641 return true;
2643 def_bb = bb_for_stmt (def_stmt);
2644 if (!flow_bb_inside_loop_p (loop, def_bb))
2646 if (vect_print_dump_info (REPORT_DETAILS))
2647 fprintf (vect_dump, "def_stmt is out of loop.");
2648 return true;
2651 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
2652 DEF_STMT must have already been processed, because this should be the
2653 only way that STMT, which is a reduction-phi, was put in the worklist,
2654 as there should be no other uses for DEF_STMT in the loop. So we just
2655 check that everything is as expected, and we are done. */
2656 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2657 bb = bb_for_stmt (stmt);
2658 if (TREE_CODE (stmt) == PHI_NODE
2659 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2660 && TREE_CODE (def_stmt) != PHI_NODE
2661 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
2662 && bb->loop_father == def_bb->loop_father)
2664 if (vect_print_dump_info (REPORT_DETAILS))
2665 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
2666 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2667 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2668 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2669 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2670 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2671 return true;
2674 /* case 3a: outer-loop stmt defining an inner-loop stmt:
2675 outer-loop-header-bb:
2676 d = def_stmt
2677 inner-loop:
2678 stmt # use (d)
2679 outer-loop-tail-bb:
2680 ... */
2681 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
2685 switch (relevant)
2687 case vect_unused_in_loop:
2688 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
2689 vect_used_by_reduction : vect_unused_in_loop;
2690 break;
2691 case vect_used_in_outer_by_reduction:
2692 relevant = vect_used_by_reduction;
2693 break;
2694 case vect_used_in_outer:
2695 relevant = vect_used_in_loop;
2696 break;
2697 case vect_used_by_reduction:
2698 case vect_used_in_loop:
2699 break;
2701 default:
2702 gcc_unreachable ();
2706 /* case 3b: inner-loop stmt defining an outer-loop stmt:
2707 outer-loop-header-bb:
2709 inner-loop:
2710 d = def_stmt
2711 outer-loop-tail-bb:
2712 stmt # use (d) */
2713 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
2715 if (vect_print_dump_info (REPORT_DETAILS))
2716 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
2717 switch (relevant)
2719 case vect_unused_in_loop:
2720 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
2721 vect_used_in_outer_by_reduction : vect_unused_in_loop;
2722 break;
2724 case vect_used_in_outer_by_reduction:
2725 case vect_used_in_outer:
2726 break;
2728 case vect_used_by_reduction:
2729 relevant = vect_used_in_outer_by_reduction;
2730 break;
2732 case vect_used_in_loop:
2733 relevant = vect_used_in_outer;
2734 break;
2736 default:
2737 gcc_unreachable ();
2741 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2742 return true;
2746 /* Function vect_mark_stmts_to_be_vectorized.
2748 Not all stmts in the loop need to be vectorized. For example:
2750 for i...
2751 for j...
2752 1. T0 = i + j
2753 2. T1 = a[T0]
2755 3. j = j + 1
2757 Stmts 1 and 3 do not need to be vectorized, because loop control and
2758 addressing of vectorized data-refs are handled differently.
2760 This pass detects such stmts. */
2762 static bool
2763 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2765 VEC(tree,heap) *worklist;
2766 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2767 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2768 unsigned int nbbs = loop->num_nodes;
2769 block_stmt_iterator si;
2770 tree stmt;
2771 stmt_ann_t ann;
2772 unsigned int i;
2773 stmt_vec_info stmt_vinfo;
2774 basic_block bb;
2775 tree phi;
2776 bool live_p;
2777 enum vect_relevant relevant;
2779 if (vect_print_dump_info (REPORT_DETAILS))
2780 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2782 worklist = VEC_alloc (tree, heap, 64);
2784 /* 1. Init worklist. */
2785 for (i = 0; i < nbbs; i++)
2787 bb = bbs[i];
2788 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2790 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "init: phi relevant? ");
2793 print_generic_expr (vect_dump, phi, TDF_SLIM);
2796 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2797 vect_mark_relevant (&worklist, phi, relevant, live_p);
2799 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2801 stmt = bsi_stmt (si);
2802 if (vect_print_dump_info (REPORT_DETAILS))
2804 fprintf (vect_dump, "init: stmt relevant? ");
2805 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2808 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2809 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2813 /* 2. Process worklist. */
2814 while (VEC_length (tree, worklist) > 0)
2816 use_operand_p use_p;
2817 ssa_op_iter iter;
2819 stmt = VEC_pop (tree, worklist);
2820 if (vect_print_dump_info (REPORT_DETAILS))
2822 fprintf (vect_dump, "worklist: examine stmt: ");
2823 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2826 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2827 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2828 liveness and relevance properties of STMT. */
2829 ann = stmt_ann (stmt);
2830 stmt_vinfo = vinfo_for_stmt (stmt);
2831 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2832 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2834 /* Generally, the liveness and relevance properties of STMT are
2835 propagated as is to the DEF_STMTs of its USEs:
2836 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2837 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2839 One exception is when STMT has been identified as defining a reduction
2840 variable; in this case we set the liveness/relevance as follows:
2841 live_p = false
2842 relevant = vect_used_by_reduction
2843 This is because we distinguish between two kinds of relevant stmts -
2844 those that are used by a reduction computation, and those that are
2845 (also) used by a regular computation. This allows us later on to
2846 identify stmts that are used solely by a reduction, and therefore the
2847 order of the results that they produce does not have to be kept.
2849 Reduction phis are expected to be used by a reduction stmt, or by
2850 a stmt in an outer loop; other reduction stmts are expected to be used
2851 in the loop, and possibly by a stmt in an outer loop.
2852 Here are the expected values of "relevant" for reduction phis/stmts:
2854 relevance:                          phi        stmt
2855 vect_unused_in_loop                            ok
2856 vect_used_in_outer_by_reduction     ok         ok
2857 vect_used_in_outer                  ok         ok
2858 vect_used_by_reduction              ok
2859 vect_used_in_loop                                       */
2861 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2863 enum vect_relevant tmp_relevant = relevant;
2864 switch (tmp_relevant)
2866 case vect_unused_in_loop:
2867 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2868 relevant = vect_used_by_reduction;
2869 break;
2871 case vect_used_in_outer_by_reduction:
2872 case vect_used_in_outer:
2873 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
2874 && TREE_CODE (stmt) != DOT_PROD_EXPR);
2875 break;
2877 case vect_used_by_reduction:
2878 if (TREE_CODE (stmt) == PHI_NODE)
2879 break;
2880 /* fall through */
2881 case vect_used_in_loop:
2882 default:
2883 if (vect_print_dump_info (REPORT_DETAILS))
2884 fprintf (vect_dump, "unsupported use of reduction.");
2885 VEC_free (tree, heap, worklist);
2886 return false;
2888 live_p = false;
2891 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2893 tree op = USE_FROM_PTR (use_p);
2894 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2896 VEC_free (tree, heap, worklist);
2897 return false;
2900 } /* while worklist */
2902 VEC_free (tree, heap, worklist);
2903 return true;
2907 /* Function vect_can_advance_ivs_p
2909 In case the number of iterations of LOOP is unknown at compile
2910 time, an epilog loop will be generated, and the loop induction variables
2911 (IVs) will be "advanced" to the value they are supposed to take just before
2912 the epilog loop. Here we check that the access function of the loop IVs
2913 and the expression that represents the loop bound are simple enough.
2914 These restrictions will be relaxed in the future. */
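/* E.g. (illustrative): an IV with access function {init, +, step}_loop
   can be advanced to init + step * (niters - niters % VF) at the entry
   of the epilog loop.  */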
2916 static bool
2917 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2919 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2920 basic_block bb = loop->header;
2921 tree phi;
2923 /* Analyze phi functions of the loop header. */
2925 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2928 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2930 tree access_fn = NULL;
2931 tree evolution_part;
2933 if (vect_print_dump_info (REPORT_DETAILS))
2935 fprintf (vect_dump, "Analyze phi: ");
2936 print_generic_expr (vect_dump, phi, TDF_SLIM);
2939 /* Skip virtual phis. The data dependences that are associated with
2940 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2942 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2944 if (vect_print_dump_info (REPORT_DETAILS))
2945 fprintf (vect_dump, "virtual phi. skip.");
2946 continue;
2949 /* Skip reduction phis. */
2951 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2953 if (vect_print_dump_info (REPORT_DETAILS))
2954 fprintf (vect_dump, "reduc phi. skip.");
2955 continue;
2958 /* Analyze the evolution function. */
2960 access_fn = instantiate_parameters
2961 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2963 if (!access_fn)
2965 if (vect_print_dump_info (REPORT_DETAILS))
2966 fprintf (vect_dump, "No Access function.");
2967 return false;
2970 if (vect_print_dump_info (REPORT_DETAILS))
2972 fprintf (vect_dump, "Access function of PHI: ");
2973 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2976 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2978 if (evolution_part == NULL_TREE)
2980 if (vect_print_dump_info (REPORT_DETAILS))
2981 fprintf (vect_dump, "No evolution.");
2982 return false;
2985 /* FORNOW: We do not transform initial conditions of IVs
2986 whose evolution functions are polynomials of degree >= 2. */
2988 if (tree_is_chrec (evolution_part))
2989 return false;
2992 return true;
2996 /* Function vect_get_loop_niters.
2998 Determine how many iterations the loop is executed.
2999 If an expression that represents the number of iterations
3000 can be constructed, place it in NUMBER_OF_ITERATIONS.
3001 Return the loop exit condition. */
3003 static tree
3004 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3006 tree niters;
3008 if (vect_print_dump_info (REPORT_DETAILS))
3009 fprintf (vect_dump, "=== get_loop_niters ===");
3011 niters = number_of_exit_cond_executions (loop);
3013 if (niters != NULL_TREE
3014 && niters != chrec_dont_know)
3016 *number_of_iterations = niters;
3018 if (vect_print_dump_info (REPORT_DETAILS))
3020 fprintf (vect_dump, "==> get_loop_niters:" );
3021 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3025 return get_loop_exit_condition (loop);
3029 /* Function vect_analyze_loop_1.
3031 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3032 for it. The different analyses will record information in the
3033 loop_vec_info struct. This is a subset of the analyses applied in
3034 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3035 that is now considered for (outer-loop) vectorization. */
3037 static loop_vec_info
3038 vect_analyze_loop_1 (struct loop *loop)
3040 loop_vec_info loop_vinfo;
3042 if (vect_print_dump_info (REPORT_DETAILS))
3043 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3045 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3047 loop_vinfo = vect_analyze_loop_form (loop);
3048 if (!loop_vinfo)
3050 if (vect_print_dump_info (REPORT_DETAILS))
3051 fprintf (vect_dump, "bad inner-loop form.");
3052 return NULL;
3055 return loop_vinfo;
3059 /* Function vect_analyze_loop_form.
3061 Verify that certain CFG restrictions hold, including:
3062 - the loop has a pre-header
3063 - the loop has a single entry and exit
3064 - the loop exit condition is simple enough, and the number of iterations
3065 can be analyzed (a countable loop). */
3067 static loop_vec_info
3068 vect_analyze_loop_form (struct loop *loop)
3070 loop_vec_info loop_vinfo;
3071 tree loop_cond;
3072 tree number_of_iterations = NULL;
3073 loop_vec_info inner_loop_vinfo = NULL;
3075 if (vect_print_dump_info (REPORT_DETAILS))
3076 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3078 /* Different restrictions apply when we are considering an inner-most loop,
3079 vs. an outer (nested) loop.
3080 (FORNOW. May want to relax some of these restrictions in the future). */
3082 if (!loop->inner)
3084 /* Inner-most loop. We currently require that the number of BBs is
3085 exactly 2 (the header and latch). Vectorizable inner-most loops
3086 look like this:
3088 (pre-header)
3090 header <--------+
3091 | | |
3092 | +--> latch --+
3094 (exit-bb) */
3096 if (loop->num_nodes != 2)
3098 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3099 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3100 return NULL;
3103 if (empty_block_p (loop->header))
3105 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3106 fprintf (vect_dump, "not vectorized: empty loop.");
3107 return NULL;
3110 else
3112 struct loop *innerloop = loop->inner;
3113 edge backedge, entryedge;
3115 /* Nested loop. We currently require that the loop is doubly-nested,
3116 contains a single inner loop, and the number of BBs is exactly 5.
3117 Vectorizable outer-loops look like this:
3119 (pre-header)
3121 header <---+
3123 inner-loop |
3125 tail ------+
3127 (exit-bb)
3129 The inner-loop has the properties expected of inner-most loops
3130 as described above. */
3132 if ((loop->inner)->inner || (loop->inner)->next)
3134 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3135 fprintf (vect_dump, "not vectorized: multiple nested loops.");
3136 return NULL;
3139 /* Analyze the inner-loop. */
3140 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
3141 if (!inner_loop_vinfo)
3143 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3144 fprintf (vect_dump, "not vectorized: Bad inner loop.");
3145 return NULL;
3148 if (!expr_invariant_in_loop_p (loop,
3149 LOOP_VINFO_NITERS (inner_loop_vinfo)))
3151 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3152 fprintf (vect_dump,
3153 "not vectorized: inner-loop count not invariant.");
3154 destroy_loop_vec_info (inner_loop_vinfo, true);
3155 return NULL;
3158 if (loop->num_nodes != 5)
3160 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3161 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3162 destroy_loop_vec_info (inner_loop_vinfo, true);
3163 return NULL;
3166 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
3167 backedge = EDGE_PRED (innerloop->header, 1);
3168 entryedge = EDGE_PRED (innerloop->header, 0);
3169 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
3171 backedge = EDGE_PRED (innerloop->header, 0);
3172 entryedge = EDGE_PRED (innerloop->header, 1);
3175 if (entryedge->src != loop->header
3176 || !single_exit (innerloop)
3177 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
3179 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3180 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
3181 destroy_loop_vec_info (inner_loop_vinfo, true);
3182 return NULL;
3185 if (vect_print_dump_info (REPORT_DETAILS))
3186 fprintf (vect_dump, "Considering outer-loop vectorization.");
3189 if (!single_exit (loop)
3190 || EDGE_COUNT (loop->header->preds) != 2)
3192 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3194 if (!single_exit (loop))
3195 fprintf (vect_dump, "not vectorized: multiple exits.");
3196 else if (EDGE_COUNT (loop->header->preds) != 2)
3197 fprintf (vect_dump, "not vectorized: too many incoming edges.");
3199 if (inner_loop_vinfo)
3200 destroy_loop_vec_info (inner_loop_vinfo, true);
3201 return NULL;
3204 /* We assume that the loop exit condition is at the end of the loop, i.e.,
3205 that the loop is represented as a do-while (with a proper if-guard
3206 before the loop if needed), where the loop header contains all the
3207 executable statements, and the latch is empty. */
3208 if (!empty_block_p (loop->latch)
3209 || phi_nodes (loop->latch))
3211 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3212 fprintf (vect_dump, "not vectorized: unexpected loop form.");
3213 if (inner_loop_vinfo)
3214 destroy_loop_vec_info (inner_loop_vinfo, true);
3215 return NULL;
3218 /* Make sure there exists a single-predecessor exit bb: */
3219 if (!single_pred_p (single_exit (loop)->dest))
3221 edge e = single_exit (loop);
3222 if (!(e->flags & EDGE_ABNORMAL))
3224 split_loop_exit_edge (e);
3225 if (vect_print_dump_info (REPORT_DETAILS))
3226 fprintf (vect_dump, "split exit edge.");
3228 else
3230 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3231 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
3232 if (inner_loop_vinfo)
3233 destroy_loop_vec_info (inner_loop_vinfo, true);
3234 return NULL;
3238 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3239 if (!loop_cond)
3241 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3242 fprintf (vect_dump, "not vectorized: complicated exit condition.");
3243 if (inner_loop_vinfo)
3244 destroy_loop_vec_info (inner_loop_vinfo, true);
3245 return NULL;
3248 if (!number_of_iterations)
3250 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3251 fprintf (vect_dump,
3252 "not vectorized: number of iterations cannot be computed.");
3253 if (inner_loop_vinfo)
3254 destroy_loop_vec_info (inner_loop_vinfo, true);
3255 return NULL;
3258 if (chrec_contains_undetermined (number_of_iterations))
3260 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3261 fprintf (vect_dump, "Infinite number of iterations.");
3262 if (inner_loop_vinfo)
3263 destroy_loop_vec_info (inner_loop_vinfo, true);
3264 return NULL;
3267 if (!NITERS_KNOWN_P (number_of_iterations))
3269 if (vect_print_dump_info (REPORT_DETAILS))
3271 fprintf (vect_dump, "Symbolic number of iterations is ");
3272 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
3275 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
3277 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3278 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
3279 if (inner_loop_vinfo)
3280 destroy_loop_vec_info (inner_loop_vinfo, false);
3281 return NULL;
3284 loop_vinfo = new_loop_vec_info (loop);
3285 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3287 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
3289 /* CHECKME: May want to keep it around in the future. */
3290 if (inner_loop_vinfo)
3291 destroy_loop_vec_info (inner_loop_vinfo, false);
3293 gcc_assert (!loop->aux);
3294 loop->aux = loop_vinfo;
3295 return loop_vinfo;
3299 /* Function vect_analyze_loop.
3301 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3302 for it. The different analyses will record information in the
3303 loop_vec_info struct. */
3304 loop_vec_info
3305 vect_analyze_loop (struct loop *loop)
3307 bool ok;
3308 loop_vec_info loop_vinfo;
3310 if (vect_print_dump_info (REPORT_DETAILS))
3311 fprintf (vect_dump, "===== analyze_loop_nest =====");
3313 if (loop_outer (loop)
3314 && loop_vec_info_for_loop (loop_outer (loop))
3315 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
3317 if (vect_print_dump_info (REPORT_DETAILS))
3318 fprintf (vect_dump, "outer-loop already vectorized.");
3319 return NULL;
3322 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3324 loop_vinfo = vect_analyze_loop_form (loop);
3325 if (!loop_vinfo)
3327 if (vect_print_dump_info (REPORT_DETAILS))
3328 fprintf (vect_dump, "bad loop form.");
3329 return NULL;
3332 /* Find all data references in the loop (which correspond to vdefs/vuses)
3333 and analyze their evolution in the loop.
3335 FORNOW: Handle only simple array references whose
3336 alignment can be forced, and aligned pointer references. */
3338 ok = vect_analyze_data_refs (loop_vinfo);
3339 if (!ok)
3341 if (vect_print_dump_info (REPORT_DETAILS))
3342 fprintf (vect_dump, "bad data references.");
3343 destroy_loop_vec_info (loop_vinfo, true);
3344 return NULL;
3347 /* Classify all cross-iteration scalar data-flow cycles.
3348 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3350 vect_analyze_scalar_cycles (loop_vinfo);
3352 vect_pattern_recog (loop_vinfo);
3354 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3356 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3357 if (!ok)
3359 if (vect_print_dump_info (REPORT_DETAILS))
3360 fprintf (vect_dump, "unexpected pattern.");
3361 destroy_loop_vec_info (loop_vinfo, true);
3362 return NULL;
3365 /* Analyze the alignment of the data-refs in the loop.
3366 Fail if a data reference is found that cannot be vectorized. */
3368 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3369 if (!ok)
3371 if (vect_print_dump_info (REPORT_DETAILS))
3372 fprintf (vect_dump, "bad data alignment.");
3373 destroy_loop_vec_info (loop_vinfo, true);
3374 return NULL;
3377 ok = vect_determine_vectorization_factor (loop_vinfo);
3378 if (!ok)
3380 if (vect_print_dump_info (REPORT_DETAILS))
3381 fprintf (vect_dump, "can't determine vectorization factor.");
3382 destroy_loop_vec_info (loop_vinfo, true);
3383 return NULL;
3386 /* Analyze data dependences between the data-refs in the loop.
3387 FORNOW: fail at the first data dependence that we encounter. */
3389 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3390 if (!ok)
3392 if (vect_print_dump_info (REPORT_DETAILS))
3393 fprintf (vect_dump, "bad data dependence.");
3394 destroy_loop_vec_info (loop_vinfo, true);
3395 return NULL;
3398 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3399 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3401 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3402 if (!ok)
3404 if (vect_print_dump_info (REPORT_DETAILS))
3405 fprintf (vect_dump, "bad data access.");
3406 destroy_loop_vec_info (loop_vinfo, true);
3407 return NULL;
3410 /* This pass will decide on using loop versioning and/or loop peeling in
3411 order to enhance the alignment of data references in the loop. */
3413 ok = vect_enhance_data_refs_alignment (loop_vinfo);
3414 if (!ok)
3416 if (vect_print_dump_info (REPORT_DETAILS))
3417 fprintf (vect_dump, "bad data alignment.");
3418 destroy_loop_vec_info (loop_vinfo, true);
3419 return NULL;
3422 /* Scan all the operations in the loop and make sure they are
3423 vectorizable. */
3425 ok = vect_analyze_operations (loop_vinfo);
3426 if (!ok)
3428 if (vect_print_dump_info (REPORT_DETAILS))
3429 fprintf (vect_dump, "bad operation or unsupported loop bound.");
3430 destroy_loop_vec_info (loop_vinfo, true);
3431 return NULL;
3434 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3436 return loop_vinfo;