ChangeLog/libgcc
[official-gcc.git] / gcc / tree-vect-analyze.c
blobb4cd79d5b6ddc85c56da68d190fa06b6c722df7c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
78 in the loop.
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
81 original loop:
82 for (i=0; i<N; i++){
83 a[i] = b[i] + c[i];
86 vectorized loop:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
92 static bool
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
100 tree scalar_type;
101 tree phi;
102 tree vectype;
103 unsigned int nunits;
104 stmt_vec_info stmt_info;
105 int i;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
137 if (!vectype)
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
141 fprintf (vect_dump,
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
145 return false;
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 gcc_assert (stmt_info);
178 /* skip stmts which do not need to be vectorized. */
179 if (!STMT_VINFO_RELEVANT_P (stmt_info)
180 && !STMT_VINFO_LIVE_P (stmt_info))
182 if (vect_print_dump_info (REPORT_DETAILS))
183 fprintf (vect_dump, "skip.");
184 continue;
187 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
189 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
191 fprintf (vect_dump, "not vectorized: irregular stmt.");
192 print_generic_expr (vect_dump, stmt, TDF_SLIM);
194 return false;
197 if (!GIMPLE_STMT_P (stmt)
198 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
202 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
203 print_generic_expr (vect_dump, stmt, TDF_SLIM);
205 return false;
208 if (STMT_VINFO_VECTYPE (stmt_info))
210 /* The only case when a vectype had been already set is for stmts
211 that contain a dataref, or for "pattern-stmts" (stmts generated
212 by the vectorizer to represent/replace a certain idiom). */
213 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
214 || is_pattern_stmt_p (stmt_info));
215 vectype = STMT_VINFO_VECTYPE (stmt_info);
217 else
219 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
220 && !is_pattern_stmt_p (stmt_info));
222 /* We set the vectype according to the type of the result (lhs).
223 For stmts whose result-type is different than the type of the
224 arguments (e.g. demotion, promotion), vectype will be reset
225 appropriately (later). Note that we have to visit the smallest
226 datatype in this function, because that determines the VF.
227 If the smallest datatype in the loop is present only as the
228 rhs of a promotion operation - we'd miss it here.
229 However, in such a case, that a variable of this datatype
230 does not appear in the lhs anywhere in the loop, it shouldn't
231 affect the vectorization factor. */
232 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
234 if (vect_print_dump_info (REPORT_DETAILS))
236 fprintf (vect_dump, "get vectype for scalar type: ");
237 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
240 vectype = get_vectype_for_scalar_type (scalar_type);
241 if (!vectype)
243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
245 fprintf (vect_dump,
246 "not vectorized: unsupported data-type ");
247 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
249 return false;
251 STMT_VINFO_VECTYPE (stmt_info) = vectype;
254 if (vect_print_dump_info (REPORT_DETAILS))
256 fprintf (vect_dump, "vectype: ");
257 print_generic_expr (vect_dump, vectype, TDF_SLIM);
260 nunits = TYPE_VECTOR_SUBPARTS (vectype);
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "nunits = %d", nunits);
264 if (!vectorization_factor
265 || (nunits > vectorization_factor))
266 vectorization_factor = nunits;
271 /* TODO: Analyze cost. Decide if worth while to vectorize. */
272 if (vect_print_dump_info (REPORT_DETAILS))
273 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
274 if (vectorization_factor <= 1)
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported data-type");
278 return false;
280 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
282 return true;
286 /* Function vect_analyze_operations.
288 Scan the loop stmts and make sure they are all vectorizable. */
290 static bool
291 vect_analyze_operations (loop_vec_info loop_vinfo)
293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
295 int nbbs = loop->num_nodes;
296 block_stmt_iterator si;
297 unsigned int vectorization_factor = 0;
298 int i;
299 bool ok;
300 tree phi;
301 stmt_vec_info stmt_info;
302 bool need_to_vectorize = false;
303 int min_profitable_iters;
304 int min_scalar_loop_bound;
305 unsigned int th;
307 if (vect_print_dump_info (REPORT_DETAILS))
308 fprintf (vect_dump, "=== vect_analyze_operations ===");
310 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
311 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
313 for (i = 0; i < nbbs; i++)
315 basic_block bb = bbs[i];
317 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
319 ok = true;
321 stmt_info = vinfo_for_stmt (phi);
322 if (vect_print_dump_info (REPORT_DETAILS))
324 fprintf (vect_dump, "examining phi: ");
325 print_generic_expr (vect_dump, phi, TDF_SLIM);
328 gcc_assert (stmt_info);
330 if (STMT_VINFO_LIVE_P (stmt_info))
332 /* FORNOW: not yet supported. */
333 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
334 fprintf (vect_dump, "not vectorized: value used after loop.");
335 return false;
338 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
339 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
341 /* A scalar-dependence cycle that we don't support. */
342 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
343 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
344 return false;
347 if (STMT_VINFO_RELEVANT_P (stmt_info))
349 need_to_vectorize = true;
350 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
351 ok = vectorizable_induction (phi, NULL, NULL);
354 if (!ok)
356 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
358 fprintf (vect_dump,
359 "not vectorized: relevant phi not supported: ");
360 print_generic_expr (vect_dump, phi, TDF_SLIM);
362 return false;
366 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
368 tree stmt = bsi_stmt (si);
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
372 if (vect_print_dump_info (REPORT_DETAILS))
374 fprintf (vect_dump, "==> examining statement: ");
375 print_generic_expr (vect_dump, stmt, TDF_SLIM);
378 gcc_assert (stmt_info);
380 /* skip stmts which do not need to be vectorized.
381 this is expected to include:
382 - the COND_EXPR which is the loop exit condition
383 - any LABEL_EXPRs in the loop
384 - computations that are used only for array indexing or loop
385 control */
387 if (!STMT_VINFO_RELEVANT_P (stmt_info)
388 && !STMT_VINFO_LIVE_P (stmt_info))
390 if (vect_print_dump_info (REPORT_DETAILS))
391 fprintf (vect_dump, "irrelevant.");
392 continue;
395 switch (STMT_VINFO_DEF_TYPE (stmt_info))
397 case vect_loop_def:
398 break;
400 case vect_reduction_def:
401 gcc_assert (relevance == vect_unused_in_loop);
402 break;
404 case vect_induction_def:
405 case vect_constant_def:
406 case vect_invariant_def:
407 case vect_unknown_def_type:
408 default:
409 gcc_unreachable ();
412 if (STMT_VINFO_RELEVANT_P (stmt_info))
414 gcc_assert (GIMPLE_STMT_P (stmt)
415 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
416 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
417 need_to_vectorize = true;
420 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
421 || vectorizable_type_demotion (stmt, NULL, NULL)
422 || vectorizable_conversion (stmt, NULL, NULL)
423 || vectorizable_operation (stmt, NULL, NULL)
424 || vectorizable_assignment (stmt, NULL, NULL)
425 || vectorizable_load (stmt, NULL, NULL)
426 || vectorizable_call (stmt, NULL, NULL)
427 || vectorizable_store (stmt, NULL, NULL)
428 || vectorizable_condition (stmt, NULL, NULL)
429 || vectorizable_reduction (stmt, NULL, NULL));
431 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
432 need extra handling, except for vectorizable reductions. */
433 if (STMT_VINFO_LIVE_P (stmt_info)
434 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
435 ok |= vectorizable_live_operation (stmt, NULL, NULL);
437 if (!ok)
439 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
441 fprintf (vect_dump, "not vectorized: stmt not supported: ");
442 print_generic_expr (vect_dump, stmt, TDF_SLIM);
444 return false;
446 } /* stmts in bb */
447 } /* bbs */
449 /* All operations in the loop are either irrelevant (deal with loop
450 control, or dead), or only used outside the loop and can be moved
451 out of the loop (e.g. invariants, inductions). The loop can be
452 optimized away by scalar optimizations. We're better off not
453 touching this loop. */
454 if (!need_to_vectorize)
456 if (vect_print_dump_info (REPORT_DETAILS))
457 fprintf (vect_dump,
458 "All the computation can be taken out of the loop.");
459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
460 fprintf (vect_dump,
461 "not vectorized: redundant loop. no profit to vectorize.");
462 return false;
465 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
466 && vect_print_dump_info (REPORT_DETAILS))
467 fprintf (vect_dump,
468 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
469 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
471 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
472 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
474 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
475 fprintf (vect_dump, "not vectorized: iteration count too small.");
476 if (vect_print_dump_info (REPORT_DETAILS))
477 fprintf (vect_dump,"not vectorized: iteration count smaller than "
478 "vectorization factor.");
479 return false;
482 /* Analyze cost. Decide if worth while to vectorize. */
484 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
486 if (min_profitable_iters < 0)
488 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
489 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
490 if (vect_print_dump_info (REPORT_DETAILS))
491 fprintf (vect_dump, "not vectorized: vector version will never be "
492 "profitable.");
493 return false;
496 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
497 * vectorization_factor;
499 /* Use the cost model only if it is more conservative than user specified
500 threshold. */
502 th = (unsigned) min_scalar_loop_bound;
503 if (min_profitable_iters
504 && (!min_scalar_loop_bound
505 || min_profitable_iters > min_scalar_loop_bound))
506 th = (unsigned) min_profitable_iters;
508 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
509 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
511 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
512 fprintf (vect_dump, "not vectorized: vectorization not "
513 "profitable.");
514 if (vect_print_dump_info (REPORT_DETAILS))
515 fprintf (vect_dump, "not vectorized: iteration count smaller than "
516 "user specified loop bound parameter or minimum "
517 "profitable iterations (whichever is more conservative).");
518 return false;
521 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
522 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
523 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
525 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "epilog loop required.");
527 if (!vect_can_advance_ivs_p (loop_vinfo))
529 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
530 fprintf (vect_dump,
531 "not vectorized: can't create epilog loop 1.");
532 return false;
534 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
536 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
537 fprintf (vect_dump,
538 "not vectorized: can't create epilog loop 2.");
539 return false;
543 return true;
547 /* Function exist_non_indexing_operands_for_use_p
549 USE is one of the uses attached to STMT. Check if USE is
550 used in STMT for anything other than indexing an array. */
552 static bool
553 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
555 tree operand;
556 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
558 /* USE corresponds to some operand in STMT. If there is no data
559 reference in STMT, then any operand that corresponds to USE
560 is not indexing an array. */
561 if (!STMT_VINFO_DATA_REF (stmt_info))
562 return true;
564 /* STMT has a data_ref. FORNOW this means that its of one of
565 the following forms:
566 -1- ARRAY_REF = var
567 -2- var = ARRAY_REF
568 (This should have been verified in analyze_data_refs).
570 'var' in the second case corresponds to a def, not a use,
571 so USE cannot correspond to any operands that are not used
572 for array indexing.
574 Therefore, all we need to check is if STMT falls into the
575 first case, and whether var corresponds to USE. */
577 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
578 return false;
580 operand = GIMPLE_STMT_OPERAND (stmt, 1);
582 if (TREE_CODE (operand) != SSA_NAME)
583 return false;
585 if (operand == use)
586 return true;
588 return false;
592 /* Function vect_analyze_scalar_cycles.
594 Examine the cross iteration def-use cycles of scalar variables, by
595 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
596 following: invariant, induction, reduction, unknown.
598 Some forms of scalar cycles are not yet supported.
600 Example1: reduction: (unsupported yet)
602 loop1:
603 for (i=0; i<N; i++)
604 sum += a[i];
606 Example2: induction: (unsupported yet)
608 loop2:
609 for (i=0; i<N; i++)
610 a[i] = i;
612 Note: the following loop *is* vectorizable:
614 loop3:
615 for (i=0; i<N; i++)
616 a[i] = b[i];
618 even though it has a def-use cycle caused by the induction variable i:
620 loop: i_2 = PHI (i_0, i_1)
621 a[i_2] = ...;
622 i_1 = i_2 + 1;
623 GOTO loop;
625 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
626 it does not need to be vectorized because it is only used for array
627 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
628 loop2 on the other hand is relevant (it is being written to memory).
631 static void
632 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
634 tree phi;
635 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
636 basic_block bb = loop->header;
637 tree dumy;
638 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
640 if (vect_print_dump_info (REPORT_DETAILS))
641 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
643 /* First - identify all inductions. */
644 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
646 tree access_fn = NULL;
647 tree def = PHI_RESULT (phi);
648 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
650 if (vect_print_dump_info (REPORT_DETAILS))
652 fprintf (vect_dump, "Analyze phi: ");
653 print_generic_expr (vect_dump, phi, TDF_SLIM);
656 /* Skip virtual phi's. The data dependences that are associated with
657 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
658 if (!is_gimple_reg (SSA_NAME_VAR (def)))
659 continue;
661 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
663 /* Analyze the evolution function. */
664 access_fn = analyze_scalar_evolution (loop, def);
665 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
667 fprintf (vect_dump, "Access function of PHI: ");
668 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
671 if (!access_fn
672 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
674 VEC_safe_push (tree, heap, worklist, phi);
675 continue;
678 if (vect_print_dump_info (REPORT_DETAILS))
679 fprintf (vect_dump, "Detected induction.");
680 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
684 /* Second - identify all reductions. */
685 while (VEC_length (tree, worklist) > 0)
687 tree phi = VEC_pop (tree, worklist);
688 tree def = PHI_RESULT (phi);
689 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
690 tree reduc_stmt;
692 if (vect_print_dump_info (REPORT_DETAILS))
694 fprintf (vect_dump, "Analyze phi: ");
695 print_generic_expr (vect_dump, phi, TDF_SLIM);
698 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
699 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
701 reduc_stmt = vect_is_simple_reduction (loop, phi);
702 if (reduc_stmt)
704 if (vect_print_dump_info (REPORT_DETAILS))
705 fprintf (vect_dump, "Detected reduction.");
706 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
707 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
708 vect_reduction_def;
710 else
711 if (vect_print_dump_info (REPORT_DETAILS))
712 fprintf (vect_dump, "Unknown def-use cycle pattern.");
715 VEC_free (tree, heap, worklist);
716 return;
720 /* Function vect_insert_into_interleaving_chain.
722 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
724 static void
725 vect_insert_into_interleaving_chain (struct data_reference *dra,
726 struct data_reference *drb)
728 tree prev, next, next_init;
729 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
730 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
732 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
733 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
734 while (next)
736 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
737 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
739 /* Insert here. */
740 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
741 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
742 return;
744 prev = next;
745 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
748 /* We got to the end of the list. Insert here. */
749 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
750 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
754 /* Function vect_update_interleaving_chain.
756 For two data-refs DRA and DRB that are a part of a chain interleaved data
757 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
759 There are four possible cases:
760 1. New stmts - both DRA and DRB are not a part of any chain:
761 FIRST_DR = DRB
762 NEXT_DR (DRB) = DRA
763 2. DRB is a part of a chain and DRA is not:
764 no need to update FIRST_DR
765 no need to insert DRB
766 insert DRA according to init
767 3. DRA is a part of a chain and DRB is not:
768 if (init of FIRST_DR > init of DRB)
769 FIRST_DR = DRB
770 NEXT(FIRST_DR) = previous FIRST_DR
771 else
772 insert DRB according to its init
773 4. both DRA and DRB are in some interleaving chains:
774 choose the chain with the smallest init of FIRST_DR
775 insert the nodes of the second chain into the first one. */
777 static void
778 vect_update_interleaving_chain (struct data_reference *drb,
779 struct data_reference *dra)
781 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
782 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
783 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
784 tree node, prev, next, node_init, first_stmt;
786 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
787 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
789 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
790 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
791 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
792 return;
795 /* 2. DRB is a part of a chain and DRA is not. */
796 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
798 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
799 /* Insert DRA into the chain of DRB. */
800 vect_insert_into_interleaving_chain (dra, drb);
801 return;
804 /* 3. DRA is a part of a chain and DRB is not. */
805 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
807 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
808 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
809 old_first_stmt)));
810 tree tmp;
812 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
814 /* DRB's init is smaller than the init of the stmt previously marked
815 as the first stmt of the interleaving chain of DRA. Therefore, we
816 update FIRST_STMT and put DRB in the head of the list. */
817 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
818 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
820 /* Update all the stmts in the list to point to the new FIRST_STMT. */
821 tmp = old_first_stmt;
822 while (tmp)
824 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
825 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
828 else
830 /* Insert DRB in the list of DRA. */
831 vect_insert_into_interleaving_chain (drb, dra);
832 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
834 return;
837 /* 4. both DRA and DRB are in some interleaving chains. */
838 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
839 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
840 if (first_a == first_b)
841 return;
842 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
843 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
845 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
847 /* Insert the nodes of DRA chain into the DRB chain.
848 After inserting a node, continue from this node of the DRB chain (don't
849 start from the beginning. */
850 node = DR_GROUP_FIRST_DR (stmtinfo_a);
851 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
852 first_stmt = first_b;
854 else
856 /* Insert the nodes of DRB chain into the DRA chain.
857 After inserting a node, continue from this node of the DRA chain (don't
858 start from the beginning. */
859 node = DR_GROUP_FIRST_DR (stmtinfo_b);
860 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
861 first_stmt = first_a;
864 while (node)
866 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
867 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
868 while (next)
870 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
871 if (tree_int_cst_compare (next_init, node_init) > 0)
873 /* Insert here. */
874 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
875 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
876 prev = node;
877 break;
879 prev = next;
880 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
882 if (!next)
884 /* We got to the end of the list. Insert here. */
885 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
886 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
887 prev = node;
889 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
890 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
895 /* Function vect_equal_offsets.
897 Check if OFFSET1 and OFFSET2 are identical expressions. */
899 static bool
900 vect_equal_offsets (tree offset1, tree offset2)
902 bool res0, res1;
904 STRIP_NOPS (offset1);
905 STRIP_NOPS (offset2);
907 if (offset1 == offset2)
908 return true;
910 if (TREE_CODE (offset1) != TREE_CODE (offset2)
911 || !BINARY_CLASS_P (offset1)
912 || !BINARY_CLASS_P (offset2))
913 return false;
915 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
916 TREE_OPERAND (offset2, 0));
917 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
918 TREE_OPERAND (offset2, 1));
920 return (res0 && res1);
924 /* Function vect_check_interleaving.
926 Check if DRA and DRB are a part of interleaving. In case they are, insert
927 DRA and DRB in an interleaving chain. */
929 static void
930 vect_check_interleaving (struct data_reference *dra,
931 struct data_reference *drb)
933 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
935 /* Check that the data-refs have same first location (except init) and they
936 are both either store or load (not load and store). */
937 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
938 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
939 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
940 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
941 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
942 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
943 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
944 || DR_IS_READ (dra) != DR_IS_READ (drb))
945 return;
947 /* Check:
948 1. data-refs are of the same type
949 2. their steps are equal
950 3. the step is greater than the difference between data-refs' inits */
951 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
952 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
954 if (type_size_a != type_size_b
955 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
956 return;
958 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
959 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
960 step = TREE_INT_CST_LOW (DR_STEP (dra));
962 if (init_a > init_b)
964 /* If init_a == init_b + the size of the type * k, we have an interleaving,
965 and DRB is accessed before DRA. */
966 diff_mod_size = (init_a - init_b) % type_size_a;
968 if ((init_a - init_b) > step)
969 return;
971 if (diff_mod_size == 0)
973 vect_update_interleaving_chain (drb, dra);
974 if (vect_print_dump_info (REPORT_DR_DETAILS))
976 fprintf (vect_dump, "Detected interleaving ");
977 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
978 fprintf (vect_dump, " and ");
979 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
981 return;
984 else
986 /* If init_b == init_a + the size of the type * k, we have an
987 interleaving, and DRA is accessed before DRB. */
988 diff_mod_size = (init_b - init_a) % type_size_a;
990 if ((init_b - init_a) > step)
991 return;
993 if (diff_mod_size == 0)
995 vect_update_interleaving_chain (dra, drb);
996 if (vect_print_dump_info (REPORT_DR_DETAILS))
998 fprintf (vect_dump, "Detected interleaving ");
999 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1000 fprintf (vect_dump, " and ");
1001 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1003 return;
1009 /* Function vect_analyze_data_ref_dependence.
1011 Return TRUE if there (might) exist a dependence between a memory-reference
1012 DRA and a memory-reference DRB. */
1014 static bool
1015 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1016 loop_vec_info loop_vinfo)
1018 unsigned int i;
1019 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1020 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1021 struct data_reference *dra = DDR_A (ddr);
1022 struct data_reference *drb = DDR_B (ddr);
1023 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1024 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1025 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1026 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1027 lambda_vector dist_v;
1028 unsigned int loop_depth;
1030 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1032 /* Independent data accesses. */
1033 vect_check_interleaving (dra, drb);
1034 return false;
1037 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1038 return false;
1040 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1042 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1044 fprintf (vect_dump,
1045 "not vectorized: can't determine dependence between ");
1046 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1047 fprintf (vect_dump, " and ");
1048 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1050 return true;
1053 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1057 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1058 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1059 fprintf (vect_dump, " and ");
1060 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1062 return true;
1065 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1066 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1068 int dist = dist_v[loop_depth];
1070 if (vect_print_dump_info (REPORT_DR_DETAILS))
1071 fprintf (vect_dump, "dependence distance = %d.", dist);
1073 /* Same loop iteration. */
1074 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1076 /* Two references with distance zero have the same alignment. */
1077 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1078 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1079 if (vect_print_dump_info (REPORT_ALIGNMENT))
1080 fprintf (vect_dump, "accesses have the same alignment.");
1081 if (vect_print_dump_info (REPORT_DR_DETAILS))
1083 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1084 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1085 fprintf (vect_dump, " and ");
1086 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1089 /* For interleaving, mark that there is a read-write dependency if
1090 necessary. We check before that one of the data-refs is store. */
1091 if (DR_IS_READ (dra))
1092 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1093 else
1095 if (DR_IS_READ (drb))
1096 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1099 continue;
1102 if (abs (dist) >= vectorization_factor)
1104 /* Dependence distance does not create dependence, as far as vectorization
1105 is concerned, in this case. */
1106 if (vect_print_dump_info (REPORT_DR_DETAILS))
1107 fprintf (vect_dump, "dependence distance >= VF.");
1108 continue;
1111 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1113 fprintf (vect_dump,
1114 "not vectorized: possible dependence between data-refs ");
1115 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1116 fprintf (vect_dump, " and ");
1117 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1120 return true;
1123 return false;
1127 /* Function vect_analyze_data_ref_dependences.
1129 Examine all the data references in the loop, and make sure there do not
1130 exist any data dependences between them. */
1132 static bool
1133 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1135 unsigned int i;
1136 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1137 struct data_dependence_relation *ddr;
1139 if (vect_print_dump_info (REPORT_DETAILS))
1140 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1142 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1143 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1144 return false;
1146 return true;
1150 /* Function vect_compute_data_ref_alignment
1152 Compute the misalignment of the data reference DR.
1154 Output:
1155 1. If during the misalignment computation it is found that the data reference
1156 cannot be vectorized then false is returned.
1157 2. DR_MISALIGNMENT (DR) is defined.
1159 FOR NOW: No analysis is actually performed. Misalignment is calculated
1160 only for trivial cases. TODO. */
1162 static bool
1163 vect_compute_data_ref_alignment (struct data_reference *dr)
1165 tree stmt = DR_STMT (dr);
1166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1167 tree ref = DR_REF (dr);
1168 tree vectype;
1169 tree base, base_addr;
1170 bool base_aligned;
1171 tree misalign;
1172 tree aligned_to, alignment;
1174 if (vect_print_dump_info (REPORT_DETAILS))
1175 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1177 /* Initialize misalignment to unknown. */
1178 SET_DR_MISALIGNMENT (dr, -1);
1180 misalign = DR_INIT (dr);
1181 aligned_to = DR_ALIGNED_TO (dr);
1182 base_addr = DR_BASE_ADDRESS (dr);
1183 base = build_fold_indirect_ref (base_addr);
1184 vectype = STMT_VINFO_VECTYPE (stmt_info);
1185 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1187 if (tree_int_cst_compare (aligned_to, alignment) < 0)
1189 if (vect_print_dump_info (REPORT_DETAILS))
1191 fprintf (vect_dump, "Unknown alignment for access: ");
1192 print_generic_expr (vect_dump, base, TDF_SLIM);
1194 return true;
1197 if ((DECL_P (base)
1198 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1199 alignment) >= 0)
1200 || (TREE_CODE (base_addr) == SSA_NAME
1201 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1202 TREE_TYPE (base_addr)))),
1203 alignment) >= 0))
1204 base_aligned = true;
1205 else
1206 base_aligned = false;
1208 if (!base_aligned)
1210 /* Do not change the alignment of global variables if
1211 flag_section_anchors is enabled. */
1212 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1213 || (TREE_STATIC (base) && flag_section_anchors))
1215 if (vect_print_dump_info (REPORT_DETAILS))
1217 fprintf (vect_dump, "can't force alignment of ref: ");
1218 print_generic_expr (vect_dump, ref, TDF_SLIM);
1220 return true;
1223 /* Force the alignment of the decl.
1224 NOTE: This is the only change to the code we make during
1225 the analysis phase, before deciding to vectorize the loop. */
1226 if (vect_print_dump_info (REPORT_DETAILS))
1227 fprintf (vect_dump, "force alignment");
1228 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1229 DECL_USER_ALIGN (base) = 1;
1232 /* At this point we assume that the base is aligned. */
1233 gcc_assert (base_aligned
1234 || (TREE_CODE (base) == VAR_DECL
1235 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1237 /* Modulo alignment. */
1238 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1240 if (!host_integerp (misalign, 1))
1242 /* Negative or overflowed misalignment value. */
1243 if (vect_print_dump_info (REPORT_DETAILS))
1244 fprintf (vect_dump, "unexpected misalign value");
1245 return false;
1248 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1250 if (vect_print_dump_info (REPORT_DETAILS))
1252 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1253 print_generic_expr (vect_dump, ref, TDF_SLIM);
1256 return true;
1260 /* Function vect_compute_data_refs_alignment
1262 Compute the misalignment of data references in the loop.
1263 Return FALSE if a data reference is found that cannot be vectorized. */
1265 static bool
1266 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1268 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1269 struct data_reference *dr;
1270 unsigned int i;
1272 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1273 if (!vect_compute_data_ref_alignment (dr))
1274 return false;
1276 return true;
1280 /* Function vect_update_misalignment_for_peel
1282 DR - the data reference whose misalignment is to be adjusted.
1283 DR_PEEL - the data reference whose misalignment is being made
1284 zero in the vector loop by the peel.
1285 NPEEL - the number of iterations in the peel loop if the misalignment
1286 of DR_PEEL is known at compile time. */
1288 static void
1289 vect_update_misalignment_for_peel (struct data_reference *dr,
1290 struct data_reference *dr_peel, int npeel)
1292 unsigned int i;
1293 VEC(dr_p,heap) *same_align_drs;
1294 struct data_reference *current_dr;
1295 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1296 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1297 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1298 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1300 /* For interleaved data accesses the step in the loop must be multiplied by
1301 the size of the interleaving group. */
1302 if (DR_GROUP_FIRST_DR (stmt_info))
1303 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1304 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1305 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1307 /* It can be assumed that the data refs with the same alignment as dr_peel
1308 are aligned in the vector loop. */
1309 same_align_drs
1310 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1311 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1313 if (current_dr != dr)
1314 continue;
1315 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1316 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1317 SET_DR_MISALIGNMENT (dr, 0);
1318 return;
1321 if (known_alignment_for_access_p (dr)
1322 && known_alignment_for_access_p (dr_peel))
1324 int misal = DR_MISALIGNMENT (dr);
1325 misal += npeel * dr_size;
1326 misal %= UNITS_PER_SIMD_WORD;
1327 SET_DR_MISALIGNMENT (dr, misal);
1328 return;
1331 if (vect_print_dump_info (REPORT_DETAILS))
1332 fprintf (vect_dump, "Setting misalignment to -1.");
1333 SET_DR_MISALIGNMENT (dr, -1);
1337 /* Function vect_verify_datarefs_alignment
1339 Return TRUE if all data references in the loop can be
1340 handled with respect to alignment. */
1342 static bool
1343 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1345 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1346 struct data_reference *dr;
1347 enum dr_alignment_support supportable_dr_alignment;
1348 unsigned int i;
1350 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1352 tree stmt = DR_STMT (dr);
1353 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1355 /* For interleaving, only the alignment of the first access matters. */
1356 if (DR_GROUP_FIRST_DR (stmt_info)
1357 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1358 continue;
1360 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1361 if (!supportable_dr_alignment)
1363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1365 if (DR_IS_READ (dr))
1366 fprintf (vect_dump,
1367 "not vectorized: unsupported unaligned load.");
1368 else
1369 fprintf (vect_dump,
1370 "not vectorized: unsupported unaligned store.");
1372 return false;
1374 if (supportable_dr_alignment != dr_aligned
1375 && vect_print_dump_info (REPORT_ALIGNMENT))
1376 fprintf (vect_dump, "Vectorizing an unaligned access.");
1378 return true;
1382 /* Function vect_enhance_data_refs_alignment
1384 This pass will use loop versioning and loop peeling in order to enhance
1385 the alignment of data references in the loop.
1387 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1388 original loop is to be vectorized; Any other loops that are created by
1389 the transformations performed in this pass - are not supposed to be
1390 vectorized. This restriction will be relaxed.
1392 This pass will require a cost model to guide it whether to apply peeling
1393 or versioning or a combination of the two. For example, the scheme that
1394 intel uses when given a loop with several memory accesses, is as follows:
1395 choose one memory access ('p') which alignment you want to force by doing
1396 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1397 other accesses are not necessarily aligned, or (2) use loop versioning to
1398 generate one loop in which all accesses are aligned, and another loop in
1399 which only 'p' is necessarily aligned.
1401 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1402 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1403 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1405 Devising a cost model is the most critical aspect of this work. It will
1406 guide us on which access to peel for, whether to use loop versioning, how
1407 many versions to create, etc. The cost model will probably consist of
1408 generic considerations as well as target specific considerations (on
1409 powerpc for example, misaligned stores are more painful than misaligned
1410 loads).
1412 Here are the general steps involved in alignment enhancements:
1414 -- original loop, before alignment analysis:
1415 for (i=0; i<N; i++){
1416 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1417 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1420 -- After vect_compute_data_refs_alignment:
1421 for (i=0; i<N; i++){
1422 x = q[i]; # DR_MISALIGNMENT(q) = 3
1423 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1426 -- Possibility 1: we do loop versioning:
1427 if (p is aligned) {
1428 for (i=0; i<N; i++){ # loop 1A
1429 x = q[i]; # DR_MISALIGNMENT(q) = 3
1430 p[i] = y; # DR_MISALIGNMENT(p) = 0
1433 else {
1434 for (i=0; i<N; i++){ # loop 1B
1435 x = q[i]; # DR_MISALIGNMENT(q) = 3
1436 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1440 -- Possibility 2: we do loop peeling:
1441 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1442 x = q[i];
1443 p[i] = y;
1445 for (i = 3; i < N; i++){ # loop 2A
1446 x = q[i]; # DR_MISALIGNMENT(q) = 0
1447 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1450 -- Possibility 3: combination of loop peeling and versioning:
1451 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1452 x = q[i];
1453 p[i] = y;
1455 if (p is aligned) {
1456 for (i = 3; i<N; i++){ # loop 3A
1457 x = q[i]; # DR_MISALIGNMENT(q) = 0
1458 p[i] = y; # DR_MISALIGNMENT(p) = 0
1461 else {
1462 for (i = 3; i<N; i++){ # loop 3B
1463 x = q[i]; # DR_MISALIGNMENT(q) = 0
1464 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1468 These loops are later passed to loop_transform to be vectorized. The
1469 vectorizer will use the alignment information to guide the transformation
1470 (whether to generate regular loads/stores, or with special handling for
1471 misalignment). */
1473 static bool
1474 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1476 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1478 enum dr_alignment_support supportable_dr_alignment;
1479 struct data_reference *dr0 = NULL;
1480 struct data_reference *dr;
1481 unsigned int i;
1482 bool do_peeling = false;
1483 bool do_versioning = false;
1484 bool stat;
1485 tree stmt;
1486 stmt_vec_info stmt_info;
1488 if (vect_print_dump_info (REPORT_DETAILS))
1489 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1491 /* While cost model enhancements are expected in the future, the high level
1492 view of the code at this time is as follows:
1494 A) If there is a misaligned write then see if peeling to align this write
1495 can make all data references satisfy vect_supportable_dr_alignment.
1496 If so, update data structures as needed and return true. Note that
1497 at this time vect_supportable_dr_alignment is known to return false
1498 for a misaligned write.
1500 B) If peeling wasn't possible and there is a data reference with an
1501 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1502 then see if loop versioning checks can be used to make all data
1503 references satisfy vect_supportable_dr_alignment. If so, update
1504 data structures as needed and return true.
1506 C) If neither peeling nor versioning were successful then return false if
1507 any data reference does not satisfy vect_supportable_dr_alignment.
1509 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1511 Note, Possibility 3 above (which is peeling and versioning together) is not
1512 being done at this time. */
1514 /* (1) Peeling to force alignment. */
1516 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1517 Considerations:
1518 + How many accesses will become aligned due to the peeling
1519 - How many accesses will become unaligned due to the peeling,
1520 and the cost of misaligned accesses.
1521 - The cost of peeling (the extra runtime checks, the increase
1522 in code size).
1524 The scheme we use FORNOW: peel to force the alignment of the first
1525 misaligned store in the loop.
1526 Rationale: misaligned stores are not yet supported.
1528 TODO: Use a cost model. */
1530 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1532 stmt = DR_STMT (dr);
1533 stmt_info = vinfo_for_stmt (stmt);
1535 /* For interleaving, only the alignment of the first access
1536 matters. */
1537 if (DR_GROUP_FIRST_DR (stmt_info)
1538 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1539 continue;
1541 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1543 if (DR_GROUP_FIRST_DR (stmt_info))
1545 /* For interleaved access we peel only if number of iterations in
1546 the prolog loop ({VF - misalignment}), is a multiple of the
1547 number of the interleaved accesses. */
1548 int elem_size, mis_in_elements;
1549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1550 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1552 /* FORNOW: handle only known alignment. */
1553 if (!known_alignment_for_access_p (dr))
1555 do_peeling = false;
1556 break;
1559 elem_size = UNITS_PER_SIMD_WORD / nelements;
1560 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1562 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1564 do_peeling = false;
1565 break;
1568 dr0 = dr;
1569 do_peeling = true;
1570 break;
1574 /* Often peeling for alignment will require peeling for loop-bound, which in
1575 turn requires that we know how to adjust the loop ivs after the loop. */
1576 if (!vect_can_advance_ivs_p (loop_vinfo)
1577 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1578 do_peeling = false;
1580 if (do_peeling)
1582 int mis;
1583 int npeel = 0;
1584 tree stmt = DR_STMT (dr0);
1585 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1586 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1587 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1589 if (known_alignment_for_access_p (dr0))
1591 /* Since it's known at compile time, compute the number of iterations
1592 in the peeled loop (the peeling factor) for use in updating
1593 DR_MISALIGNMENT values. The peeling factor is the vectorization
1594 factor minus the misalignment as an element count. */
1595 mis = DR_MISALIGNMENT (dr0);
1596 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1597 npeel = nelements - mis;
1599 /* For interleaved data access every iteration accesses all the
1600 members of the group, therefore we divide the number of iterations
1601 by the group size. */
1602 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1603 if (DR_GROUP_FIRST_DR (stmt_info))
1604 npeel /= DR_GROUP_SIZE (stmt_info);
1606 if (vect_print_dump_info (REPORT_DETAILS))
1607 fprintf (vect_dump, "Try peeling by %d", npeel);
1610 /* Ensure that all data refs can be vectorized after the peel. */
1611 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1613 int save_misalignment;
1615 if (dr == dr0)
1616 continue;
1618 stmt = DR_STMT (dr);
1619 stmt_info = vinfo_for_stmt (stmt);
1620 /* For interleaving, only the alignment of the first access
1621 matters. */
1622 if (DR_GROUP_FIRST_DR (stmt_info)
1623 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1624 continue;
1626 save_misalignment = DR_MISALIGNMENT (dr);
1627 vect_update_misalignment_for_peel (dr, dr0, npeel);
1628 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1629 SET_DR_MISALIGNMENT (dr, save_misalignment);
1631 if (!supportable_dr_alignment)
1633 do_peeling = false;
1634 break;
1638 if (do_peeling)
1640 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1641 If the misalignment of DR_i is identical to that of dr0 then set
1642 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1643 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1644 by the peeling factor times the element size of DR_i (MOD the
1645 vectorization factor times the size). Otherwise, the
1646 misalignment of DR_i must be set to unknown. */
1647 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1648 if (dr != dr0)
1649 vect_update_misalignment_for_peel (dr, dr0, npeel);
1651 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1652 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1653 SET_DR_MISALIGNMENT (dr0, 0);
1654 if (vect_print_dump_info (REPORT_ALIGNMENT))
1655 fprintf (vect_dump, "Alignment of access forced using peeling.");
1657 if (vect_print_dump_info (REPORT_DETAILS))
1658 fprintf (vect_dump, "Peeling for alignment will be applied.");
1660 stat = vect_verify_datarefs_alignment (loop_vinfo);
1661 gcc_assert (stat);
1662 return stat;
1667 /* (2) Versioning to force alignment. */
1669 /* Try versioning if:
1670 1) flag_tree_vect_loop_version is TRUE
1671 2) optimize_size is FALSE
1672 3) there is at least one unsupported misaligned data ref with an unknown
1673 misalignment, and
1674 4) all misaligned data refs with a known misalignment are supported, and
1675 5) the number of runtime alignment checks is within reason. */
1677 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1679 if (do_versioning)
1681 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1683 stmt = DR_STMT (dr);
1684 stmt_info = vinfo_for_stmt (stmt);
1686 /* For interleaving, only the alignment of the first access
1687 matters. */
1688 if (aligned_access_p (dr)
1689 || (DR_GROUP_FIRST_DR (stmt_info)
1690 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1691 continue;
1693 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1695 if (!supportable_dr_alignment)
1697 tree stmt;
1698 int mask;
1699 tree vectype;
1701 if (known_alignment_for_access_p (dr)
1702 || VEC_length (tree,
1703 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1704 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1706 do_versioning = false;
1707 break;
1710 stmt = DR_STMT (dr);
1711 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1712 gcc_assert (vectype);
1714 /* The rightmost bits of an aligned address must be zeros.
1715 Construct the mask needed for this test. For example,
1716 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1717 mask must be 15 = 0xf. */
1718 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1720 /* FORNOW: use the same mask to test all potentially unaligned
1721 references in the loop. The vectorizer currently supports
1722 a single vector size, see the reference to
1723 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1724 vectorization factor is computed. */
1725 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1726 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1727 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1728 VEC_safe_push (tree, heap,
1729 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1730 DR_STMT (dr));
1734 /* Versioning requires at least one misaligned data reference. */
1735 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1736 do_versioning = false;
1737 else if (!do_versioning)
1738 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1741 if (do_versioning)
1743 VEC(tree,heap) *may_misalign_stmts
1744 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1745 tree stmt;
1747 /* It can now be assumed that the data references in the statements
1748 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1749 of the loop being vectorized. */
1750 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1752 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1753 dr = STMT_VINFO_DATA_REF (stmt_info);
1754 SET_DR_MISALIGNMENT (dr, 0);
1755 if (vect_print_dump_info (REPORT_ALIGNMENT))
1756 fprintf (vect_dump, "Alignment of access forced using versioning.");
1759 if (vect_print_dump_info (REPORT_DETAILS))
1760 fprintf (vect_dump, "Versioning for alignment will be applied.");
1762 /* Peeling and versioning can't be done together at this time. */
1763 gcc_assert (! (do_peeling && do_versioning));
1765 stat = vect_verify_datarefs_alignment (loop_vinfo);
1766 gcc_assert (stat);
1767 return stat;
1770 /* This point is reached if neither peeling nor versioning is being done. */
1771 gcc_assert (! (do_peeling || do_versioning));
1773 stat = vect_verify_datarefs_alignment (loop_vinfo);
1774 return stat;
1778 /* Function vect_analyze_data_refs_alignment
1780 Analyze the alignment of the data-references in the loop.
1781 Return FALSE if a data reference is found that cannot be vectorized. */
1783 static bool
1784 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1786 if (vect_print_dump_info (REPORT_DETAILS))
1787 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1789 if (!vect_compute_data_refs_alignment (loop_vinfo))
1791 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1792 fprintf (vect_dump,
1793 "not vectorized: can't calculate alignment for data ref.");
1794 return false;
1797 return true;
1801 /* Function vect_analyze_data_ref_access.
1803 Analyze the access pattern of the data-reference DR. For now, a data access
1804 has to be consecutive to be considered vectorizable. */
1806 static bool
1807 vect_analyze_data_ref_access (struct data_reference *dr)
1809 tree step = DR_STEP (dr);
1810 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1811 tree scalar_type = TREE_TYPE (DR_REF (dr));
1812 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1813 tree stmt = DR_STMT (dr);
1814 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1815 interleaving group (including gaps). */
1816 HOST_WIDE_INT stride = dr_step / type_size;
1818 if (!step)
1820 if (vect_print_dump_info (REPORT_DETAILS))
1821 fprintf (vect_dump, "bad data-ref access");
1822 return false;
1825 /* Consecutive? */
1826 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1828 /* Mark that it is not interleaving. */
1829 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1830 return true;
1833 /* Not consecutive access is possible only if it is a part of interleaving. */
1834 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1836 /* Check if it this DR is a part of interleaving, and is a single
1837 element of the group that is accessed in the loop. */
1839 /* Gaps are supported only for loads. STEP must be a multiple of the type
1840 size. The size of the group must be a power of 2. */
1841 if (DR_IS_READ (dr)
1842 && (dr_step % type_size) == 0
1843 && stride > 0
1844 && exact_log2 (stride) != -1)
1846 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1847 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1848 if (vect_print_dump_info (REPORT_DR_DETAILS))
1850 fprintf (vect_dump, "Detected single element interleaving %d ",
1851 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1852 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1853 fprintf (vect_dump, " step ");
1854 print_generic_expr (vect_dump, step, TDF_SLIM);
1856 return true;
1858 if (vect_print_dump_info (REPORT_DETAILS))
1859 fprintf (vect_dump, "not consecutive access");
1860 return false;
1863 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1865 /* First stmt in the interleaving chain. Check the chain. */
1866 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1867 struct data_reference *data_ref = dr;
1868 unsigned int count = 1;
1869 tree next_step;
1870 tree prev_init = DR_INIT (data_ref);
1871 tree prev = stmt;
1872 HOST_WIDE_INT diff, count_in_bytes;
1874 while (next)
1876 /* Skip same data-refs. In case that two or more stmts share data-ref
1877 (supported only for loads), we vectorize only the first stmt, and
1878 the rest get their vectorized loads from the first one. */
1879 if (!tree_int_cst_compare (DR_INIT (data_ref),
1880 DR_INIT (STMT_VINFO_DATA_REF (
1881 vinfo_for_stmt (next)))))
1883 if (!DR_IS_READ (data_ref))
1885 if (vect_print_dump_info (REPORT_DETAILS))
1886 fprintf (vect_dump, "Two store stmts share the same dr.");
1887 return false;
1890 /* Check that there is no load-store dependencies for this loads
1891 to prevent a case of load-store-load to the same location. */
1892 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1893 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1895 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump,
1897 "READ_WRITE dependence in interleaving.");
1898 return false;
1901 /* For load use the same data-ref load. */
1902 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1904 prev = next;
1905 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1906 continue;
1908 prev = next;
1910 /* Check that all the accesses have the same STEP. */
1911 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1912 if (tree_int_cst_compare (step, next_step))
1914 if (vect_print_dump_info (REPORT_DETAILS))
1915 fprintf (vect_dump, "not consecutive access in interleaving");
1916 return false;
1919 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1920 /* Check that the distance between two accesses is equal to the type
1921 size. Otherwise, we have gaps. */
1922 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1923 - TREE_INT_CST_LOW (prev_init)) / type_size;
1924 if (!DR_IS_READ (data_ref) && diff != 1)
1926 if (vect_print_dump_info (REPORT_DETAILS))
1927 fprintf (vect_dump, "interleaved store with gaps");
1928 return false;
1930 /* Store the gap from the previous member of the group. If there is no
1931 gap in the access, DR_GROUP_GAP is always 1. */
1932 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1934 prev_init = DR_INIT (data_ref);
1935 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1936 /* Count the number of data-refs in the chain. */
1937 count++;
1940 /* COUNT is the number of accesses found, we multiply it by the size of
1941 the type to get COUNT_IN_BYTES. */
1942 count_in_bytes = type_size * count;
1944 /* Check that the size of the interleaving is not greater than STEP. */
1945 if (dr_step < count_in_bytes)
1947 if (vect_print_dump_info (REPORT_DETAILS))
1949 fprintf (vect_dump, "interleaving size is greater than step for ");
1950 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1952 return false;
1955 /* Check that the size of the interleaving is equal to STEP for stores,
1956 i.e., that there are no gaps. */
1957 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1959 if (vect_print_dump_info (REPORT_DETAILS))
1960 fprintf (vect_dump, "interleaved store with gaps");
1961 return false;
1964 /* Check that STEP is a multiple of type size. */
1965 if ((dr_step % type_size) != 0)
1967 if (vect_print_dump_info (REPORT_DETAILS))
1969 fprintf (vect_dump, "step is not a multiple of type size: step ");
1970 print_generic_expr (vect_dump, step, TDF_SLIM);
1971 fprintf (vect_dump, " size ");
1972 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1973 TDF_SLIM);
1975 return false;
1978 /* FORNOW: we handle only interleaving that is a power of 2. */
1979 if (exact_log2 (stride) == -1)
1981 if (vect_print_dump_info (REPORT_DETAILS))
1982 fprintf (vect_dump, "interleaving is not a power of 2");
1983 return false;
1985 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1987 return true;
1991 /* Function vect_analyze_data_ref_accesses.
1993 Analyze the access pattern of all the data references in the loop.
1995 FORNOW: the only access pattern that is considered vectorizable is a
1996 simple step 1 (consecutive) access.
1998 FORNOW: handle only arrays and pointer accesses. */
2000 static bool
2001 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2003 unsigned int i;
2004 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2005 struct data_reference *dr;
2007 if (vect_print_dump_info (REPORT_DETAILS))
2008 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2010 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2011 if (!vect_analyze_data_ref_access (dr))
2013 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2014 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2015 return false;
2018 return true;
2022 /* Function vect_analyze_data_refs.
2024 Find all the data references in the loop.
2026 The general structure of the analysis of data refs in the vectorizer is as
2027 follows:
2028 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
2029 find and analyze all data-refs in the loop and their dependences.
2030 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2031 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2032 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2036 static bool
2037 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2039 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2040 unsigned int i;
2041 VEC (data_reference_p, heap) *datarefs;
2042 struct data_reference *dr;
2043 tree scalar_type;
2045 if (vect_print_dump_info (REPORT_DETAILS))
2046 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2048 compute_data_dependences_for_loop (loop, true,
2049 &LOOP_VINFO_DATAREFS (loop_vinfo),
2050 &LOOP_VINFO_DDRS (loop_vinfo));
2052 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2053 from stmt_vec_info struct to DR and vectype. */
2054 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2056 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2058 tree stmt;
2059 stmt_vec_info stmt_info;
2061 if (!dr || !DR_REF (dr))
2063 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2064 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2065 return false;
2068 /* Update DR field in stmt_vec_info struct. */
2069 stmt = DR_STMT (dr);
2070 stmt_info = vinfo_for_stmt (stmt);
2072 if (STMT_VINFO_DATA_REF (stmt_info))
2074 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2076 fprintf (vect_dump,
2077 "not vectorized: more than one data ref in stmt: ");
2078 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2080 return false;
2082 STMT_VINFO_DATA_REF (stmt_info) = dr;
2084 /* Check that analysis of the data-ref succeeded. */
2085 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2086 || !DR_STEP (dr))
2088 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2090 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2091 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2093 return false;
2095 if (!DR_SYMBOL_TAG (dr))
2097 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2099 fprintf (vect_dump, "not vectorized: no memory tag for ");
2100 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2102 return false;
2105 /* Set vectype for STMT. */
2106 scalar_type = TREE_TYPE (DR_REF (dr));
2107 STMT_VINFO_VECTYPE (stmt_info) =
2108 get_vectype_for_scalar_type (scalar_type);
2109 if (!STMT_VINFO_VECTYPE (stmt_info))
2111 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2113 fprintf (vect_dump,
2114 "not vectorized: no vectype for stmt: ");
2115 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2116 fprintf (vect_dump, " scalar_type: ");
2117 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2119 return false;
2123 return true;
2127 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2129 /* Function vect_mark_relevant.
2131 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2133 static void
2134 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2135 enum vect_relevant relevant, bool live_p)
2137 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2138 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2139 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2141 if (vect_print_dump_info (REPORT_DETAILS))
2142 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2146 tree pattern_stmt;
2148 /* This is the last stmt in a sequence that was detected as a
2149 pattern that can potentially be vectorized. Don't mark the stmt
2150 as relevant/live because it's not going to vectorized.
2151 Instead mark the pattern-stmt that replaces it. */
2152 if (vect_print_dump_info (REPORT_DETAILS))
2153 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2154 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2155 stmt_info = vinfo_for_stmt (pattern_stmt);
2156 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2157 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2158 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2159 stmt = pattern_stmt;
2162 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2163 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2164 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2166 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2167 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2169 if (vect_print_dump_info (REPORT_DETAILS))
2170 fprintf (vect_dump, "already marked relevant/live.");
2171 return;
2174 VEC_safe_push (tree, heap, *worklist, stmt);
2178 /* Function vect_stmt_relevant_p.
2180 Return true if STMT in loop that is represented by LOOP_VINFO is
2181 "relevant for vectorization".
2183 A stmt is considered "relevant for vectorization" if:
2184 - it has uses outside the loop.
2185 - it has vdefs (it alters memory).
2186 - control stmts in the loop (except for the exit condition).
2188 CHECKME: what other side effects would the vectorizer allow? */
2190 static bool
2191 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2192 enum vect_relevant *relevant, bool *live_p)
2194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2195 ssa_op_iter op_iter;
2196 imm_use_iterator imm_iter;
2197 use_operand_p use_p;
2198 def_operand_p def_p;
2200 *relevant = vect_unused_in_loop;
2201 *live_p = false;
2203 /* cond stmt other than loop exit cond. */
2204 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2205 *relevant = vect_used_in_loop;
2207 /* changing memory. */
2208 if (TREE_CODE (stmt) != PHI_NODE)
2209 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2211 if (vect_print_dump_info (REPORT_DETAILS))
2212 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2213 *relevant = vect_used_in_loop;
2216 /* uses outside the loop. */
2217 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2219 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2221 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2222 if (!flow_bb_inside_loop_p (loop, bb))
2224 if (vect_print_dump_info (REPORT_DETAILS))
2225 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2227 /* We expect all such uses to be in the loop exit phis
2228 (because of loop closed form) */
2229 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2230 gcc_assert (bb == single_exit (loop)->dest);
2232 *live_p = true;
2237 return (*live_p || *relevant);
2242 Function process_use.
2244 Inputs:
2245 - a USE in STMT in a loop represented by LOOP_VINFO
2246 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2247 that defined USE. This is dont by calling mark_relevant and passing it
2248 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2250 Outputs:
2251 Generally, LIVE_P and RELEVANT are used to define the liveness and
2252 relevance info of the DEF_STMT of this USE:
2253 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2254 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2255 Exceptions:
2256 - case 1: If USE is used only for address computations (e.g. array indexing),
2257 which does not need to be directly vectorized, then the liveness/relevance
2258 of the respective DEF_STMT is left unchanged.
2259 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2260 skip DEF_STMT cause it had already been processed.
2262 Return true if everything is as expected. Return false otherwise. */
2264 static bool
2265 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2266 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2268 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2269 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2270 stmt_vec_info dstmt_vinfo;
2271 basic_block def_bb;
2272 tree def, def_stmt;
2273 enum vect_def_type dt;
2275 /* case 1: we are only interested in uses that need to be vectorized. Uses
2276 that are used for address computation are not considered relevant. */
2277 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2278 return true;
2280 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2282 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2283 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2284 return false;
2287 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2288 return true;
2290 def_bb = bb_for_stmt (def_stmt);
2291 if (!flow_bb_inside_loop_p (loop, def_bb))
2292 return true;
2294 /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT
2295 must have already been processed, so we just check that everything is as
2296 expected, and we are done. */
2297 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2298 if (TREE_CODE (stmt) == PHI_NODE
2299 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2300 && TREE_CODE (def_stmt) != PHI_NODE
2301 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def)
2303 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2304 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2305 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2306 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2307 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2308 return true;
2311 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2312 return true;
2316 /* Function vect_mark_stmts_to_be_vectorized.
2318 Not all stmts in the loop need to be vectorized. For example:
2320 for i...
2321 for j...
2322 1. T0 = i + j
2323 2. T1 = a[T0]
2325 3. j = j + 1
2327 Stmt 1 and 3 do not need to be vectorized, because loop control and
2328 addressing of vectorized data-refs are handled differently.
2330 This pass detects such stmts. */
2332 static bool
2333 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2335 VEC(tree,heap) *worklist;
2336 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2337 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2338 unsigned int nbbs = loop->num_nodes;
2339 block_stmt_iterator si;
2340 tree stmt;
2341 stmt_ann_t ann;
2342 unsigned int i;
2343 stmt_vec_info stmt_vinfo;
2344 basic_block bb;
2345 tree phi;
2346 bool live_p;
2347 enum vect_relevant relevant;
2349 if (vect_print_dump_info (REPORT_DETAILS))
2350 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2352 worklist = VEC_alloc (tree, heap, 64);
2354 /* 1. Init worklist. */
2355 for (i = 0; i < nbbs; i++)
2357 bb = bbs[i];
2358 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2360 if (vect_print_dump_info (REPORT_DETAILS))
2362 fprintf (vect_dump, "init: phi relevant? ");
2363 print_generic_expr (vect_dump, phi, TDF_SLIM);
2366 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2367 vect_mark_relevant (&worklist, phi, relevant, live_p);
2369 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2371 stmt = bsi_stmt (si);
2372 if (vect_print_dump_info (REPORT_DETAILS))
2374 fprintf (vect_dump, "init: stmt relevant? ");
2375 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2378 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2379 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2383 /* 2. Process_worklist */
2384 while (VEC_length (tree, worklist) > 0)
2386 use_operand_p use_p;
2387 ssa_op_iter iter;
2389 stmt = VEC_pop (tree, worklist);
2390 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "worklist: examine stmt: ");
2393 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2396 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2397 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2398 liveness and relevance properties of STMT. */
2399 ann = stmt_ann (stmt);
2400 stmt_vinfo = vinfo_for_stmt (stmt);
2401 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2402 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2404 /* Generally, the liveness and relevance properties of STMT are
2405 propagated as is to the DEF_STMTs of its USEs:
2406 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2407 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2409 One exception is when STMT has been identified as defining a reduction
2410 variable; in this case we set the liveness/relevance as follows:
2411 live_p = false
2412 relevant = vect_used_by_reduction
2413 This is because we distinguish between two kinds of relevant stmts -
2414 those that are used by a reduction computation, and those that are
2415 (also) used by a regular computation. This allows us later on to
2416 identify stmts that are used solely by a reduction, and therefore the
2417 order of the results that they produce does not have to be kept.
2419 Reduction phis are expected to be used by a reduction stmt; Other
2420 reduction stmts are expected to be unused in the loop. These are the
2421 expected values of "relevant" for reduction phis/stmts in the loop:
2423 relevance: phi stmt
2424 vect_unused_in_loop ok
2425 vect_used_by_reduction ok
2426 vect_used_in_loop */
2428 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2430 switch (relevant)
2432 case vect_unused_in_loop:
2433 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2434 break;
2435 case vect_used_by_reduction:
2436 if (TREE_CODE (stmt) == PHI_NODE)
2437 break;
2438 case vect_used_in_loop:
2439 default:
2440 if (vect_print_dump_info (REPORT_DETAILS))
2441 fprintf (vect_dump, "unsupported use of reduction.");
2442 VEC_free (tree, heap, worklist);
2443 return false;
2445 relevant = vect_used_by_reduction;
2446 live_p = false;
2449 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2451 tree op = USE_FROM_PTR (use_p);
2452 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2454 VEC_free (tree, heap, worklist);
2455 return false;
2458 } /* while worklist */
2460 VEC_free (tree, heap, worklist);
2461 return true;
2465 /* Function vect_can_advance_ivs_p
2467 In case the number of iterations that LOOP iterates is unknown at compile
2468 time, an epilog loop will be generated, and the loop induction variables
2469 (IVs) will be "advanced" to the value they are supposed to take just before
2470 the epilog loop. Here we check that the access function of the loop IVs
2471 and the expression that represents the loop bound are simple enough.
2472 These restrictions will be relaxed in the future. */
2474 static bool
2475 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2478 basic_block bb = loop->header;
2479 tree phi;
2481 /* Analyze phi functions of the loop header. */
2483 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2486 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2488 tree access_fn = NULL;
2489 tree evolution_part;
2491 if (vect_print_dump_info (REPORT_DETAILS))
2493 fprintf (vect_dump, "Analyze phi: ");
2494 print_generic_expr (vect_dump, phi, TDF_SLIM);
2497 /* Skip virtual phi's. The data dependences that are associated with
2498 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2500 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2502 if (vect_print_dump_info (REPORT_DETAILS))
2503 fprintf (vect_dump, "virtual phi. skip.");
2504 continue;
2507 /* Skip reduction phis. */
2509 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2511 if (vect_print_dump_info (REPORT_DETAILS))
2512 fprintf (vect_dump, "reduc phi. skip.");
2513 continue;
2516 /* Analyze the evolution function. */
2518 access_fn = instantiate_parameters
2519 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2521 if (!access_fn)
2523 if (vect_print_dump_info (REPORT_DETAILS))
2524 fprintf (vect_dump, "No Access function.");
2525 return false;
2528 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "Access function of PHI: ");
2531 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2534 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2536 if (evolution_part == NULL_TREE)
2538 if (vect_print_dump_info (REPORT_DETAILS))
2539 fprintf (vect_dump, "No evolution.");
2540 return false;
2543 /* FORNOW: We do not transform initial conditions of IVs
2544 which evolution functions are a polynomial of degree >= 2. */
2546 if (tree_is_chrec (evolution_part))
2547 return false;
2550 return true;
2554 /* Function vect_get_loop_niters.
2556 Determine how many iterations the loop is executed.
2557 If an expression that represents the number of iterations
2558 can be constructed, place it in NUMBER_OF_ITERATIONS.
2559 Return the loop exit condition. */
2561 static tree
2562 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2564 tree niters;
2566 if (vect_print_dump_info (REPORT_DETAILS))
2567 fprintf (vect_dump, "=== get_loop_niters ===");
2569 niters = number_of_exit_cond_executions (loop);
2571 if (niters != NULL_TREE
2572 && niters != chrec_dont_know)
2574 *number_of_iterations = niters;
2576 if (vect_print_dump_info (REPORT_DETAILS))
2578 fprintf (vect_dump, "==> get_loop_niters:" );
2579 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2583 return get_loop_exit_condition (loop);
2587 /* Function vect_analyze_loop_form.
2589 Verify the following restrictions (some may be relaxed in the future):
2590 - it's an inner-most loop
2591 - number of BBs = 2 (which are the loop header and the latch)
2592 - the loop has a pre-header
2593 - the loop has a single entry and exit
2594 - the loop exit condition is simple enough, and the number of iterations
2595 can be analyzed (a countable loop). */
2597 static loop_vec_info
2598 vect_analyze_loop_form (struct loop *loop)
2600 loop_vec_info loop_vinfo;
2601 tree loop_cond;
2602 tree number_of_iterations = NULL;
2604 if (vect_print_dump_info (REPORT_DETAILS))
2605 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2607 if (loop->inner)
2609 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2610 fprintf (vect_dump, "not vectorized: nested loop.");
2611 return NULL;
2614 if (!single_exit (loop)
2615 || loop->num_nodes != 2
2616 || EDGE_COUNT (loop->header->preds) != 2)
2618 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2620 if (!single_exit (loop))
2621 fprintf (vect_dump, "not vectorized: multiple exits.");
2622 else if (loop->num_nodes != 2)
2623 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2624 else if (EDGE_COUNT (loop->header->preds) != 2)
2625 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2628 return NULL;
2631 /* We assume that the loop exit condition is at the end of the loop. i.e,
2632 that the loop is represented as a do-while (with a proper if-guard
2633 before the loop if needed), where the loop header contains all the
2634 executable statements, and the latch is empty. */
2635 if (!empty_block_p (loop->latch)
2636 || phi_nodes (loop->latch))
2638 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2639 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2640 return NULL;
2643 /* Make sure there exists a single-predecessor exit bb: */
2644 if (!single_pred_p (single_exit (loop)->dest))
2646 edge e = single_exit (loop);
2647 if (!(e->flags & EDGE_ABNORMAL))
2649 split_loop_exit_edge (e);
2650 if (vect_print_dump_info (REPORT_DETAILS))
2651 fprintf (vect_dump, "split exit edge.");
2653 else
2655 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2656 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2657 return NULL;
2661 if (empty_block_p (loop->header))
2663 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2664 fprintf (vect_dump, "not vectorized: empty loop.");
2665 return NULL;
2668 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2669 if (!loop_cond)
2671 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2672 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2673 return NULL;
2676 if (!number_of_iterations)
2678 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2679 fprintf (vect_dump,
2680 "not vectorized: number of iterations cannot be computed.");
2681 return NULL;
2684 if (chrec_contains_undetermined (number_of_iterations))
2686 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2687 fprintf (vect_dump, "Infinite number of iterations.");
2688 return false;
2691 if (!NITERS_KNOWN_P (number_of_iterations))
2693 if (vect_print_dump_info (REPORT_DETAILS))
2695 fprintf (vect_dump, "Symbolic number of iterations is ");
2696 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2699 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
2701 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2702 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2703 return NULL;
2706 loop_vinfo = new_loop_vec_info (loop);
2707 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2708 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2710 gcc_assert (!loop->aux);
2711 loop->aux = loop_vinfo;
2712 return loop_vinfo;
2716 /* Function vect_analyze_loop.
2718 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2719 for it. The different analyses will record information in the
2720 loop_vec_info struct. */
2721 loop_vec_info
2722 vect_analyze_loop (struct loop *loop)
2724 bool ok;
2725 loop_vec_info loop_vinfo;
2727 if (vect_print_dump_info (REPORT_DETAILS))
2728 fprintf (vect_dump, "===== analyze_loop_nest =====");
2730 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2732 loop_vinfo = vect_analyze_loop_form (loop);
2733 if (!loop_vinfo)
2735 if (vect_print_dump_info (REPORT_DETAILS))
2736 fprintf (vect_dump, "bad loop form.");
2737 return NULL;
2740 /* Find all data references in the loop (which correspond to vdefs/vuses)
2741 and analyze their evolution in the loop.
2743 FORNOW: Handle only simple, array references, which
2744 alignment can be forced, and aligned pointer-references. */
2746 ok = vect_analyze_data_refs (loop_vinfo);
2747 if (!ok)
2749 if (vect_print_dump_info (REPORT_DETAILS))
2750 fprintf (vect_dump, "bad data references.");
2751 destroy_loop_vec_info (loop_vinfo);
2752 return NULL;
2755 /* Classify all cross-iteration scalar data-flow cycles.
2756 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2758 vect_analyze_scalar_cycles (loop_vinfo);
2760 vect_pattern_recog (loop_vinfo);
2762 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2764 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2765 if (!ok)
2767 if (vect_print_dump_info (REPORT_DETAILS))
2768 fprintf (vect_dump, "unexpected pattern.");
2769 destroy_loop_vec_info (loop_vinfo);
2770 return NULL;
2773 /* Analyze the alignment of the data-refs in the loop.
2774 Fail if a data reference is found that cannot be vectorized. */
2776 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2777 if (!ok)
2779 if (vect_print_dump_info (REPORT_DETAILS))
2780 fprintf (vect_dump, "bad data alignment.");
2781 destroy_loop_vec_info (loop_vinfo);
2782 return NULL;
2785 ok = vect_determine_vectorization_factor (loop_vinfo);
2786 if (!ok)
2788 if (vect_print_dump_info (REPORT_DETAILS))
2789 fprintf (vect_dump, "can't determine vectorization factor.");
2790 destroy_loop_vec_info (loop_vinfo);
2791 return NULL;
2794 /* Analyze data dependences between the data-refs in the loop.
2795 FORNOW: fail at the first data dependence that we encounter. */
2797 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2798 if (!ok)
2800 if (vect_print_dump_info (REPORT_DETAILS))
2801 fprintf (vect_dump, "bad data dependence.");
2802 destroy_loop_vec_info (loop_vinfo);
2803 return NULL;
2806 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2807 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2809 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2810 if (!ok)
2812 if (vect_print_dump_info (REPORT_DETAILS))
2813 fprintf (vect_dump, "bad data access.");
2814 destroy_loop_vec_info (loop_vinfo);
2815 return NULL;
2818 /* This pass will decide on using loop versioning and/or loop peeling in
2819 order to enhance the alignment of data references in the loop. */
2821 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2822 if (!ok)
2824 if (vect_print_dump_info (REPORT_DETAILS))
2825 fprintf (vect_dump, "bad data alignment.");
2826 destroy_loop_vec_info (loop_vinfo);
2827 return NULL;
2830 /* Scan all the operations in the loop and make sure they are
2831 vectorizable. */
2833 ok = vect_analyze_operations (loop_vinfo);
2834 if (!ok)
2836 if (vect_print_dump_info (REPORT_DETAILS))
2837 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2838 destroy_loop_vec_info (loop_vinfo);
2839 return NULL;
2842 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2844 return loop_vinfo;