Daily bump.
[official-gcc.git] / gcc / tree-vect-analyze.c
blob9f9ea823f20dbba5fcc03fbdf835c67ffc91cec1
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 static bool vect_can_advance_ivs_p (loop_vec_info);
46 /* Function vect_determine_vectorization_factor
48 Determine the vectorization factor (VF). VF is the number of data elements
49 that are operated upon in parallel in a single iteration of the vectorized
50 loop. For example, when vectorizing a loop that operates on 4byte elements,
51 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
52 elements can fit in a single vector register.
54 We currently support vectorization of loops in which all types operated upon
55 are of the same size. Therefore this function currently sets VF according to
56 the size of the types operated upon, and fails if there are multiple sizes
57 in the loop.
59 VF is also the factor by which the loop iterations are strip-mined, e.g.:
60 original loop:
61 for (i=0; i<N; i++){
62 a[i] = b[i] + c[i];
65 vectorized loop:
66 for (i=0; i<N; i+=VF){
67 a[i:VF] = b[i:VF] + c[i:VF];
71 static bool
72 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
74 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
75 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
76 int nbbs = loop->num_nodes;
77 block_stmt_iterator si;
78 unsigned int vectorization_factor = 0;
79 tree scalar_type;
80 tree phi;
81 tree vectype;
82 unsigned int nunits;
83 stmt_vec_info stmt_info;
84 int i;
86 if (vect_print_dump_info (REPORT_DETAILS))
87 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
89 for (i = 0; i < nbbs; i++)
91 basic_block bb = bbs[i];
93 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
95 stmt_info = vinfo_for_stmt (phi);
96 if (vect_print_dump_info (REPORT_DETAILS))
98 fprintf (vect_dump, "==> examining phi: ");
99 print_generic_expr (vect_dump, phi, TDF_SLIM);
102 gcc_assert (stmt_info);
104 if (STMT_VINFO_RELEVANT_P (stmt_info))
106 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
107 scalar_type = TREE_TYPE (PHI_RESULT (phi));
109 if (vect_print_dump_info (REPORT_DETAILS))
111 fprintf (vect_dump, "get vectype for scalar type: ");
112 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
115 vectype = get_vectype_for_scalar_type (scalar_type);
116 if (!vectype)
118 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
120 fprintf (vect_dump,
121 "not vectorized: unsupported data-type ");
122 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
124 return false;
126 STMT_VINFO_VECTYPE (stmt_info) = vectype;
128 if (vect_print_dump_info (REPORT_DETAILS))
130 fprintf (vect_dump, "vectype: ");
131 print_generic_expr (vect_dump, vectype, TDF_SLIM);
134 nunits = TYPE_VECTOR_SUBPARTS (vectype);
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "nunits = %d", nunits);
138 if (!vectorization_factor
139 || (nunits > vectorization_factor))
140 vectorization_factor = nunits;
144 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
146 tree stmt = bsi_stmt (si);
147 stmt_info = vinfo_for_stmt (stmt);
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "==> examining statement: ");
152 print_generic_expr (vect_dump, stmt, TDF_SLIM);
155 gcc_assert (stmt_info);
157 /* skip stmts which do not need to be vectorized. */
158 if (!STMT_VINFO_RELEVANT_P (stmt_info)
159 && !STMT_VINFO_LIVE_P (stmt_info))
161 if (vect_print_dump_info (REPORT_DETAILS))
162 fprintf (vect_dump, "skip.");
163 continue;
166 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
168 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
170 fprintf (vect_dump, "not vectorized: irregular stmt.");
171 print_generic_expr (vect_dump, stmt, TDF_SLIM);
173 return false;
176 if (!GIMPLE_STMT_P (stmt)
177 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
179 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
181 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
182 print_generic_expr (vect_dump, stmt, TDF_SLIM);
184 return false;
187 if (STMT_VINFO_VECTYPE (stmt_info))
189 /* The only case when a vectype had been already set is for stmts
190 that contain a dataref, or for "pattern-stmts" (stmts generated
191 by the vectorizer to represent/replace a certain idiom). */
192 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
193 || is_pattern_stmt_p (stmt_info));
194 vectype = STMT_VINFO_VECTYPE (stmt_info);
196 else
198 tree operation;
200 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
201 && !is_pattern_stmt_p (stmt_info));
203 /* We generally set the vectype according to the type of the
204 result (lhs).
205 For stmts whose result-type is different than the type of the
206 arguments (e.g. demotion, promotion), vectype will be reset
207 appropriately (later). Note that we have to visit the smallest
208 datatype in this function, because that determines the VF.
209 If the smallest datatype in the loop is present only as the
210 rhs of a promotion operation - we'd miss it here.
211 Such a case, where a variable of this datatype does not appear
212 in the lhs anywhere in the loop, can only occur if it's an
213 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
214 to have been optimized away by invariant motion. However, we
215 cannot rely on invariant motion to always take invariants out
216 of the loop, and so in the case of promotion we also have to
217 check the rhs. */
218 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
220 operation = GIMPLE_STMT_OPERAND (stmt, 1);
221 if (TREE_CODE (operation) == NOP_EXPR
222 || TREE_CODE (operation) == CONVERT_EXPR
223 || TREE_CODE (operation) == WIDEN_MULT_EXPR
224 || TREE_CODE (operation) == FLOAT_EXPR)
226 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
227 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
228 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
229 scalar_type = rhs_type;
232 if (vect_print_dump_info (REPORT_DETAILS))
234 fprintf (vect_dump, "get vectype for scalar type: ");
235 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
238 vectype = get_vectype_for_scalar_type (scalar_type);
239 if (!vectype)
241 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
243 fprintf (vect_dump,
244 "not vectorized: unsupported data-type ");
245 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
247 return false;
249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
252 if (vect_print_dump_info (REPORT_DETAILS))
254 fprintf (vect_dump, "vectype: ");
255 print_generic_expr (vect_dump, vectype, TDF_SLIM);
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (vect_print_dump_info (REPORT_DETAILS))
260 fprintf (vect_dump, "nunits = %d", nunits);
262 if (!vectorization_factor
263 || (nunits > vectorization_factor))
264 vectorization_factor = nunits;
269 /* TODO: Analyze cost. Decide if worth while to vectorize. */
270 if (vect_print_dump_info (REPORT_DETAILS))
271 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
272 if (vectorization_factor <= 1)
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported data-type");
276 return false;
278 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
280 return true;
284 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
285 the number of created vector stmts depends on the unrolling factor). However,
286 the actual number of vector stmts for every SLP node depends on VF which is
287 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
288 In this function we assume that the inside costs calculated in
289 vect_model_xxx_cost are linear in ncopies. */
291 static void
292 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
294 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
295 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
296 slp_instance instance;
298 if (vect_print_dump_info (REPORT_SLP))
299 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
301 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
302 /* We assume that costs are linear in ncopies. */
303 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
304 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
308 /* Function vect_analyze_operations.
310 Scan the loop stmts and make sure they are all vectorizable. */
312 static bool
313 vect_analyze_operations (loop_vec_info loop_vinfo)
315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
316 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
317 int nbbs = loop->num_nodes;
318 block_stmt_iterator si;
319 unsigned int vectorization_factor = 0;
320 int i;
321 bool ok;
322 tree phi;
323 stmt_vec_info stmt_info;
324 bool need_to_vectorize = false;
325 int min_profitable_iters;
326 int min_scalar_loop_bound;
327 unsigned int th;
328 bool only_slp_in_loop = true;
330 if (vect_print_dump_info (REPORT_DETAILS))
331 fprintf (vect_dump, "=== vect_analyze_operations ===");
333 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
334 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
336 for (i = 0; i < nbbs; i++)
338 basic_block bb = bbs[i];
340 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
342 ok = true;
344 stmt_info = vinfo_for_stmt (phi);
345 if (vect_print_dump_info (REPORT_DETAILS))
347 fprintf (vect_dump, "examining phi: ");
348 print_generic_expr (vect_dump, phi, TDF_SLIM);
351 if (! is_loop_header_bb_p (bb))
353 /* inner-loop loop-closed exit phi in outer-loop vectorization
354 (i.e. a phi in the tail of the outer-loop).
355 FORNOW: we currently don't support the case that these phis
356 are not used in the outerloop, cause this case requires
357 to actually do something here. */
358 if (!STMT_VINFO_RELEVANT_P (stmt_info)
359 || STMT_VINFO_LIVE_P (stmt_info))
361 if (vect_print_dump_info (REPORT_DETAILS))
362 fprintf (vect_dump,
363 "Unsupported loop-closed phi in outer-loop.");
364 return false;
366 continue;
369 gcc_assert (stmt_info);
371 if (STMT_VINFO_LIVE_P (stmt_info))
373 /* FORNOW: not yet supported. */
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 fprintf (vect_dump, "not vectorized: value used after loop.");
376 return false;
379 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
380 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
382 /* A scalar-dependence cycle that we don't support. */
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
385 return false;
388 if (STMT_VINFO_RELEVANT_P (stmt_info))
390 need_to_vectorize = true;
391 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
392 ok = vectorizable_induction (phi, NULL, NULL);
395 if (!ok)
397 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
399 fprintf (vect_dump,
400 "not vectorized: relevant phi not supported: ");
401 print_generic_expr (vect_dump, phi, TDF_SLIM);
403 return false;
407 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
409 tree stmt = bsi_stmt (si);
410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
411 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
413 if (vect_print_dump_info (REPORT_DETAILS))
415 fprintf (vect_dump, "==> examining statement: ");
416 print_generic_expr (vect_dump, stmt, TDF_SLIM);
419 gcc_assert (stmt_info);
421 /* skip stmts which do not need to be vectorized.
422 this is expected to include:
423 - the COND_EXPR which is the loop exit condition
424 - any LABEL_EXPRs in the loop
425 - computations that are used only for array indexing or loop
426 control */
428 if (!STMT_VINFO_RELEVANT_P (stmt_info)
429 && !STMT_VINFO_LIVE_P (stmt_info))
431 if (vect_print_dump_info (REPORT_DETAILS))
432 fprintf (vect_dump, "irrelevant.");
433 continue;
436 switch (STMT_VINFO_DEF_TYPE (stmt_info))
438 case vect_loop_def:
439 break;
441 case vect_reduction_def:
442 gcc_assert (relevance == vect_used_in_outer
443 || relevance == vect_used_in_outer_by_reduction
444 || relevance == vect_unused_in_loop);
445 break;
447 case vect_induction_def:
448 case vect_constant_def:
449 case vect_invariant_def:
450 case vect_unknown_def_type:
451 default:
452 gcc_unreachable ();
455 if (STMT_VINFO_RELEVANT_P (stmt_info))
457 gcc_assert (GIMPLE_STMT_P (stmt)
458 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
459 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
460 need_to_vectorize = true;
463 ok = true;
464 if (STMT_VINFO_RELEVANT_P (stmt_info)
465 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
466 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
467 || vectorizable_type_demotion (stmt, NULL, NULL)
468 || vectorizable_conversion (stmt, NULL, NULL, NULL)
469 || vectorizable_operation (stmt, NULL, NULL, NULL)
470 || vectorizable_assignment (stmt, NULL, NULL, NULL)
471 || vectorizable_load (stmt, NULL, NULL, NULL)
472 || vectorizable_call (stmt, NULL, NULL)
473 || vectorizable_store (stmt, NULL, NULL, NULL)
474 || vectorizable_condition (stmt, NULL, NULL)
475 || vectorizable_reduction (stmt, NULL, NULL));
477 if (!ok)
479 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
481 fprintf (vect_dump, "not vectorized: relevant stmt not ");
482 fprintf (vect_dump, "supported: ");
483 print_generic_expr (vect_dump, stmt, TDF_SLIM);
485 return false;
488 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
489 need extra handling, except for vectorizable reductions. */
490 if (STMT_VINFO_LIVE_P (stmt_info)
491 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
492 ok = vectorizable_live_operation (stmt, NULL, NULL);
494 if (!ok)
496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
498 fprintf (vect_dump, "not vectorized: live stmt not ");
499 fprintf (vect_dump, "supported: ");
500 print_generic_expr (vect_dump, stmt, TDF_SLIM);
502 return false;
505 if (!PURE_SLP_STMT (stmt_info))
507 /* STMT needs loop-based vectorization. */
508 only_slp_in_loop = false;
510 /* Groups of strided accesses whose size is not a power of 2 are
511 not vectorizable yet using loop-vectorization. Therefore, if
512 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
513 both SLPed and loop-based vectorzed), the loop cannot be
514 vectorized. */
515 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
516 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
517 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
519 if (vect_print_dump_info (REPORT_DETAILS))
521 fprintf (vect_dump, "not vectorized: the size of group "
522 "of strided accesses is not a power of 2");
523 print_generic_expr (vect_dump, stmt, TDF_SLIM);
525 return false;
528 } /* stmts in bb */
529 } /* bbs */
531 /* All operations in the loop are either irrelevant (deal with loop
532 control, or dead), or only used outside the loop and can be moved
533 out of the loop (e.g. invariants, inductions). The loop can be
534 optimized away by scalar optimizations. We're better off not
535 touching this loop. */
536 if (!need_to_vectorize)
538 if (vect_print_dump_info (REPORT_DETAILS))
539 fprintf (vect_dump,
540 "All the computation can be taken out of the loop.");
541 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
542 fprintf (vect_dump,
543 "not vectorized: redundant loop. no profit to vectorize.");
544 return false;
547 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
548 vectorization factor of the loop is the unrolling factor required by the
549 SLP instances. If that unrolling factor is 1, we say, that we perform
550 pure SLP on loop - cross iteration parallelism is not exploited. */
551 if (only_slp_in_loop)
552 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
553 else
554 vectorization_factor = least_common_multiple (vectorization_factor,
555 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
557 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
559 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
560 && vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump,
562 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
563 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
569 fprintf (vect_dump, "not vectorized: iteration count too small.");
570 if (vect_print_dump_info (REPORT_DETAILS))
571 fprintf (vect_dump,"not vectorized: iteration count smaller than "
572 "vectorization factor.");
573 return false;
576 /* Analyze cost. Decide if worth while to vectorize. */
578 /* Once VF is set, SLP costs should be updated since the number of created
579 vector stmts depends on VF. */
580 vect_update_slp_costs_according_to_vf (loop_vinfo);
582 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
583 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
585 if (min_profitable_iters < 0)
587 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
588 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
589 if (vect_print_dump_info (REPORT_DETAILS))
590 fprintf (vect_dump, "not vectorized: vector version will never be "
591 "profitable.");
592 return false;
595 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
596 * vectorization_factor) - 1);
598 /* Use the cost model only if it is more conservative than user specified
599 threshold. */
601 th = (unsigned) min_scalar_loop_bound;
602 if (min_profitable_iters
603 && (!min_scalar_loop_bound
604 || min_profitable_iters > min_scalar_loop_bound))
605 th = (unsigned) min_profitable_iters;
607 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
608 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
610 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
611 fprintf (vect_dump, "not vectorized: vectorization not "
612 "profitable.");
613 if (vect_print_dump_info (REPORT_DETAILS))
614 fprintf (vect_dump, "not vectorized: iteration count smaller than "
615 "user specified loop bound parameter or minimum "
616 "profitable iterations (whichever is more conservative).");
617 return false;
620 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
621 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
622 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "epilog loop required.");
626 if (!vect_can_advance_ivs_p (loop_vinfo))
628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
629 fprintf (vect_dump,
630 "not vectorized: can't create epilog loop 1.");
631 return false;
633 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
635 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
636 fprintf (vect_dump,
637 "not vectorized: can't create epilog loop 2.");
638 return false;
642 return true;
646 /* Function exist_non_indexing_operands_for_use_p
648 USE is one of the uses attached to STMT. Check if USE is
649 used in STMT for anything other than indexing an array. */
651 static bool
652 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
654 tree operand;
655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
657 /* USE corresponds to some operand in STMT. If there is no data
658 reference in STMT, then any operand that corresponds to USE
659 is not indexing an array. */
660 if (!STMT_VINFO_DATA_REF (stmt_info))
661 return true;
663 /* STMT has a data_ref. FORNOW this means that its of one of
664 the following forms:
665 -1- ARRAY_REF = var
666 -2- var = ARRAY_REF
667 (This should have been verified in analyze_data_refs).
669 'var' in the second case corresponds to a def, not a use,
670 so USE cannot correspond to any operands that are not used
671 for array indexing.
673 Therefore, all we need to check is if STMT falls into the
674 first case, and whether var corresponds to USE. */
676 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
677 return false;
679 operand = GIMPLE_STMT_OPERAND (stmt, 1);
681 if (TREE_CODE (operand) != SSA_NAME)
682 return false;
684 if (operand == use)
685 return true;
687 return false;
691 /* Function vect_analyze_scalar_cycles_1.
693 Examine the cross iteration def-use cycles of scalar variables
694 in LOOP. LOOP_VINFO represents the loop that is noe being
695 considered for vectorization (can be LOOP, or an outer-loop
696 enclosing LOOP). */
698 static void
699 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
701 tree phi;
702 basic_block bb = loop->header;
703 tree dumy;
704 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
709 /* First - identify all inductions. */
710 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
712 tree access_fn = NULL;
713 tree def = PHI_RESULT (phi);
714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
716 if (vect_print_dump_info (REPORT_DETAILS))
718 fprintf (vect_dump, "Analyze phi: ");
719 print_generic_expr (vect_dump, phi, TDF_SLIM);
722 /* Skip virtual phi's. The data dependences that are associated with
723 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
724 if (!is_gimple_reg (SSA_NAME_VAR (def)))
725 continue;
727 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
729 /* Analyze the evolution function. */
730 access_fn = analyze_scalar_evolution (loop, def);
731 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
733 fprintf (vect_dump, "Access function of PHI: ");
734 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
737 if (!access_fn
738 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
740 VEC_safe_push (tree, heap, worklist, phi);
741 continue;
744 if (vect_print_dump_info (REPORT_DETAILS))
745 fprintf (vect_dump, "Detected induction.");
746 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
750 /* Second - identify all reductions. */
751 while (VEC_length (tree, worklist) > 0)
753 tree phi = VEC_pop (tree, worklist);
754 tree def = PHI_RESULT (phi);
755 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
756 tree reduc_stmt;
758 if (vect_print_dump_info (REPORT_DETAILS))
760 fprintf (vect_dump, "Analyze phi: ");
761 print_generic_expr (vect_dump, phi, TDF_SLIM);
764 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
765 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
767 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
768 if (reduc_stmt)
770 if (vect_print_dump_info (REPORT_DETAILS))
771 fprintf (vect_dump, "Detected reduction.");
772 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
773 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
774 vect_reduction_def;
776 else
777 if (vect_print_dump_info (REPORT_DETAILS))
778 fprintf (vect_dump, "Unknown def-use cycle pattern.");
781 VEC_free (tree, heap, worklist);
782 return;
786 /* Function vect_analyze_scalar_cycles.
788 Examine the cross iteration def-use cycles of scalar variables, by
789 analyzing the loop-header PHIs of scalar variables; Classify each
790 cycle as one of the following: invariant, induction, reduction, unknown.
791 We do that for the loop represented by LOOP_VINFO, and also to its
792 inner-loop, if exists.
793 Examples for scalar cycles:
795 Example1: reduction:
797 loop1:
798 for (i=0; i<N; i++)
799 sum += a[i];
801 Example2: induction:
803 loop2:
804 for (i=0; i<N; i++)
805 a[i] = i; */
807 static void
808 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
812 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
814 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
815 Reductions in such inner-loop therefore have different properties than
816 the reductions in the nest that gets vectorized:
817 1. When vectorized, they are executed in the same order as in the original
818 scalar loop, so we can't change the order of computation when
819 vectorizing them.
820 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
821 current checks are too strict. */
823 if (loop->inner)
824 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
828 /* Function vect_insert_into_interleaving_chain.
830 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
832 static void
833 vect_insert_into_interleaving_chain (struct data_reference *dra,
834 struct data_reference *drb)
836 tree prev, next, next_init;
837 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
838 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
840 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
841 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
842 while (next)
844 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
845 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
847 /* Insert here. */
848 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
849 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
850 return;
852 prev = next;
853 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
856 /* We got to the end of the list. Insert here. */
857 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
858 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
862 /* Function vect_update_interleaving_chain.
864 For two data-refs DRA and DRB that are a part of a chain interleaved data
865 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
867 There are four possible cases:
868 1. New stmts - both DRA and DRB are not a part of any chain:
869 FIRST_DR = DRB
870 NEXT_DR (DRB) = DRA
871 2. DRB is a part of a chain and DRA is not:
872 no need to update FIRST_DR
873 no need to insert DRB
874 insert DRA according to init
875 3. DRA is a part of a chain and DRB is not:
876 if (init of FIRST_DR > init of DRB)
877 FIRST_DR = DRB
878 NEXT(FIRST_DR) = previous FIRST_DR
879 else
880 insert DRB according to its init
881 4. both DRA and DRB are in some interleaving chains:
882 choose the chain with the smallest init of FIRST_DR
883 insert the nodes of the second chain into the first one. */
885 static void
886 vect_update_interleaving_chain (struct data_reference *drb,
887 struct data_reference *dra)
889 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
890 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
891 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
892 tree node, prev, next, node_init, first_stmt;
894 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
895 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
897 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
898 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
899 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
900 return;
903 /* 2. DRB is a part of a chain and DRA is not. */
904 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
906 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
907 /* Insert DRA into the chain of DRB. */
908 vect_insert_into_interleaving_chain (dra, drb);
909 return;
912 /* 3. DRA is a part of a chain and DRB is not. */
913 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
915 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
916 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
917 old_first_stmt)));
918 tree tmp;
920 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
922 /* DRB's init is smaller than the init of the stmt previously marked
923 as the first stmt of the interleaving chain of DRA. Therefore, we
924 update FIRST_STMT and put DRB in the head of the list. */
925 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
926 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
928 /* Update all the stmts in the list to point to the new FIRST_STMT. */
929 tmp = old_first_stmt;
930 while (tmp)
932 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
933 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
936 else
938 /* Insert DRB in the list of DRA. */
939 vect_insert_into_interleaving_chain (drb, dra);
940 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
942 return;
945 /* 4. both DRA and DRB are in some interleaving chains. */
946 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
947 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
948 if (first_a == first_b)
949 return;
950 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
951 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
953 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
955 /* Insert the nodes of DRA chain into the DRB chain.
956 After inserting a node, continue from this node of the DRB chain (don't
957 start from the beginning. */
958 node = DR_GROUP_FIRST_DR (stmtinfo_a);
959 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
960 first_stmt = first_b;
962 else
964 /* Insert the nodes of DRB chain into the DRA chain.
965 After inserting a node, continue from this node of the DRA chain (don't
966 start from the beginning. */
967 node = DR_GROUP_FIRST_DR (stmtinfo_b);
968 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
969 first_stmt = first_a;
972 while (node)
974 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
975 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
976 while (next)
978 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
979 if (tree_int_cst_compare (next_init, node_init) > 0)
981 /* Insert here. */
982 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
983 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
984 prev = node;
985 break;
987 prev = next;
988 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
990 if (!next)
992 /* We got to the end of the list. Insert here. */
993 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
994 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
995 prev = node;
997 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
998 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1003 /* Function vect_equal_offsets.
1005 Check if OFFSET1 and OFFSET2 are identical expressions. */
1007 static bool
1008 vect_equal_offsets (tree offset1, tree offset2)
1010 bool res0, res1;
1012 STRIP_NOPS (offset1);
1013 STRIP_NOPS (offset2);
1015 if (offset1 == offset2)
1016 return true;
1018 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1019 || !BINARY_CLASS_P (offset1)
1020 || !BINARY_CLASS_P (offset2))
1021 return false;
1023 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1024 TREE_OPERAND (offset2, 0));
1025 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1026 TREE_OPERAND (offset2, 1));
1028 return (res0 && res1);
1032 /* Function vect_check_interleaving.
1034 Check if DRA and DRB are a part of interleaving. In case they are, insert
1035 DRA and DRB in an interleaving chain. */
1037 static void
1038 vect_check_interleaving (struct data_reference *dra,
1039 struct data_reference *drb)
1041 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1043 /* Check that the data-refs have same first location (except init) and they
1044 are both either store or load (not load and store). */
1045 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1046 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1047 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1048 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1049 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1050 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1051 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1052 || DR_IS_READ (dra) != DR_IS_READ (drb))
1053 return;
1055 /* Check:
1056 1. data-refs are of the same type
1057 2. their steps are equal
1058 3. the step is greater than the difference between data-refs' inits */
1059 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1060 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1062 if (type_size_a != type_size_b
1063 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1064 return;
1066 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1067 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1068 step = TREE_INT_CST_LOW (DR_STEP (dra));
1070 if (init_a > init_b)
1072 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1073 and DRB is accessed before DRA. */
1074 diff_mod_size = (init_a - init_b) % type_size_a;
1076 if ((init_a - init_b) > step)
1077 return;
1079 if (diff_mod_size == 0)
1081 vect_update_interleaving_chain (drb, dra);
1082 if (vect_print_dump_info (REPORT_DR_DETAILS))
1084 fprintf (vect_dump, "Detected interleaving ");
1085 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1086 fprintf (vect_dump, " and ");
1087 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1089 return;
1092 else
1094 /* If init_b == init_a + the size of the type * k, we have an
1095 interleaving, and DRA is accessed before DRB. */
1096 diff_mod_size = (init_b - init_a) % type_size_a;
1098 if ((init_b - init_a) > step)
1099 return;
1101 if (diff_mod_size == 0)
1103 vect_update_interleaving_chain (dra, drb);
1104 if (vect_print_dump_info (REPORT_DR_DETAILS))
1106 fprintf (vect_dump, "Detected interleaving ");
1107 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1108 fprintf (vect_dump, " and ");
1109 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1111 return;
1116 /* Check if data references pointed by DR_I and DR_J are same or
1117 belong to same interleaving group. Return FALSE if drs are
1118 different, otherwise return TRUE. */
1120 static bool
1121 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1123 tree stmt_i = DR_STMT (dr_i);
1124 tree stmt_j = DR_STMT (dr_j);
1126 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1127 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1128 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1129 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1130 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1131 return true;
1132 else
1133 return false;
1136 /* If address ranges represented by DDR_I and DDR_J are equal,
1137 return TRUE, otherwise return FALSE. */
1139 static bool
1140 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1142 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1143 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1144 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1145 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1146 return true;
1147 else
1148 return false;
1151 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1152 tested at run-time. Return TRUE if DDR was successfully inserted.
1153 Return false if versioning is not supported. */
1155 static bool
1156 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1158 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1160 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1161 return false;
1163 if (vect_print_dump_info (REPORT_DR_DETAILS))
1165 fprintf (vect_dump, "mark for run-time aliasing test between ");
1166 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1167 fprintf (vect_dump, " and ");
1168 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1171 if (optimize_size)
1173 if (vect_print_dump_info (REPORT_DR_DETAILS))
1174 fprintf (vect_dump, "versioning not supported when optimizing for size.");
1175 return false;
1178 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1179 if (loop->inner)
1181 if (vect_print_dump_info (REPORT_DR_DETAILS))
1182 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1183 return false;
1186 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1187 return true;
1190 /* Function vect_analyze_data_ref_dependence.
1192 Return TRUE if there (might) exist a dependence between a memory-reference
1193 DRA and a memory-reference DRB. When versioning for alias may check a
1194 dependence at run-time, return FALSE. */
1196 static bool
1197 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1198 loop_vec_info loop_vinfo)
1200 unsigned int i;
1201 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1202 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1203 struct data_reference *dra = DDR_A (ddr);
1204 struct data_reference *drb = DDR_B (ddr);
1205 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1206 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1207 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1208 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1209 lambda_vector dist_v;
1210 unsigned int loop_depth;
1212 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1214 /* Independent data accesses. */
1215 vect_check_interleaving (dra, drb);
1216 return false;
1219 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1220 return false;
1222 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1224 if (vect_print_dump_info (REPORT_DR_DETAILS))
1226 fprintf (vect_dump,
1227 "versioning for alias required: can't determine dependence between ");
1228 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1229 fprintf (vect_dump, " and ");
1230 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1232 /* Add to list of ddrs that need to be tested at run-time. */
1233 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1236 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1238 if (vect_print_dump_info (REPORT_DR_DETAILS))
1240 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1241 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1242 fprintf (vect_dump, " and ");
1243 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1245 /* Add to list of ddrs that need to be tested at run-time. */
1246 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1249 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1250 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1252 int dist = dist_v[loop_depth];
1254 if (vect_print_dump_info (REPORT_DR_DETAILS))
1255 fprintf (vect_dump, "dependence distance = %d.", dist);
1257 /* Same loop iteration. */
1258 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1260 /* Two references with distance zero have the same alignment. */
1261 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1262 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1263 if (vect_print_dump_info (REPORT_ALIGNMENT))
1264 fprintf (vect_dump, "accesses have the same alignment.");
1265 if (vect_print_dump_info (REPORT_DR_DETAILS))
1267 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1268 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1269 fprintf (vect_dump, " and ");
1270 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1273 /* For interleaving, mark that there is a read-write dependency if
1274 necessary. We check before that one of the data-refs is store. */
1275 if (DR_IS_READ (dra))
1276 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1277 else
1279 if (DR_IS_READ (drb))
1280 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1283 continue;
1286 if (abs (dist) >= vectorization_factor
1287 || (dist > 0 && DDR_REVERSED_P (ddr)))
1289 /* Dependence distance does not create dependence, as far as
1290 vectorization is concerned, in this case. If DDR_REVERSED_P the
1291 order of the data-refs in DDR was reversed (to make distance
1292 vector positive), and the actual distance is negative. */
1293 if (vect_print_dump_info (REPORT_DR_DETAILS))
1294 fprintf (vect_dump, "dependence distance >= VF or negative.");
1295 continue;
1298 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1300 fprintf (vect_dump,
1301 "not vectorized, possible dependence "
1302 "between data-refs ");
1303 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1304 fprintf (vect_dump, " and ");
1305 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1308 return true;
1311 return false;
1314 /* Function vect_analyze_data_ref_dependences.
1316 Examine all the data references in the loop, and make sure there do not
1317 exist any data dependences between them. */
1319 static bool
1320 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1322 unsigned int i;
1323 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1324 struct data_dependence_relation *ddr;
1326 if (vect_print_dump_info (REPORT_DETAILS))
1327 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1329 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1330 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1331 return false;
1333 return true;
1337 /* Function vect_compute_data_ref_alignment
1339 Compute the misalignment of the data reference DR.
1341 Output:
1342 1. If during the misalignment computation it is found that the data reference
1343 cannot be vectorized then false is returned.
1344 2. DR_MISALIGNMENT (DR) is defined.
1346 FOR NOW: No analysis is actually performed. Misalignment is calculated
1347 only for trivial cases. TODO. */
1349 static bool
1350 vect_compute_data_ref_alignment (struct data_reference *dr)
1352 tree stmt = DR_STMT (dr);
1353 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1354 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1356 tree ref = DR_REF (dr);
1357 tree vectype;
1358 tree base, base_addr;
1359 bool base_aligned;
1360 tree misalign;
1361 tree aligned_to, alignment;
1363 if (vect_print_dump_info (REPORT_DETAILS))
1364 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1366 /* Initialize misalignment to unknown. */
1367 SET_DR_MISALIGNMENT (dr, -1);
1369 misalign = DR_INIT (dr);
1370 aligned_to = DR_ALIGNED_TO (dr);
1371 base_addr = DR_BASE_ADDRESS (dr);
1373 /* In case the dataref is in an inner-loop of the loop that is being
1374 vectorized (LOOP), we use the base and misalignment information
1375 relative to the outer-loop (LOOP). This is ok only if the misalignment
1376 stays the same throughout the execution of the inner-loop, which is why
1377 we have to check that the stride of the dataref in the inner-loop evenly
1378 divides by the vector size. */
1379 if (nested_in_vect_loop_p (loop, stmt))
1381 tree step = DR_STEP (dr);
1382 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1384 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1386 if (vect_print_dump_info (REPORT_ALIGNMENT))
1387 fprintf (vect_dump, "inner step divides the vector-size.");
1388 misalign = STMT_VINFO_DR_INIT (stmt_info);
1389 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1390 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1392 else
1394 if (vect_print_dump_info (REPORT_ALIGNMENT))
1395 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1396 misalign = NULL_TREE;
1400 base = build_fold_indirect_ref (base_addr);
1401 vectype = STMT_VINFO_VECTYPE (stmt_info);
1402 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1404 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1405 || !misalign)
1407 if (vect_print_dump_info (REPORT_ALIGNMENT))
1409 fprintf (vect_dump, "Unknown alignment for access: ");
1410 print_generic_expr (vect_dump, base, TDF_SLIM);
1412 return true;
1415 if ((DECL_P (base)
1416 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1417 alignment) >= 0)
1418 || (TREE_CODE (base_addr) == SSA_NAME
1419 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1420 TREE_TYPE (base_addr)))),
1421 alignment) >= 0))
1422 base_aligned = true;
1423 else
1424 base_aligned = false;
1426 if (!base_aligned)
1428 /* Do not change the alignment of global variables if
1429 flag_section_anchors is enabled. */
1430 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1431 || (TREE_STATIC (base) && flag_section_anchors))
1433 if (vect_print_dump_info (REPORT_DETAILS))
1435 fprintf (vect_dump, "can't force alignment of ref: ");
1436 print_generic_expr (vect_dump, ref, TDF_SLIM);
1438 return true;
1441 /* Force the alignment of the decl.
1442 NOTE: This is the only change to the code we make during
1443 the analysis phase, before deciding to vectorize the loop. */
1444 if (vect_print_dump_info (REPORT_DETAILS))
1445 fprintf (vect_dump, "force alignment");
1446 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1447 DECL_USER_ALIGN (base) = 1;
1450 /* At this point we assume that the base is aligned. */
1451 gcc_assert (base_aligned
1452 || (TREE_CODE (base) == VAR_DECL
1453 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1455 /* Modulo alignment. */
1456 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1458 if (!host_integerp (misalign, 1))
1460 /* Negative or overflowed misalignment value. */
1461 if (vect_print_dump_info (REPORT_DETAILS))
1462 fprintf (vect_dump, "unexpected misalign value");
1463 return false;
1466 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1468 if (vect_print_dump_info (REPORT_DETAILS))
1470 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1471 print_generic_expr (vect_dump, ref, TDF_SLIM);
1474 return true;
1478 /* Function vect_compute_data_refs_alignment
1480 Compute the misalignment of data references in the loop.
1481 Return FALSE if a data reference is found that cannot be vectorized. */
1483 static bool
1484 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1486 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1487 struct data_reference *dr;
1488 unsigned int i;
1490 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1491 if (!vect_compute_data_ref_alignment (dr))
1492 return false;
1494 return true;
1498 /* Function vect_update_misalignment_for_peel
1500 DR - the data reference whose misalignment is to be adjusted.
1501 DR_PEEL - the data reference whose misalignment is being made
1502 zero in the vector loop by the peel.
1503 NPEEL - the number of iterations in the peel loop if the misalignment
1504 of DR_PEEL is known at compile time. */
1506 static void
1507 vect_update_misalignment_for_peel (struct data_reference *dr,
1508 struct data_reference *dr_peel, int npeel)
1510 unsigned int i;
1511 VEC(dr_p,heap) *same_align_drs;
1512 struct data_reference *current_dr;
1513 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1514 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1515 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1516 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1518 /* For interleaved data accesses the step in the loop must be multiplied by
1519 the size of the interleaving group. */
1520 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1521 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1522 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1523 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1525 /* It can be assumed that the data refs with the same alignment as dr_peel
1526 are aligned in the vector loop. */
1527 same_align_drs
1528 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1529 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1531 if (current_dr != dr)
1532 continue;
1533 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1534 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1535 SET_DR_MISALIGNMENT (dr, 0);
1536 return;
1539 if (known_alignment_for_access_p (dr)
1540 && known_alignment_for_access_p (dr_peel))
1542 int misal = DR_MISALIGNMENT (dr);
1543 misal += npeel * dr_size;
1544 misal %= UNITS_PER_SIMD_WORD;
1545 SET_DR_MISALIGNMENT (dr, misal);
1546 return;
1549 if (vect_print_dump_info (REPORT_DETAILS))
1550 fprintf (vect_dump, "Setting misalignment to -1.");
1551 SET_DR_MISALIGNMENT (dr, -1);
1555 /* Function vect_verify_datarefs_alignment
1557 Return TRUE if all data references in the loop can be
1558 handled with respect to alignment. */
1560 static bool
1561 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1563 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1564 struct data_reference *dr;
1565 enum dr_alignment_support supportable_dr_alignment;
1566 unsigned int i;
1568 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1570 tree stmt = DR_STMT (dr);
1571 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1573 /* For interleaving, only the alignment of the first access matters. */
1574 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1575 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1576 continue;
1578 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1579 if (!supportable_dr_alignment)
1581 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1583 if (DR_IS_READ (dr))
1584 fprintf (vect_dump,
1585 "not vectorized: unsupported unaligned load.");
1586 else
1587 fprintf (vect_dump,
1588 "not vectorized: unsupported unaligned store.");
1590 return false;
1592 if (supportable_dr_alignment != dr_aligned
1593 && vect_print_dump_info (REPORT_ALIGNMENT))
1594 fprintf (vect_dump, "Vectorizing an unaligned access.");
1596 return true;
1600 /* Function vector_alignment_reachable_p
1602 Return true if vector alignment for DR is reachable by peeling
1603 a few loop iterations. Return false otherwise. */
1605 static bool
1606 vector_alignment_reachable_p (struct data_reference *dr)
1608 tree stmt = DR_STMT (dr);
1609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1610 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1612 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1614 /* For interleaved access we peel only if number of iterations in
1615 the prolog loop ({VF - misalignment}), is a multiple of the
1616 number of the interleaved accesses. */
1617 int elem_size, mis_in_elements;
1618 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1620 /* FORNOW: handle only known alignment. */
1621 if (!known_alignment_for_access_p (dr))
1622 return false;
1624 elem_size = UNITS_PER_SIMD_WORD / nelements;
1625 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1627 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1628 return false;
1631 /* If misalignment is known at the compile time then allow peeling
1632 only if natural alignment is reachable through peeling. */
1633 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1635 HOST_WIDE_INT elmsize =
1636 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1637 if (vect_print_dump_info (REPORT_DETAILS))
1639 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1640 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1642 if (DR_MISALIGNMENT (dr) % elmsize)
1644 if (vect_print_dump_info (REPORT_DETAILS))
1645 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1646 return false;
1650 if (!known_alignment_for_access_p (dr))
1652 tree type = (TREE_TYPE (DR_REF (dr)));
1653 tree ba = DR_BASE_OBJECT (dr);
1654 bool is_packed = false;
1656 if (ba)
1657 is_packed = contains_packed_reference (ba);
1659 if (vect_print_dump_info (REPORT_DETAILS))
1660 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1661 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1662 return true;
1663 else
1664 return false;
1667 return true;
1670 /* Function vect_enhance_data_refs_alignment
1672 This pass will use loop versioning and loop peeling in order to enhance
1673 the alignment of data references in the loop.
1675 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1676 original loop is to be vectorized; Any other loops that are created by
1677 the transformations performed in this pass - are not supposed to be
1678 vectorized. This restriction will be relaxed.
1680 This pass will require a cost model to guide it whether to apply peeling
1681 or versioning or a combination of the two. For example, the scheme that
1682 intel uses when given a loop with several memory accesses, is as follows:
1683 choose one memory access ('p') which alignment you want to force by doing
1684 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1685 other accesses are not necessarily aligned, or (2) use loop versioning to
1686 generate one loop in which all accesses are aligned, and another loop in
1687 which only 'p' is necessarily aligned.
1689 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1690 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1691 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1693 Devising a cost model is the most critical aspect of this work. It will
1694 guide us on which access to peel for, whether to use loop versioning, how
1695 many versions to create, etc. The cost model will probably consist of
1696 generic considerations as well as target specific considerations (on
1697 powerpc for example, misaligned stores are more painful than misaligned
1698 loads).
1700 Here are the general steps involved in alignment enhancements:
1702 -- original loop, before alignment analysis:
1703 for (i=0; i<N; i++){
1704 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1705 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1708 -- After vect_compute_data_refs_alignment:
1709 for (i=0; i<N; i++){
1710 x = q[i]; # DR_MISALIGNMENT(q) = 3
1711 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1714 -- Possibility 1: we do loop versioning:
1715 if (p is aligned) {
1716 for (i=0; i<N; i++){ # loop 1A
1717 x = q[i]; # DR_MISALIGNMENT(q) = 3
1718 p[i] = y; # DR_MISALIGNMENT(p) = 0
1721 else {
1722 for (i=0; i<N; i++){ # loop 1B
1723 x = q[i]; # DR_MISALIGNMENT(q) = 3
1724 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1728 -- Possibility 2: we do loop peeling:
1729 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1730 x = q[i];
1731 p[i] = y;
1733 for (i = 3; i < N; i++){ # loop 2A
1734 x = q[i]; # DR_MISALIGNMENT(q) = 0
1735 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1738 -- Possibility 3: combination of loop peeling and versioning:
1739 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1740 x = q[i];
1741 p[i] = y;
1743 if (p is aligned) {
1744 for (i = 3; i<N; i++){ # loop 3A
1745 x = q[i]; # DR_MISALIGNMENT(q) = 0
1746 p[i] = y; # DR_MISALIGNMENT(p) = 0
1749 else {
1750 for (i = 3; i<N; i++){ # loop 3B
1751 x = q[i]; # DR_MISALIGNMENT(q) = 0
1752 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1756 These loops are later passed to loop_transform to be vectorized. The
1757 vectorizer will use the alignment information to guide the transformation
1758 (whether to generate regular loads/stores, or with special handling for
1759 misalignment). */
1761 static bool
1762 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1764 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1765 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1766 enum dr_alignment_support supportable_dr_alignment;
1767 struct data_reference *dr0 = NULL;
1768 struct data_reference *dr;
1769 unsigned int i;
1770 bool do_peeling = false;
1771 bool do_versioning = false;
1772 bool stat;
1773 tree stmt;
1774 stmt_vec_info stmt_info;
1775 int vect_versioning_for_alias_required;
1777 if (vect_print_dump_info (REPORT_DETAILS))
1778 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1780 /* While cost model enhancements are expected in the future, the high level
1781 view of the code at this time is as follows:
1783 A) If there is a misaligned write then see if peeling to align this write
1784 can make all data references satisfy vect_supportable_dr_alignment.
1785 If so, update data structures as needed and return true. Note that
1786 at this time vect_supportable_dr_alignment is known to return false
1787 for a misaligned write.
1789 B) If peeling wasn't possible and there is a data reference with an
1790 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1791 then see if loop versioning checks can be used to make all data
1792 references satisfy vect_supportable_dr_alignment. If so, update
1793 data structures as needed and return true.
1795 C) If neither peeling nor versioning were successful then return false if
1796 any data reference does not satisfy vect_supportable_dr_alignment.
1798 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1800 Note, Possibility 3 above (which is peeling and versioning together) is not
1801 being done at this time. */
1803 /* (1) Peeling to force alignment. */
1805 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1806 Considerations:
1807 + How many accesses will become aligned due to the peeling
1808 - How many accesses will become unaligned due to the peeling,
1809 and the cost of misaligned accesses.
1810 - The cost of peeling (the extra runtime checks, the increase
1811 in code size).
1813 The scheme we use FORNOW: peel to force the alignment of the first
1814 misaligned store in the loop.
1815 Rationale: misaligned stores are not yet supported.
1817 TODO: Use a cost model. */
1819 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1821 stmt = DR_STMT (dr);
1822 stmt_info = vinfo_for_stmt (stmt);
1824 /* For interleaving, only the alignment of the first access
1825 matters. */
1826 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1827 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1828 continue;
1830 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1832 do_peeling = vector_alignment_reachable_p (dr);
1833 if (do_peeling)
1834 dr0 = dr;
1835 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "vector alignment may not be reachable");
1837 break;
1841 vect_versioning_for_alias_required =
1842 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1844 /* Temporarily, if versioning for alias is required, we disable peeling
1845 until we support peeling and versioning. Often peeling for alignment
1846 will require peeling for loop-bound, which in turn requires that we
1847 know how to adjust the loop ivs after the loop. */
1848 if (vect_versioning_for_alias_required
1849 || !vect_can_advance_ivs_p (loop_vinfo)
1850 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1851 do_peeling = false;
1853 if (do_peeling)
1855 int mis;
1856 int npeel = 0;
1857 tree stmt = DR_STMT (dr0);
1858 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1859 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1860 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1862 if (known_alignment_for_access_p (dr0))
1864 /* Since it's known at compile time, compute the number of iterations
1865 in the peeled loop (the peeling factor) for use in updating
1866 DR_MISALIGNMENT values. The peeling factor is the vectorization
1867 factor minus the misalignment as an element count. */
1868 mis = DR_MISALIGNMENT (dr0);
1869 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1870 npeel = nelements - mis;
1872 /* For interleaved data access every iteration accesses all the
1873 members of the group, therefore we divide the number of iterations
1874 by the group size. */
1875 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1876 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1877 npeel /= DR_GROUP_SIZE (stmt_info);
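/* Worked example (illustrative, not from the original source): for V4SI
   vectors (nelements = 4, 16-byte vectors) over a 4-byte element type,
   DR_MISALIGNMENT (dr0) == 8 bytes gives mis = 8 / 4 = 2 elements and
   npeel = 4 - 2 = 2. If dr0 belongs to an interleaving group with
   DR_GROUP_SIZE == 2, each scalar iteration advances the access by two
   elements (8 bytes), so npeel = 2 / 2 = 1 peeled iteration suffices to
   reach 16-byte alignment.  */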
1879 if (vect_print_dump_info (REPORT_DETAILS))
1880 fprintf (vect_dump, "Try peeling by %d", npeel);
1883 /* Ensure that all data refs can be vectorized after the peel. */
1884 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1886 int save_misalignment;
1888 if (dr == dr0)
1889 continue;
1891 stmt = DR_STMT (dr);
1892 stmt_info = vinfo_for_stmt (stmt);
1893 /* For interleaving, only the alignment of the first access
1894 matters. */
1895 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1896 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1897 continue;
1899 save_misalignment = DR_MISALIGNMENT (dr);
1900 vect_update_misalignment_for_peel (dr, dr0, npeel);
1901 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1902 SET_DR_MISALIGNMENT (dr, save_misalignment);
1904 if (!supportable_dr_alignment)
1906 do_peeling = false;
1907 break;
1911 if (do_peeling)
1913 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1914 If the misalignment of DR_i is identical to that of dr0 then set
1915 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1916 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1917 by the peeling factor times the element size of DR_i (MOD the
1918 vectorization factor times the size). Otherwise, the
1919 misalignment of DR_i must be set to unknown. */
1920 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1921 if (dr != dr0)
1922 vect_update_misalignment_for_peel (dr, dr0, npeel);
1924 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1925 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1926 SET_DR_MISALIGNMENT (dr0, 0);
1927 if (vect_print_dump_info (REPORT_ALIGNMENT))
1928 fprintf (vect_dump, "Alignment of access forced using peeling.");
1930 if (vect_print_dump_info (REPORT_DETAILS))
1931 fprintf (vect_dump, "Peeling for alignment will be applied.");
1933 stat = vect_verify_datarefs_alignment (loop_vinfo);
1934 gcc_assert (stat);
1935 return stat;
1940 /* (2) Versioning to force alignment. */
1942 /* Try versioning if:
1943 1) flag_tree_vect_loop_version is TRUE
1944 2) optimize_size is FALSE
1945 3) there is at least one unsupported misaligned data ref with an unknown
1946 misalignment, and
1947 4) all misaligned data refs with a known misalignment are supported, and
1948 5) the number of runtime alignment checks is within reason. */
1950 do_versioning =
1951 flag_tree_vect_loop_version
1952 && (!optimize_size)
1953 && (!loop->inner); /* FORNOW */
1955 if (do_versioning)
1957 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1959 stmt = DR_STMT (dr);
1960 stmt_info = vinfo_for_stmt (stmt);
1962 /* For interleaving, only the alignment of the first access
1963 matters. */
1964 if (aligned_access_p (dr)
1965 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1966 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1967 continue;
1969 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1971 if (!supportable_dr_alignment)
1973 tree stmt;
1974 int mask;
1975 tree vectype;
1977 if (known_alignment_for_access_p (dr)
1978 || VEC_length (tree,
1979 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1980 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1982 do_versioning = false;
1983 break;
1986 stmt = DR_STMT (dr);
1987 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1988 gcc_assert (vectype);
1990 /* The rightmost bits of an aligned address must be zeros.
1991 Construct the mask needed for this test. For example,
1992 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1993 mask must be 15 = 0xf. */
1994 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
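/* A minimal sketch (an assumed shape; the actual test is emitted later by
   the transformation phase) of the runtime check that guards the aligned
   loop version for two potentially misaligned pointers p and q:

     if ((((uintptr_t) p | (uintptr_t) q) & mask) == 0)
       ... loop version in which all accesses are treated as aligned ...
     else
       ... fallback version with unaligned (or scalar) accesses ...

   OR-ing the addresses lets a single AND against MASK test all the
   may-misalign references at once.  */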
1996 /* FORNOW: use the same mask to test all potentially unaligned
1997 references in the loop. The vectorizer currently supports
1998 a single vector size, see the reference to
1999 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2000 vectorization factor is computed. */
2001 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2002 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2003 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2004 VEC_safe_push (tree, heap,
2005 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2006 DR_STMT (dr));
2010 /* Versioning requires at least one misaligned data reference. */
2011 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2012 do_versioning = false;
2013 else if (!do_versioning)
2014 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2017 if (do_versioning)
2019 VEC(tree,heap) *may_misalign_stmts
2020 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2021 tree stmt;
2023 /* It can now be assumed that the data references in the statements
2024 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2025 of the loop being vectorized. */
2026 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2028 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2029 dr = STMT_VINFO_DATA_REF (stmt_info);
2030 SET_DR_MISALIGNMENT (dr, 0);
2031 if (vect_print_dump_info (REPORT_ALIGNMENT))
2032 fprintf (vect_dump, "Alignment of access forced using versioning.");
2035 if (vect_print_dump_info (REPORT_DETAILS))
2036 fprintf (vect_dump, "Versioning for alignment will be applied.");
2038 /* Peeling and versioning can't be done together at this time. */
2039 gcc_assert (! (do_peeling && do_versioning));
2041 stat = vect_verify_datarefs_alignment (loop_vinfo);
2042 gcc_assert (stat);
2043 return stat;
2046 /* This point is reached if neither peeling nor versioning is being done. */
2047 gcc_assert (! (do_peeling || do_versioning));
2049 stat = vect_verify_datarefs_alignment (loop_vinfo);
2050 return stat;
2054 /* Function vect_analyze_data_refs_alignment
2056 Analyze the alignment of the data-references in the loop.
2057 Return FALSE if a data reference is found that cannot be vectorized. */
2059 static bool
2060 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2062 if (vect_print_dump_info (REPORT_DETAILS))
2063 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2065 if (!vect_compute_data_refs_alignment (loop_vinfo))
2067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2068 fprintf (vect_dump,
2069 "not vectorized: can't calculate alignment for data ref.");
2070 return false;
2073 return true;
2077 /* Analyze groups of strided accesses: check that DR belongs to a group of
2078 strided accesses of legal size, step, etc. Detect gaps, single element
2079 interleaving, and other special cases. Set strided access info.
2080 Collect groups of strided stores for further use in SLP analysis. */
2082 static bool
2083 vect_analyze_group_access (struct data_reference *dr)
2085 tree step = DR_STEP (dr);
2086 tree scalar_type = TREE_TYPE (DR_REF (dr));
2087 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2088 tree stmt = DR_STMT (dr);
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2091 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2092 HOST_WIDE_INT stride;
2093 bool slp_impossible = false;
2095 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2096 interleaving group (including gaps). */
2097 stride = dr_step / type_size;
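/* Illustrative example (not from the original source): in

     for (i = 0; i < N; i++)
       {
         x = in[4*i];        <-- DR_STEP == 16 for 4-byte elements
         y = in[4*i + 1];
       }

   both loads have dr_step = 16 and type_size = 4, so stride = 4: an
   interleaving group of size 4 in which only two members are accessed
   and the remaining two are gaps (allowed for loads only).  */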
2099 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2100 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2102 /* Check whether this DR is part of an interleaving group of which it is
2103 the only element accessed in the loop. */
2105 /* Gaps are supported only for loads. STEP must be a multiple of the type
2106 size. The size of the group must be a power of 2. */
2107 if (DR_IS_READ (dr)
2108 && (dr_step % type_size) == 0
2109 && stride > 0
2110 && exact_log2 (stride) != -1)
2112 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2113 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2114 if (vect_print_dump_info (REPORT_DR_DETAILS))
2116 fprintf (vect_dump, "Detected single element interleaving %d ",
2117 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2118 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2119 fprintf (vect_dump, " step ");
2120 print_generic_expr (vect_dump, step, TDF_SLIM);
2122 return true;
2124 if (vect_print_dump_info (REPORT_DETAILS))
2125 fprintf (vect_dump, "not consecutive access");
2126 return false;
2129 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2131 /* First stmt in the interleaving chain. Check the chain. */
2132 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2133 struct data_reference *data_ref = dr;
2134 unsigned int count = 1;
2135 tree next_step;
2136 tree prev_init = DR_INIT (data_ref);
2137 tree prev = stmt;
2138 HOST_WIDE_INT diff, count_in_bytes;
2140 while (next)
2142 /* Skip identical data-refs. If two or more stmts share a data-ref
2143 (supported only for loads), we vectorize only the first stmt, and
2144 the rest reuse the vectorized load of the first one. */
2145 if (!tree_int_cst_compare (DR_INIT (data_ref),
2146 DR_INIT (STMT_VINFO_DATA_REF (
2147 vinfo_for_stmt (next)))))
2149 if (!DR_IS_READ (data_ref))
2151 if (vect_print_dump_info (REPORT_DETAILS))
2152 fprintf (vect_dump, "Two store stmts share the same dr.");
2153 return false;
2156 /* Check that there are no load-store dependences for these loads,
2157 to prevent a load-store-load sequence to the same location. */
2158 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2159 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump,
2163 "READ_WRITE dependence in interleaving.");
2164 return false;
2167 /* For loads, record that this stmt reuses the previous stmt's load. */
2168 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2170 prev = next;
2171 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2172 continue;
2174 prev = next;
2176 /* Check that all the accesses have the same STEP. */
2177 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2178 if (tree_int_cst_compare (step, next_step))
2180 if (vect_print_dump_info (REPORT_DETAILS))
2181 fprintf (vect_dump, "not consecutive access in interleaving");
2182 return false;
2185 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2186 /* Check that the distance between two accesses is equal to the type
2187 size. Otherwise, we have gaps. */
2188 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2189 - TREE_INT_CST_LOW (prev_init)) / type_size;
2190 if (diff != 1)
2192 /* FORNOW: SLP of accesses with gaps is not supported. */
2193 slp_impossible = true;
2194 if (!DR_IS_READ (data_ref))
2196 if (vect_print_dump_info (REPORT_DETAILS))
2197 fprintf (vect_dump, "interleaved store with gaps");
2198 return false;
2202 /* Store the gap from the previous member of the group. If there is no
2203 gap in the access, DR_GROUP_GAP is always 1. */
2204 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2206 prev_init = DR_INIT (data_ref);
2207 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2208 /* Count the number of data-refs in the chain. */
2209 count++;
2212 /* COUNT is the number of accesses found; we multiply it by the size of
2213 the type to get COUNT_IN_BYTES. */
2214 count_in_bytes = type_size * count;
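/* Illustrative check (an assumption about the intent): with COUNT == 3
   accesses of a 4-byte type, count_in_bytes = 12; a step of only 8 bytes
   would make consecutive group instances overlap, which the test below
   rejects.  */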
2216 /* Check that the size of the interleaving is not greater than STEP. */
2217 if (dr_step < count_in_bytes)
2219 if (vect_print_dump_info (REPORT_DETAILS))
2221 fprintf (vect_dump, "interleaving size is greater than step for ");
2222 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2224 return false;
2227 /* Check that the size of the interleaving is equal to STEP for stores,
2228 i.e., that there are no gaps. */
2229 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2231 if (vect_print_dump_info (REPORT_DETAILS))
2232 fprintf (vect_dump, "interleaved store with gaps");
2233 return false;
2236 /* Check that STEP is a multiple of type size. */
2237 if ((dr_step % type_size) != 0)
2239 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "step is not a multiple of type size: step ");
2242 print_generic_expr (vect_dump, step, TDF_SLIM);
2243 fprintf (vect_dump, " size ");
2244 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2245 TDF_SLIM);
2247 return false;
2250 /* FORNOW: we handle only interleaving that is a power of 2.
2251 We don't fail here, since it may still be possible to vectorize the
2252 group using SLP. If not, the size of the group will be checked in
2253 vect_analyze_operations, and the vectorization will fail. */
2254 if (exact_log2 (stride) == -1)
2256 if (vect_print_dump_info (REPORT_DETAILS))
2257 fprintf (vect_dump, "interleaving is not a power of 2");
2259 if (slp_impossible)
2260 return false;
2262 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2263 if (vect_print_dump_info (REPORT_DETAILS))
2264 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2266 /* SLP: create an SLP data structure for every interleaving group of
2267 stores for further analysis in vect_analyze_slp. */
2268 if (!DR_IS_READ (dr) && !slp_impossible)
2269 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2272 return true;
2276 /* Analyze the access pattern of the data-reference DR.
2277 In case of non-consecutive accesses call vect_analyze_group_access() to
2278 analyze groups of strided accesses. */
2280 static bool
2281 vect_analyze_data_ref_access (struct data_reference *dr)
2283 tree step = DR_STEP (dr);
2284 tree scalar_type = TREE_TYPE (DR_REF (dr));
2285 tree stmt = DR_STMT (dr);
2286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2288 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2289 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2291 if (!step)
2293 if (vect_print_dump_info (REPORT_DETAILS))
2294 fprintf (vect_dump, "bad data-ref access");
2295 return false;
2298 /* Don't allow invariant accesses. */
2299 if (dr_step == 0)
2300 return false;
2302 if (nested_in_vect_loop_p (loop, stmt))
2304 /* Interleaved accesses are not yet supported within outer-loop
2305 vectorization for references in the inner-loop. */
2306 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2308 /* For the rest of the analysis we use the outer-loop step. */
2309 step = STMT_VINFO_DR_STEP (stmt_info);
2310 dr_step = TREE_INT_CST_LOW (step);
2312 if (dr_step == 0)
2314 if (vect_print_dump_info (REPORT_ALIGNMENT))
2315 fprintf (vect_dump, "zero step in outer loop.");
2316 if (DR_IS_READ (dr))
2317 return true;
2318 else
2319 return false;
2323 /* Consecutive? */
2324 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2326 /* Mark that it is not interleaving. */
2327 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2328 return true;
2331 if (nested_in_vect_loop_p (loop, stmt))
2333 if (vect_print_dump_info (REPORT_ALIGNMENT))
2334 fprintf (vect_dump, "strided access in outer loop.");
2335 return false;
2338 /* Not a consecutive access - check if it's part of an interleaving group. */
2339 return vect_analyze_group_access (dr);
2343 /* Function vect_analyze_data_ref_accesses.
2345 Analyze the access pattern of all the data references in the loop.
2347 FORNOW: the only access pattern that is considered vectorizable is a
2348 simple step 1 (consecutive) access.
2350 FORNOW: handle only arrays and pointer accesses. */
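/* Illustrative example (not from the original source): a[i] with 4-byte
   elements has DR_STEP == TYPE_SIZE_UNIT == 4 and is a simple consecutive
   access; a[2*i] has DR_STEP == 8 and can only be handled as part of an
   interleaving group, if at all.  */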
2352 static bool
2353 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2355 unsigned int i;
2356 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2357 struct data_reference *dr;
2359 if (vect_print_dump_info (REPORT_DETAILS))
2360 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2362 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2363 if (!vect_analyze_data_ref_access (dr))
2365 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2366 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2367 return false;
2370 return true;
2373 /* Function vect_prune_runtime_alias_test_list.
2375 Prune a list of ddrs to be tested at run-time by versioning for alias.
2376 Return FALSE if the resulting list of ddrs is longer than allowed by
2377 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
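/* Sketch of the rationale (the exact form of the generated test is an
   assumption here; it is emitted by the transformation phase): each ddr
   that survives pruning becomes one runtime overlap test, roughly

     if (q + q_size <= p || p + p_size <= q)
       ... segments do not overlap - run the vectorized loop ...

   so ddrs covering equal (p, q) ranges are removed first, and if more
   than PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS tests would remain,
   versioning for alias is abandoned altogether.  */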
2379 static bool
2380 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2382 VEC (ddr_p, heap) * ddrs =
2383 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2384 unsigned i, j;
2386 if (vect_print_dump_info (REPORT_DETAILS))
2387 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2389 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2391 bool found;
2392 ddr_p ddr_i;
2394 ddr_i = VEC_index (ddr_p, ddrs, i);
2395 found = false;
2397 for (j = 0; j < i; j++)
2399 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2401 if (vect_vfa_range_equal (ddr_i, ddr_j))
2403 if (vect_print_dump_info (REPORT_DR_DETAILS))
2405 fprintf (vect_dump, "found equal ranges ");
2406 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2407 fprintf (vect_dump, ", ");
2408 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2409 fprintf (vect_dump, " and ");
2410 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2411 fprintf (vect_dump, ", ");
2412 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2414 found = true;
2415 break;
2419 if (found)
2421 VEC_ordered_remove (ddr_p, ddrs, i);
2422 continue;
2424 i++;
2427 if (VEC_length (ddr_p, ddrs) >
2428 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2430 if (vect_print_dump_info (REPORT_DR_DETAILS))
2432 fprintf (vect_dump,
2433 "disable versioning for alias - max number of generated "
2434 "checks exceeded.");
2437 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2439 return false;
2442 return true;
2445 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2447 void
2448 vect_free_slp_tree (slp_tree node)
2450 if (!node)
2451 return;
2453 if (SLP_TREE_LEFT (node))
2454 vect_free_slp_tree (SLP_TREE_LEFT (node));
2456 if (SLP_TREE_RIGHT (node))
2457 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2459 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2461 if (SLP_TREE_VEC_STMTS (node))
2462 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2464 free (node);
2468 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2469 of a legal type and that they match the defs of the first stmt of the SLP
2470 group (stored in FIRST_STMT_...). */
2472 static bool
2473 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2474 tree rhs, VEC (tree, heap) **def_stmts0,
2475 VEC (tree, heap) **def_stmts1,
2476 enum vect_def_type *first_stmt_dt0,
2477 enum vect_def_type *first_stmt_dt1,
2478 tree *first_stmt_def0_type,
2479 tree *first_stmt_def1_type,
2480 tree *first_stmt_const_oprnd,
2481 int ncopies_for_cost)
2483 tree oprnd;
2484 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2485 unsigned int i, number_of_oprnds = op_type;
2486 tree def, def_stmt;
2487 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2488 stmt_vec_info stmt_info =
2489 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2491 /* Store. */
2492 if (!op_type)
2493 number_of_oprnds = 1;
2494 else
2495 gcc_assert (op_type == unary_op || op_type == binary_op);
2497 for (i = 0; i < number_of_oprnds; i++)
2499 if (op_type)
2500 oprnd = TREE_OPERAND (rhs, i);
2501 else
2502 oprnd = rhs;
2504 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2505 || (!def_stmt && dt[i] != vect_constant_def))
2507 if (vect_print_dump_info (REPORT_SLP))
2509 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2510 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2513 return false;
2516 if (!*first_stmt_dt0)
2518 /* op0 of the first stmt of the group - store its info. */
2519 *first_stmt_dt0 = dt[i];
2520 if (def)
2521 *first_stmt_def0_type = TREE_TYPE (def);
2522 else
2523 *first_stmt_const_oprnd = oprnd;
2525 /* Analyze costs (for the first stmt of the group only). */
2526 if (op_type)
2527 /* Not a memory operation (we don't call this function for loads). */
2528 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2529 else
2530 /* Store. */
2531 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2534 else
2536 if (!*first_stmt_dt1 && i == 1)
2538 /* op1 of the first stmt of the group - store its info. */
2539 *first_stmt_dt1 = dt[i];
2540 if (def)
2541 *first_stmt_def1_type = TREE_TYPE (def);
2542 else
2544 /* We assume that the stmt contains only one constant
2545 operand. We fail otherwise, to be on the safe side. */
2546 if (*first_stmt_const_oprnd)
2548 if (vect_print_dump_info (REPORT_SLP))
2549 fprintf (vect_dump, "Build SLP failed: two constant "
2550 "oprnds in stmt");
2551 return false;
2553 *first_stmt_const_oprnd = oprnd;
2556 else
2558 /* Not first stmt of the group, check that the def-stmt/s match
2559 the def-stmt/s of the first stmt. */
2560 if ((i == 0
2561 && (*first_stmt_dt0 != dt[i]
2562 || (*first_stmt_def0_type && def
2563 && *first_stmt_def0_type != TREE_TYPE (def))))
2564 || (i == 1
2565 && (*first_stmt_dt1 != dt[i]
2566 || (*first_stmt_def1_type && def
2567 && *first_stmt_def1_type != TREE_TYPE (def))))
2568 || (!def
2569 && TREE_TYPE (*first_stmt_const_oprnd)
2570 != TREE_TYPE (oprnd)))
2572 if (vect_print_dump_info (REPORT_SLP))
2573 fprintf (vect_dump, "Build SLP failed: different types ");
2575 return false;
2580 /* Check the types of the definitions. */
2581 switch (dt[i])
2583 case vect_constant_def:
2584 case vect_invariant_def:
2585 break;
2587 case vect_loop_def:
2588 if (i == 0)
2589 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2590 else
2591 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2592 break;
2594 default:
2595 /* FORNOW: Not supported. */
2596 if (vect_print_dump_info (REPORT_SLP))
2598 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2599 print_generic_expr (vect_dump, def, TDF_SLIM);
2602 return false;
2606 return true;
2610 /* Recursively build an SLP tree starting from NODE.
2611 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2612 permutation or are of unsupported types of operation. Otherwise, return
2613 TRUE.
2614 SLP_IMPOSSIBLE is set to TRUE if it is impossible to SLP anywhere in the
2615 loop, for example in the case of multiple types (FORNOW). */
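/* Illustrative example (not from the original source): for the group of
   two strided stores

     t0 = b[2*i]   + c[2*i];      a[2*i]   = t0;
     t1 = b[2*i+1] + c[2*i+1];    a[2*i+1] = t1;

   the root node holds the two stores, its child holds the two isomorphic
   additions, and that node's left and right children hold the loads from
   b and c respectively, where the recursion stops.  */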
2617 static bool
2618 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2619 unsigned int group_size, bool *slp_impossible,
2620 int *inside_cost, int *outside_cost,
2621 int ncopies_for_cost)
2623 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2624 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2625 unsigned int i;
2626 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2627 tree stmt = VEC_index (tree, stmts, 0);
2628 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2629 enum tree_code first_stmt_code = 0;
2630 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2631 tree lhs, rhs, prev_stmt = NULL_TREE;
2632 bool stop_recursion = false, need_same_oprnds = false;
2633 tree vectype, scalar_type, first_op1 = NULL_TREE;
2634 unsigned int vectorization_factor = 0, ncopies;
2635 optab optab;
2636 int icode;
2637 enum machine_mode optab_op2_mode;
2638 enum machine_mode vec_mode;
2639 tree first_stmt_const_oprnd = NULL_TREE;
2640 struct data_reference *first_dr;
2642 /* For every stmt in NODE find its def stmt/s. */
2643 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2645 if (vect_print_dump_info (REPORT_SLP))
2647 fprintf (vect_dump, "Build SLP for ");
2648 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2651 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2653 if (vect_print_dump_info (REPORT_SLP))
2655 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2656 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2659 return false;
2662 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2663 vectype = get_vectype_for_scalar_type (scalar_type);
2664 if (!vectype)
2666 if (vect_print_dump_info (REPORT_SLP))
2668 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2669 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2671 return false;
2674 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2675 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2676 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2677 if (ncopies > 1)
2679 /* FORNOW. */
2680 if (vect_print_dump_info (REPORT_SLP))
2681 fprintf (vect_dump, "SLP failed - multiple types ");
2683 *slp_impossible = true;
2684 return false;
2687 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2688 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2690 /* Check the operation. */
2691 if (i == 0)
2693 first_stmt_code = TREE_CODE (rhs);
2695 /* Shift arguments should be equal in all the packed stmts for a
2696 vector shift with scalar shift operand. */
2697 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2699 vec_mode = TYPE_MODE (vectype);
2700 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2701 if (!optab)
2703 if (vect_print_dump_info (REPORT_SLP))
2704 fprintf (vect_dump, "Build SLP failed: no optab.");
2705 return false;
2707 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2708 if (icode == CODE_FOR_nothing)
2710 if (vect_print_dump_info (REPORT_SLP))
2711 fprintf (vect_dump,
2712 "Build SLP failed: op not supported by target.");
2713 return false;
2715 optab_op2_mode = insn_data[icode].operand[2].mode;
2716 if (!VECTOR_MODE_P (optab_op2_mode))
2718 need_same_oprnds = true;
2719 first_op1 = TREE_OPERAND (rhs, 1);
2723 else
2725 if (first_stmt_code != TREE_CODE (rhs))
2727 if (vect_print_dump_info (REPORT_SLP))
2729 fprintf (vect_dump,
2730 "Build SLP failed: different operation in stmt ");
2731 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2734 return false;
2737 if (need_same_oprnds
2738 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2740 if (vect_print_dump_info (REPORT_SLP))
2742 fprintf (vect_dump,
2743 "Build SLP failed: different shift arguments in ");
2744 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2747 return false;
2751 /* Strided store or load. */
2752 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2754 if (REFERENCE_CLASS_P (lhs))
2756 /* Store. */
2757 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2758 &def_stmts0, &def_stmts1,
2759 &first_stmt_dt0,
2760 &first_stmt_dt1,
2761 &first_stmt_def0_type,
2762 &first_stmt_def1_type,
2763 &first_stmt_const_oprnd,
2764 ncopies_for_cost))
2765 return false;
2767 else
2769 /* Load. */
2770 if (i == 0)
2772 /* First stmt of the SLP group should be the first load of
2773 the interleaving loop if data permutation is not
2774 allowed. */
2775 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2777 /* FORNOW: data permutations are not supported. */
2778 if (vect_print_dump_info (REPORT_SLP))
2780 fprintf (vect_dump, "Build SLP failed: strided "
2781 " loads need permutation ");
2782 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2785 return false;
2788 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2789 if (vect_supportable_dr_alignment (first_dr)
2790 == dr_unaligned_unsupported)
2792 if (vect_print_dump_info (REPORT_SLP))
2794 fprintf (vect_dump, "Build SLP failed: unsupported "
2795 " unaligned load ");
2796 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2799 return false;
2802 /* Analyze costs (for the first stmt in the group). */
2803 vect_model_load_cost (vinfo_for_stmt (stmt),
2804 ncopies_for_cost, *node);
2806 else
2808 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2810 /* FORNOW: data permutations are not supported. */
2811 if (vect_print_dump_info (REPORT_SLP))
2813 fprintf (vect_dump, "Build SLP failed: strided "
2814 " loads need permutation ");
2815 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2817 return false;
2821 prev_stmt = stmt;
2823 /* We stop building the tree when we reach a group of loads. */
2824 stop_recursion = true;
2825 continue;
2827 } /* Strided access. */
2828 else
2830 if (REFERENCE_CLASS_P (rhs))
2832 /* Not a strided load. */
2833 if (vect_print_dump_info (REPORT_SLP))
2835 fprintf (vect_dump, "Build SLP failed: not strided load ");
2836 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2839 /* FORNOW: non-strided loads are not supported. */
2840 return false;
2843 /* Not a memory operation. */
2844 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2846 if (vect_print_dump_info (REPORT_SLP))
2848 fprintf (vect_dump, "Build SLP failed: operation");
2849 fprintf (vect_dump, " unsupported ");
2850 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2853 return false;
2856 /* Find the def-stmts. */
2857 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2858 &def_stmts1, &first_stmt_dt0,
2859 &first_stmt_dt1,
2860 &first_stmt_def0_type,
2861 &first_stmt_def1_type,
2862 &first_stmt_const_oprnd,
2863 ncopies_for_cost))
2864 return false;
2868 /* Add the costs of the node to the overall instance costs. */
2869 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2870 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2872 /* Strided loads were reached - stop the recursion. */
2873 if (stop_recursion)
2874 return true;
2876 /* Create SLP_TREE nodes for the definition node/s. */
2877 if (first_stmt_dt0 == vect_loop_def)
2879 slp_tree left_node = XNEW (struct _slp_tree);
2880 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2881 SLP_TREE_VEC_STMTS (left_node) = NULL;
2882 SLP_TREE_LEFT (left_node) = NULL;
2883 SLP_TREE_RIGHT (left_node) = NULL;
2884 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2885 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2886 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2887 slp_impossible, inside_cost, outside_cost,
2888 ncopies_for_cost))
2889 return false;
2891 SLP_TREE_LEFT (*node) = left_node;
2894 if (first_stmt_dt1 == vect_loop_def)
2896 slp_tree right_node = XNEW (struct _slp_tree);
2897 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2898 SLP_TREE_VEC_STMTS (right_node) = NULL;
2899 SLP_TREE_LEFT (right_node) = NULL;
2900 SLP_TREE_RIGHT (right_node) = NULL;
2901 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2902 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2903 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2904 slp_impossible, inside_cost, outside_cost,
2905 ncopies_for_cost))
2906 return false;
2908 SLP_TREE_RIGHT (*node) = right_node;
2911 return true;
2915 static void
2916 vect_print_slp_tree (slp_tree node)
2918 int i;
2919 tree stmt;
2921 if (!node)
2922 return;
2924 fprintf (vect_dump, "node ");
2925 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2927 fprintf (vect_dump, "\n\tstmt %d ", i);
2928 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2930 fprintf (vect_dump, "\n");
2932 vect_print_slp_tree (SLP_TREE_LEFT (node));
2933 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2937 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2938 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2939 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2940 stmts in NODE are to be marked. */
2942 static void
2943 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2945 int i;
2946 tree stmt;
2948 if (!node)
2949 return;
2951 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2952 if (j < 0 || i == j)
2953 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2955 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2956 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2960 /* Analyze an SLP instance starting from a group of strided stores. Call
2961 vect_build_slp_tree to build a tree of packed stmts if possible.
2962 Return FALSE if it's impossible to SLP any stmt in the loop. */
2964 static bool
2965 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2967 slp_instance new_instance;
2968 slp_tree node = XNEW (struct _slp_tree);
2969 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2970 unsigned int unrolling_factor = 1, nunits;
2971 tree vectype, scalar_type, next;
2972 unsigned int vectorization_factor = 0, ncopies;
2973 bool slp_impossible = false;
2974 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2976 /* FORNOW: multiple types are not supported. */
2977 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2978 vectype = get_vectype_for_scalar_type (scalar_type);
2979 if (!vectype)
2981 if (vect_print_dump_info (REPORT_SLP))
2983 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2984 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2986 return false;
2989 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2990 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2991 ncopies = vectorization_factor / nunits;
2992 if (ncopies > 1)
2994 if (vect_print_dump_info (REPORT_SLP))
2995 fprintf (vect_dump, "SLP failed - multiple types ");
2997 return false;
3000 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3001 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3002 next = stmt;
3003 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3004 while (next)
3006 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3007 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3010 SLP_TREE_VEC_STMTS (node) = NULL;
3011 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3012 SLP_TREE_LEFT (node) = NULL;
3013 SLP_TREE_RIGHT (node) = NULL;
3014 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3015 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3017 /* Calculate the unrolling factor. */
3018 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3020 /* Calculate the number of vector stmts to create based on the unrolling
3021 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3022 GROUP_SIZE / NUNITS otherwise). */
3023 ncopies_for_cost = unrolling_factor * group_size / nunits;
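/* Worked example (illustrative): with nunits = 4 and group_size = 2,
   least_common_multiple (4, 2) = 4 gives unrolling_factor = 4 / 2 = 2,
   i.e. two group instances fill one vector, and ncopies_for_cost =
   2 * 2 / 4 = 1.  With group_size = 3, lcm (4, 3) = 12 gives
   unrolling_factor = 4 and ncopies_for_cost = 4 * 3 / 4 = 3.  */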
3025 /* Build the tree for the SLP instance. */
3026 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3027 &inside_cost, &outside_cost, ncopies_for_cost))
3029 /* Create a new SLP instance. */
3030 new_instance = XNEW (struct _slp_instance);
3031 SLP_INSTANCE_TREE (new_instance) = node;
3032 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3033 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3034 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3035 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3036 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3037 new_instance);
3038 if (vect_print_dump_info (REPORT_SLP))
3039 vect_print_slp_tree (node);
3041 return true;
3044 /* Failed to SLP. */
3045 /* Free the allocated memory. */
3046 vect_free_slp_tree (node);
3048 if (slp_impossible)
3049 return false;
3051 /* SLP failed for this instance, but it is still possible to SLP other stmts
3052 in the loop. */
3053 return true;
3057 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
3058 trees of packed scalar stmts if SLP is possible. */
3060 static bool
3061 vect_analyze_slp (loop_vec_info loop_vinfo)
3063 unsigned int i;
3064 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3065 tree store;
3067 if (vect_print_dump_info (REPORT_SLP))
3068 fprintf (vect_dump, "=== vect_analyze_slp ===");
3070 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3071 if (!vect_analyze_slp_instance (loop_vinfo, store))
3073 /* SLP failed. No instance can be SLPed in the loop. */
3074 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3075 fprintf (vect_dump, "SLP failed.");
3077 return false;
3080 return true;
3084 /* For each possible SLP instance decide whether to SLP it and calculate the
3085 overall unrolling factor needed to SLP the loop. */
3087 static void
3088 vect_make_slp_decision (loop_vec_info loop_vinfo)
3090 unsigned int i, unrolling_factor = 1;
3091 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3092 slp_instance instance;
3093 int decided_to_slp = 0;
3095 if (vect_print_dump_info (REPORT_SLP))
3096 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3098 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3100 /* FORNOW: SLP if you can. */
3101 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3102 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3104 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3105 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3106 loop-based vectorization. Such stmts will be marked as HYBRID. */
3107 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3108 decided_to_slp++;
3111 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3113 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3114 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3115 decided_to_slp, unrolling_factor);
3119 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3120 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
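/* Illustrative example (not from the original source): if an SLP node
   computes t0 = b[2*i] + 1 and t1 = b[2*i+1] + 1, but t0 is also used by
   a stmt that is not part of any SLP instance (e.g. a summation), the
   stmt defining t0 must be vectorized by loop-based vectorization as
   well, so it is marked HYBRID.  */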
3122 static void
3123 vect_detect_hybrid_slp_stmts (slp_tree node)
3125 int i;
3126 tree stmt;
3127 imm_use_iterator imm_iter;
3128 tree use_stmt;
3130 if (!node)
3131 return;
3133 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3134 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3135 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3136 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3137 if (vinfo_for_stmt (use_stmt)
3138 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3139 vect_mark_slp_stmts (node, hybrid, i);
3141 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3142 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3146 /* Find stmts that must be both vectorized and SLPed. */
3148 static void
3149 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3151 unsigned int i;
3152 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3153 slp_instance instance;
3155 if (vect_print_dump_info (REPORT_SLP))
3156 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3158 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3159 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3163 /* Function vect_analyze_data_refs.
3165 Find all the data references in the loop.
3167 The general structure of the analysis of data refs in the vectorizer is as
3168 follows:
3169 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3170 find and analyze all data-refs in the loop and their dependences.
3171 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3172 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3173 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3177 static bool
3178 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3180 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3181 unsigned int i;
3182 VEC (data_reference_p, heap) *datarefs;
3183 struct data_reference *dr;
3184 tree scalar_type;
3186 if (vect_print_dump_info (REPORT_DETAILS))
3187 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3189 compute_data_dependences_for_loop (loop, true,
3190 &LOOP_VINFO_DATAREFS (loop_vinfo),
3191 &LOOP_VINFO_DDRS (loop_vinfo));
3193 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3194 from stmt_vec_info struct to DR and vectype. */
3195 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3197 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3199 tree stmt;
3200 stmt_vec_info stmt_info;
3201 basic_block bb;
3202 tree base, offset, init;
3204 if (!dr || !DR_REF (dr))
3206 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3207 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3208 return false;
3211 stmt = DR_STMT (dr);
3212 stmt_info = vinfo_for_stmt (stmt);
3214 /* Check that analysis of the data-ref succeeded. */
3215 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3216 || !DR_STEP (dr))
3218 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3220 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3221 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3223 return false;
3226 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3228 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3229 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3230 "constant");
3231 return false;
3234 if (!DR_SYMBOL_TAG (dr))
3236 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3238 fprintf (vect_dump, "not vectorized: no memory tag for ");
3239 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3241 return false;
3244 base = unshare_expr (DR_BASE_ADDRESS (dr));
3245 offset = unshare_expr (DR_OFFSET (dr));
3246 init = unshare_expr (DR_INIT (dr));
3248 /* Update DR field in stmt_vec_info struct. */
3249 bb = bb_for_stmt (stmt);
3251 /* If the dataref is in an inner-loop of the loop that is considered
3252 for vectorization, we also want to analyze the access relative to
3253 the outer-loop (DR contains information only relative to the
3254 inner-most enclosing loop). We do that by building a reference to the
3255 first location accessed by the inner-loop, and analyze it relative to
3256 the outer-loop. */
3257 if (nested_in_vect_loop_p (loop, stmt))
3259 tree outer_step, outer_base, outer_init;
3260 HOST_WIDE_INT pbitsize, pbitpos;
3261 tree poffset;
3262 enum machine_mode pmode;
3263 int punsignedp, pvolatilep;
3264 affine_iv base_iv, offset_iv;
3265 tree dinit;
3267 /* Build a reference to the first location accessed by the
3268 inner-loop: *(BASE+INIT). (The first location is actually
3269 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3270 tree inner_base = build_fold_indirect_ref
3271 (fold_build2 (POINTER_PLUS_EXPR,
3272 TREE_TYPE (base), base,
3273 fold_convert (sizetype, init)));
3275 if (vect_print_dump_info (REPORT_DETAILS))
3277 fprintf (dump_file, "analyze in outer-loop: ");
3278 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3281 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3282 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3283 gcc_assert (outer_base != NULL_TREE);
3285 if (pbitpos % BITS_PER_UNIT != 0)
3287 if (vect_print_dump_info (REPORT_DETAILS))
3288 fprintf (dump_file, "failed: bit offset alignment.\n");
3289 return false;
3292 outer_base = build_fold_addr_expr (outer_base);
3293 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3295 if (vect_print_dump_info (REPORT_DETAILS))
3296 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3297 return false;
3300 if (offset)
3302 if (poffset)
3303 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3304 else
3305 poffset = offset;
3308 if (!poffset)
3310 offset_iv.base = ssize_int (0);
3311 offset_iv.step = ssize_int (0);
3313 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3315 if (vect_print_dump_info (REPORT_DETAILS))
3316 fprintf (dump_file, "evolution of offset is not affine.\n");
3317 return false;
3320 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3321 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3322 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3323 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3324 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3326 outer_step = size_binop (PLUS_EXPR,
3327 fold_convert (ssizetype, base_iv.step),
3328 fold_convert (ssizetype, offset_iv.step));
3330 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3331 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3332 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3333 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3334 STMT_VINFO_DR_OFFSET (stmt_info) =
3335 fold_convert (ssizetype, offset_iv.base);
3336 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3337 size_int (highest_pow2_factor (offset_iv.base));
3339 if (dump_file && (dump_flags & TDF_DETAILS))
3341 fprintf (dump_file, "\touter base_address: ");
3342 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3343 fprintf (dump_file, "\n\touter offset from base address: ");
3344 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3345 fprintf (dump_file, "\n\touter constant offset from base address: ");
3346 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3347 fprintf (dump_file, "\n\touter step: ");
3348 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3349 fprintf (dump_file, "\n\touter aligned to: ");
3350 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3354 if (STMT_VINFO_DATA_REF (stmt_info))
3356 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3358 fprintf (vect_dump,
3359 "not vectorized: more than one data ref in stmt: ");
3360 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3362 return false;
3364 STMT_VINFO_DATA_REF (stmt_info) = dr;
3366 /* Set vectype for STMT. */
3367 scalar_type = TREE_TYPE (DR_REF (dr));
3368 STMT_VINFO_VECTYPE (stmt_info) =
3369 get_vectype_for_scalar_type (scalar_type);
3370 if (!STMT_VINFO_VECTYPE (stmt_info))
3372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3374 fprintf (vect_dump,
3375 "not vectorized: no vectype for stmt: ");
3376 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3377 fprintf (vect_dump, " scalar_type: ");
3378 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3380 return false;
3384 return true;
3388 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3390 /* Function vect_mark_relevant.
3392 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3394 static void
3395 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3396 enum vect_relevant relevant, bool live_p)
3398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3399 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3400 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3402 if (vect_print_dump_info (REPORT_DETAILS))
3403 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3405 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3407 tree pattern_stmt;
3409 /* This is the last stmt in a sequence that was detected as a
3410 pattern that can potentially be vectorized. Don't mark the stmt
3411 as relevant/live because it's not going to be vectorized.
3412 Instead mark the pattern-stmt that replaces it. */
3414 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3416 if (vect_print_dump_info (REPORT_DETAILS))
3417 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3418 stmt_info = vinfo_for_stmt (pattern_stmt);
3419 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3420 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3421 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3422 stmt = pattern_stmt;
3425 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3426 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3427 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3429 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3430 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3432 if (vect_print_dump_info (REPORT_DETAILS))
3433 fprintf (vect_dump, "already marked relevant/live.");
3434 return;
3437 VEC_safe_push (tree, heap, *worklist, stmt);
3441 /* Function vect_stmt_relevant_p.
3443 Return true if STMT, in the loop that is represented by LOOP_VINFO, is
3444 "relevant for vectorization".
3446 A stmt is considered "relevant for vectorization" if:
3447 - it has uses outside the loop.
3448 - it has vdefs (it alters memory).
3449 - it is a control stmt in the loop (other than the exit condition).
3451 CHECKME: what other side effects would the vectorizer allow? */
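/* For example (illustrative): in

     for (i = 0; i < N; i++)
       { s += a[i]; b[i] = x; }

   the store to b[i] is relevant because it alters memory, and the stmt
   updating s is live because s is used after the loop (through the loop
   exit phi).  */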
3453 static bool
3454 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3455 enum vect_relevant *relevant, bool *live_p)
3457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3458 ssa_op_iter op_iter;
3459 imm_use_iterator imm_iter;
3460 use_operand_p use_p;
3461 def_operand_p def_p;
3463 *relevant = vect_unused_in_loop;
3464 *live_p = false;
3466 /* cond stmt other than loop exit cond. */
3467 if (is_ctrl_stmt (stmt)
3468 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3469 *relevant = vect_used_in_loop;
3471 /* changing memory. */
3472 if (TREE_CODE (stmt) != PHI_NODE)
3473 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3475 if (vect_print_dump_info (REPORT_DETAILS))
3476 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3477 *relevant = vect_used_in_loop;
3480 /* uses outside the loop. */
3481 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3483 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3485 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3486 if (!flow_bb_inside_loop_p (loop, bb))
3488 if (vect_print_dump_info (REPORT_DETAILS))
3489 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3491 /* We expect all such uses to be in the loop exit phis
3492 (because the loop is in loop-closed SSA form). */
3493 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3494 gcc_assert (bb == single_exit (loop)->dest);
3496 *live_p = true;
3501 return (*live_p || *relevant);
3506 Function process_use.
3508 Inputs:
3509 - a USE in STMT in a loop represented by LOOP_VINFO
3510 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3511 that defined USE. This is done by calling mark_relevant and passing it
3512 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3514 Outputs:
3515 Generally, LIVE_P and RELEVANT are used to define the liveness and
3516 relevance info of the DEF_STMT of this USE:
3517 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3518 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3519 Exceptions:
3520 - case 1: If USE is used only for address computations (e.g. array indexing),
3521 which does not need to be directly vectorized, then the liveness/relevance
3522 of the respective DEF_STMT is left unchanged.
3523 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3524 skip DEF_STMT because it has already been processed.
3525 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3526 be modified accordingly.
3528 Return true if everything is as expected. Return false otherwise. */
3530 static bool
3531 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3532 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3534 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3535 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3536 stmt_vec_info dstmt_vinfo;
3537 basic_block bb, def_bb;
3538 tree def, def_stmt;
3539 enum vect_def_type dt;
3541 /* case 1: we are only interested in uses that need to be vectorized. Uses
3542 that are used for address computation are not considered relevant. */
3543 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3544 return true;
3546 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3548 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3549 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3550 return false;
3553 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3554 return true;
3556 def_bb = bb_for_stmt (def_stmt);
3557 if (!flow_bb_inside_loop_p (loop, def_bb))
3559 if (vect_print_dump_info (REPORT_DETAILS))
3560 fprintf (vect_dump, "def_stmt is out of loop.");
3561 return true;
3564 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3565 DEF_STMT must have already been processed, because this should be the
3566 only way that STMT, which is a reduction-phi, was put in the worklist,
3567 as there should be no other uses for DEF_STMT in the loop. So we just
3568 check that everything is as expected, and we are done. */
3569 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3570 bb = bb_for_stmt (stmt);
3571 if (TREE_CODE (stmt) == PHI_NODE
3572 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3573 && TREE_CODE (def_stmt) != PHI_NODE
3574 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3575 && bb->loop_father == def_bb->loop_father)
3577 if (vect_print_dump_info (REPORT_DETAILS))
3578 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3579 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3580 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3581 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3582 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3583 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3584 return true;
3587 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3588 outer-loop-header-bb:
3589 d = def_stmt
3590 inner-loop:
3591 stmt # use (d)
3592 outer-loop-tail-bb:
3593 ... */
3594 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3596 if (vect_print_dump_info (REPORT_DETAILS))
3597 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3598 switch (relevant)
3600 case vect_unused_in_loop:
3601 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3602 vect_used_by_reduction : vect_unused_in_loop;
3603 break;
3604 case vect_used_in_outer_by_reduction:
3605 relevant = vect_used_by_reduction;
3606 break;
3607 case vect_used_in_outer:
3608 relevant = vect_used_in_loop;
3609 break;
3610 case vect_used_by_reduction:
3611 case vect_used_in_loop:
3612 break;
3614 default:
3615 gcc_unreachable ();
3619 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3620 outer-loop-header-bb:
3622 inner-loop:
3623 d = def_stmt
3624 outer-loop-tail-bb:
3625 stmt # use (d) */
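/* A concrete shape for case 3b (illustrative):

     for (i = 0; i < N; i++)       <-- outer loop
       {
         for (j = 0; j < M; j++)
           s += a[i][j];           <-- def_stmt in the inner loop
         b[i] = s;                 <-- stmt using s in the outer-loop tail
       }
*/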
3626 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3628 if (vect_print_dump_info (REPORT_DETAILS))
3629 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3630 switch (relevant)
3632 case vect_unused_in_loop:
3633 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3634 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3635 break;
3637 case vect_used_in_outer_by_reduction:
3638 case vect_used_in_outer:
3639 break;
3641 case vect_used_by_reduction:
3642 relevant = vect_used_in_outer_by_reduction;
3643 break;
3645 case vect_used_in_loop:
3646 relevant = vect_used_in_outer;
3647 break;
3649 default:
3650 gcc_unreachable ();
3654 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3655 return true;
3659 /* Function vect_mark_stmts_to_be_vectorized.
3661 Not all stmts in the loop need to be vectorized. For example:
3663 for i...
3664 for j...
3665 1. T0 = i + j
3666 2. T1 = a[T0]
3668 3. j = j + 1
3670 Stmts 1 and 3 do not need to be vectorized, because loop control and
3671 addressing of vectorized data-refs are handled differently.
3673 This pass detects such stmts. */
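/* The implementation below is a standard backward worklist propagation:
   phase 1 seeds the worklist with stmts that are relevant on their own
   (e.g. stores and live-out computations, as decided by
   vect_stmt_relevant_p), and phase 2 repeatedly pops a stmt and lets
   process_use propagate its relevance/liveness to the def-stmts of its
   operands until the worklist is exhausted.  */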
3675 static bool
3676 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3678 VEC(tree,heap) *worklist;
3679 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3680 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3681 unsigned int nbbs = loop->num_nodes;
3682 block_stmt_iterator si;
3683 tree stmt;
3684 stmt_ann_t ann;
3685 unsigned int i;
3686 stmt_vec_info stmt_vinfo;
3687 basic_block bb;
3688 tree phi;
3689 bool live_p;
3690 enum vect_relevant relevant;
3692 if (vect_print_dump_info (REPORT_DETAILS))
3693 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3695 worklist = VEC_alloc (tree, heap, 64);
3697 /* 1. Init worklist. */
3698 for (i = 0; i < nbbs; i++)
3700 bb = bbs[i];
3701 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3703 if (vect_print_dump_info (REPORT_DETAILS))
3705 fprintf (vect_dump, "init: phi relevant? ");
3706 print_generic_expr (vect_dump, phi, TDF_SLIM);
3709 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3710 vect_mark_relevant (&worklist, phi, relevant, live_p);
3712 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3714 stmt = bsi_stmt (si);
3715 if (vect_print_dump_info (REPORT_DETAILS))
3717 fprintf (vect_dump, "init: stmt relevant? ");
3718 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3721 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3722 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3726 /* 2. Process worklist. */
3727 while (VEC_length (tree, worklist) > 0)
3729 use_operand_p use_p;
3730 ssa_op_iter iter;
3732 stmt = VEC_pop (tree, worklist);
3733 if (vect_print_dump_info (REPORT_DETAILS))
3735 fprintf (vect_dump, "worklist: examine stmt: ");
3736 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3739 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3740 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3741 liveness and relevance properties of STMT. */
3742 ann = stmt_ann (stmt);
3743 stmt_vinfo = vinfo_for_stmt (stmt);
3744 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3745 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3747 /* Generally, the liveness and relevance properties of STMT are
3748 propagated as is to the DEF_STMTs of its USEs:
3749 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3750 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3752 One exception is when STMT has been identified as defining a reduction
3753 variable; in this case we set the liveness/relevance as follows:
3754 live_p = false
3755 relevant = vect_used_by_reduction
3756 This is because we distinguish between two kinds of relevant stmts -
3757 those that are used by a reduction computation, and those that are
3758 (also) used by a regular computation. This allows us later on to
3759 identify stmts that are used solely by a reduction, in which case
3760 the order of the results that they produce need not be preserved.
3762 Reduction phis are expected to be used by a reduction stmt, or by
3763 a stmt in an outer loop; other reduction stmts are expected to be
3764 in the loop, and possibly used by a stmt in an outer loop.
3765 Here are the expected values of "relevant" for reduction phis/stmts:
3767 relevance:                       phi     stmt
3768 vect_unused_in_loop                      ok
3769 vect_used_in_outer_by_reduction  ok      ok
3770 vect_used_in_outer               ok      ok
3771 vect_used_by_reduction           ok
3772 vect_used_in_loop                                */
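/* E.g. (illustrative): in  s_2 = s_1 + a[i]  with  s_1 = PHI <s_0, s_2>,
   a stmt whose result feeds only this reduction is marked
   vect_used_by_reduction with live_p = false: its per-lane results may
   be combined in any order by the reduction epilogue, which is exactly
   the freedom the distinction above records.  */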
3774 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3776 enum vect_relevant tmp_relevant = relevant;
3777 switch (tmp_relevant)
3779 case vect_unused_in_loop:
3780 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3781 relevant = vect_used_by_reduction;
3782 break;
3784 case vect_used_in_outer_by_reduction:
3785 case vect_used_in_outer:
3786 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3787 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3788 break;
3790 case vect_used_by_reduction:
3791 if (TREE_CODE (stmt) == PHI_NODE)
3792 break;
3793 /* fall through */
3794 case vect_used_in_loop:
3795 default:
3796 if (vect_print_dump_info (REPORT_DETAILS))
3797 fprintf (vect_dump, "unsupported use of reduction.");
3798 VEC_free (tree, heap, worklist);
3799 return false;
3801 live_p = false;
3804 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3806 tree op = USE_FROM_PTR (use_p);
3807 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3809 VEC_free (tree, heap, worklist);
3810 return false;
3813 } /* while worklist */
3815 VEC_free (tree, heap, worklist);
3816 return true;
3820 /* Function vect_can_advance_ivs_p
3822 In case the number of iterations LOOP executes is unknown at compile
3823 time, an epilog loop will be generated, and the loop induction variables
3824 (IVs) will be "advanced" to the value they are supposed to take just before
3825 the epilog loop. Here we check that the access function of the loop IVs
3826 and the expression that represents the loop bound are simple enough.
3827 These restrictions will be relaxed in the future. */
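/* Sketch of the intent (illustrative): an IV whose access function is a
   simple affine evolution {init, +, step}_loop can be advanced past the
   vectorized iterations as  init + step * count  when the epilog loop
   is entered; the checks below reject IVs (and loop bounds) for which
   such an expression cannot be built, e.g. evolutions that are
   themselves chrecs (polynomials of degree >= 2).  */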
3829 static bool
3830 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3832 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3833 basic_block bb = loop->header;
3834 tree phi;
3836 /* Analyze phi functions of the loop header. */
3838 if (vect_print_dump_info (REPORT_DETAILS))
3839 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3841 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3843 tree access_fn = NULL;
3844 tree evolution_part;
3846 if (vect_print_dump_info (REPORT_DETAILS))
3848 fprintf (vect_dump, "Analyze phi: ");
3849 print_generic_expr (vect_dump, phi, TDF_SLIM);
3852 /* Skip virtual phis. The data dependences that are associated with
3853 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3855 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3857 if (vect_print_dump_info (REPORT_DETAILS))
3858 fprintf (vect_dump, "virtual phi. skip.");
3859 continue;
3862 /* Skip reduction phis. */
3864 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3866 if (vect_print_dump_info (REPORT_DETAILS))
3867 fprintf (vect_dump, "reduc phi. skip.");
3868 continue;
3871 /* Analyze the evolution function. */
3873 access_fn = instantiate_parameters
3874 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3876 if (!access_fn)
3878 if (vect_print_dump_info (REPORT_DETAILS))
3879 fprintf (vect_dump, "No Access function.");
3880 return false;
3883 if (vect_print_dump_info (REPORT_DETAILS))
3885 fprintf (vect_dump, "Access function of PHI: ");
3886 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3889 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3891 if (evolution_part == NULL_TREE)
3893 if (vect_print_dump_info (REPORT_DETAILS))
3894 fprintf (vect_dump, "No evolution.");
3895 return false;
3898 /* FORNOW: We do not transform initial conditions of IVs
3899 whose evolution functions are a polynomial of degree >= 2. */
3901 if (tree_is_chrec (evolution_part))
3902 return false;
3905 return true;
3909 /* Function vect_get_loop_niters.
3911 Determine the number of iterations the loop executes.
3912 If an expression that represents the number of iterations
3913 can be constructed, place it in NUMBER_OF_ITERATIONS.
3914 Return the loop exit condition. */
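/* E.g. (illustrative): for a do-while-shaped  for (i = 0; i < n; i++)
   the exit condition executes once per iteration, so
   number_of_exit_cond_executions is expected to yield 'n', and the
   returned exit condition is the COND_EXPR that controls the latch.  */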
3916 static tree
3917 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3919 tree niters;
3921 if (vect_print_dump_info (REPORT_DETAILS))
3922 fprintf (vect_dump, "=== get_loop_niters ===");
3924 niters = number_of_exit_cond_executions (loop);
3926 if (niters != NULL_TREE
3927 && niters != chrec_dont_know)
3929 *number_of_iterations = niters;
3931 if (vect_print_dump_info (REPORT_DETAILS))
3933 fprintf (vect_dump, "==> get_loop_niters:" );
3934 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3938 return get_loop_exit_condition (loop);
3942 /* Function vect_analyze_loop_1.
3944 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3945 for it. The different analyses will record information in the
3946 loop_vec_info struct. This is a subset of the analyses applied in
3947 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3948 that is now considered for (outer-loop) vectorization. */
3950 static loop_vec_info
3951 vect_analyze_loop_1 (struct loop *loop)
3953 loop_vec_info loop_vinfo;
3955 if (vect_print_dump_info (REPORT_DETAILS))
3956 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3958 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3960 loop_vinfo = vect_analyze_loop_form (loop);
3961 if (!loop_vinfo)
3963 if (vect_print_dump_info (REPORT_DETAILS))
3964 fprintf (vect_dump, "bad inner-loop form.");
3965 return NULL;
3968 return loop_vinfo;
3972 /* Function vect_analyze_loop_form.
3974 Verify that certain CFG restrictions hold, including:
3975 - the loop has a pre-header
3976 - the loop has a single entry and exit
3977 - the loop exit condition is simple enough, and the number of iterations
3978 can be analyzed (a countable loop). */
3980 loop_vec_info
3981 vect_analyze_loop_form (struct loop *loop)
3983 loop_vec_info loop_vinfo;
3984 tree loop_cond;
3985 tree number_of_iterations = NULL;
3986 loop_vec_info inner_loop_vinfo = NULL;
3988 if (vect_print_dump_info (REPORT_DETAILS))
3989 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3991 /* Different restrictions apply when we are considering an inner-most loop,
3992 vs. an outer (nested) loop.
3993 (FORNOW. May want to relax some of these restrictions in the future). */
3995 if (!loop->inner)
3997 /* Inner-most loop. We currently require that the number of BBs is
3998 exactly 2 (the header and latch). Vectorizable inner-most loops
3999 look like this:
4001 (pre-header)
4003 header <--------+
4004 | | |
4005 | +--> latch --+
4007 (exit-bb) */
4009 if (loop->num_nodes != 2)
4011 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4012 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4013 return NULL;
4016 if (empty_block_p (loop->header))
4018 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4019 fprintf (vect_dump, "not vectorized: empty loop.");
4020 return NULL;
4023 else
4025 struct loop *innerloop = loop->inner;
4026 edge backedge, entryedge;
4028 /* Nested loop. We currently require that the loop is doubly-nested,
4029 contains a single inner loop, and the number of BBs is exactly 5.
4030 Vectorizable outer-loops look like this:
4032 (pre-header)
4034 header <---+
4036 inner-loop |
4038 tail ------+
4040 (exit-bb)
4042 The inner-loop has the properties expected of inner-most loops
4043 as described above. */
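/* E.g. (illustrative): a loop nest of the required shape:

     for (i = 0; i < N; i++)       <-- outer loop, considered for
       for (j = 0; j < M; j++)         outer-loop vectorization
         a[i][j] = b[i][j] + c[i];
*/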
4045 if ((loop->inner)->inner || (loop->inner)->next)
4047 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4048 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4049 return NULL;
4052 /* Analyze the inner-loop. */
4053 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4054 if (!inner_loop_vinfo)
4056 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4057 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4058 return NULL;
4061 if (!expr_invariant_in_loop_p (loop,
4062 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4064 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4065 fprintf (vect_dump,
4066 "not vectorized: inner-loop count not invariant.");
4067 destroy_loop_vec_info (inner_loop_vinfo, true);
4068 return NULL;
4071 if (loop->num_nodes != 5)
4073 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4074 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4075 destroy_loop_vec_info (inner_loop_vinfo, true);
4076 return NULL;
4079 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4080 backedge = EDGE_PRED (innerloop->header, 1);
4081 entryedge = EDGE_PRED (innerloop->header, 0);
4082 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4084 backedge = EDGE_PRED (innerloop->header, 0);
4085 entryedge = EDGE_PRED (innerloop->header, 1);
4088 if (entryedge->src != loop->header
4089 || !single_exit (innerloop)
4090 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4092 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4093 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4094 destroy_loop_vec_info (inner_loop_vinfo, true);
4095 return NULL;
4098 if (vect_print_dump_info (REPORT_DETAILS))
4099 fprintf (vect_dump, "Considering outer-loop vectorization.");
4102 if (!single_exit (loop)
4103 || EDGE_COUNT (loop->header->preds) != 2)
4105 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4107 if (!single_exit (loop))
4108 fprintf (vect_dump, "not vectorized: multiple exits.");
4109 else if (EDGE_COUNT (loop->header->preds) != 2)
4110 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4112 if (inner_loop_vinfo)
4113 destroy_loop_vec_info (inner_loop_vinfo, true);
4114 return NULL;
4117 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4118 that the loop is represented as a do-while (with a proper if-guard
4119 before the loop if needed), where the loop header contains all the
4120 executable statements, and the latch is empty. */
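/* E.g. (illustrative): a source-level  while (cond) { body; }  is
   expected here as  if (cond) do { body; } while (cond);  so that the
   header holds all the executable stmts and the latch is empty.  */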
4121 if (!empty_block_p (loop->latch)
4122 || phi_nodes (loop->latch))
4124 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4125 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4126 if (inner_loop_vinfo)
4127 destroy_loop_vec_info (inner_loop_vinfo, true);
4128 return NULL;
4131 /* Make sure there exists a single-predecessor exit bb: */
4132 if (!single_pred_p (single_exit (loop)->dest))
4134 edge e = single_exit (loop);
4135 if (!(e->flags & EDGE_ABNORMAL))
4137 split_loop_exit_edge (e);
4138 if (vect_print_dump_info (REPORT_DETAILS))
4139 fprintf (vect_dump, "split exit edge.");
4141 else
4143 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4144 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4145 if (inner_loop_vinfo)
4146 destroy_loop_vec_info (inner_loop_vinfo, true);
4147 return NULL;
4151 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4152 if (!loop_cond)
4154 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4155 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4156 if (inner_loop_vinfo)
4157 destroy_loop_vec_info (inner_loop_vinfo, true);
4158 return NULL;
4161 if (!number_of_iterations)
4163 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4164 fprintf (vect_dump,
4165 "not vectorized: number of iterations cannot be computed.");
4166 if (inner_loop_vinfo)
4167 destroy_loop_vec_info (inner_loop_vinfo, true);
4168 return NULL;
4171 if (chrec_contains_undetermined (number_of_iterations))
4173 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4174 fprintf (vect_dump, "Infinite number of iterations.");
4175 if (inner_loop_vinfo)
4176 destroy_loop_vec_info (inner_loop_vinfo, true);
4177 return NULL;
4180 if (!NITERS_KNOWN_P (number_of_iterations))
4182 if (vect_print_dump_info (REPORT_DETAILS))
4184 fprintf (vect_dump, "Symbolic number of iterations is ");
4185 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4188 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4191 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4192 if (inner_loop_vinfo)
4193 destroy_loop_vec_info (inner_loop_vinfo, false);
4194 return NULL;
4197 loop_vinfo = new_loop_vec_info (loop);
4198 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4199 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4201 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4203 /* CHECKME: May want to keep it around in the future. */
4204 if (inner_loop_vinfo)
4205 destroy_loop_vec_info (inner_loop_vinfo, false);
4207 gcc_assert (!loop->aux);
4208 loop->aux = loop_vinfo;
4209 return loop_vinfo;
4213 /* Function vect_analyze_loop.
4215 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4216 for it. The different analyses will record information in the
4217 loop_vec_info struct. */
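/* The analyses below run in a fixed order: loop form, data-ref
   discovery, scalar-cycle classification, pattern recognition,
   relevance marking, alignment analysis, vectorization factor,
   dependences, access patterns, runtime-alias pruning, SLP detection,
   alignment enhancement, and per-stmt operation checks.  Any failure
   destroys the loop_vec_info and returns NULL.  */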
4218 loop_vec_info
4219 vect_analyze_loop (struct loop *loop)
4221 bool ok;
4222 loop_vec_info loop_vinfo;
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "===== analyze_loop_nest =====");
4227 if (loop_outer (loop)
4228 && loop_vec_info_for_loop (loop_outer (loop))
4229 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4231 if (vect_print_dump_info (REPORT_DETAILS))
4232 fprintf (vect_dump, "outer-loop already vectorized.");
4233 return NULL;
4236 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4238 loop_vinfo = vect_analyze_loop_form (loop);
4239 if (!loop_vinfo)
4241 if (vect_print_dump_info (REPORT_DETAILS))
4242 fprintf (vect_dump, "bad loop form.");
4243 return NULL;
4246 /* Find all data references in the loop (which correspond to vdefs/vuses)
4247 and analyze their evolution in the loop.
4249 FORNOW: Handle only simple array references whose
4250 alignment can be forced, and aligned pointer-references. */
4252 ok = vect_analyze_data_refs (loop_vinfo);
4253 if (!ok)
4255 if (vect_print_dump_info (REPORT_DETAILS))
4256 fprintf (vect_dump, "bad data references.");
4257 destroy_loop_vec_info (loop_vinfo, true);
4258 return NULL;
4261 /* Classify all cross-iteration scalar data-flow cycles.
4262 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4264 vect_analyze_scalar_cycles (loop_vinfo);
4266 vect_pattern_recog (loop_vinfo);
4268 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4270 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4271 if (!ok)
4273 if (vect_print_dump_info (REPORT_DETAILS))
4274 fprintf (vect_dump, "unexpected pattern.");
4275 destroy_loop_vec_info (loop_vinfo, true);
4276 return NULL;
4279 /* Analyze the alignment of the data-refs in the loop.
4280 Fail if a data reference is found that cannot be vectorized. */
4282 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4283 if (!ok)
4285 if (vect_print_dump_info (REPORT_DETAILS))
4286 fprintf (vect_dump, "bad data alignment.");
4287 destroy_loop_vec_info (loop_vinfo, true);
4288 return NULL;
4291 ok = vect_determine_vectorization_factor (loop_vinfo);
4292 if (!ok)
4294 if (vect_print_dump_info (REPORT_DETAILS))
4295 fprintf (vect_dump, "can't determine vectorization factor.");
4296 destroy_loop_vec_info (loop_vinfo, true);
4297 return NULL;
4300 /* Analyze data dependences between the data-refs in the loop.
4301 FORNOW: fail at the first data dependence that we encounter. */
4303 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4304 if (!ok)
4306 if (vect_print_dump_info (REPORT_DETAILS))
4307 fprintf (vect_dump, "bad data dependence.");
4308 destroy_loop_vec_info (loop_vinfo, true);
4309 return NULL;
4312 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4313 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4315 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4316 if (!ok)
4318 if (vect_print_dump_info (REPORT_DETAILS))
4319 fprintf (vect_dump, "bad data access.");
4320 destroy_loop_vec_info (loop_vinfo, true);
4321 return NULL;
4324 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4325 It is important to call pruning after vect_analyze_data_ref_accesses,
4326 since we use grouping information gathered by interleaving analysis. */
4327 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4328 if (!ok)
4330 if (vect_print_dump_info (REPORT_DETAILS))
4331 fprintf (vect_dump, "too long list of versioning for alias "
4332 "run-time tests.");
4333 destroy_loop_vec_info (loop_vinfo, true);
4334 return NULL;
4337 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4338 ok = vect_analyze_slp (loop_vinfo);
4339 if (ok)
4341 /* Decide which of the detected SLP instances to vectorize with SLP. */
4342 vect_make_slp_decision (loop_vinfo);
4344 /* Find stmts that need to be both vectorized and SLPed. */
4345 vect_detect_hybrid_slp (loop_vinfo);
4348 /* This pass will decide on using loop versioning and/or loop peeling in
4349 order to enhance the alignment of data references in the loop. */
4351 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4352 if (!ok)
4354 if (vect_print_dump_info (REPORT_DETAILS))
4355 fprintf (vect_dump, "bad data alignment.");
4356 destroy_loop_vec_info (loop_vinfo, true);
4357 return NULL;
4360 /* Scan all the operations in the loop and make sure they are
4361 vectorizable. */
4363 ok = vect_analyze_operations (loop_vinfo);
4364 if (!ok)
4366 if (vect_print_dump_info (REPORT_DETAILS))
4367 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4368 destroy_loop_vec_info (loop_vinfo, true);
4369 return NULL;
4372 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4374 return loop_vinfo;