2008-05-07 Kai Tietz <kai.tietz@onevision.com>
[official-gcc.git] / gcc / tree-vect-analyze.c
blob729ad799822d421fced80e731024da1115014a9f
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 static bool vect_can_advance_ivs_p (loop_vec_info);
46 /* Function vect_determine_vectorization_factor
48 Determine the vectorization factor (VF). VF is the number of data elements
49 that are operated upon in parallel in a single iteration of the vectorized
50 loop. For example, when vectorizing a loop that operates on 4byte elements,
51 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
52 elements can fit in a single vector register.
54 We currently support vectorization of loops in which all types operated upon
55 are of the same size. Therefore this function currently sets VF according to
56 the size of the types operated upon, and fails if there are multiple sizes
57 in the loop.
59 VF is also the factor by which the loop iterations are strip-mined, e.g.:
60 original loop:
61 for (i=0; i<N; i++){
62 a[i] = b[i] + c[i];
65 vectorized loop:
66 for (i=0; i<N; i+=VF){
67 a[i:VF] = b[i:VF] + c[i:VF];
71 static bool
72 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
74 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
75 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
76 int nbbs = loop->num_nodes;
77 block_stmt_iterator si;
78 unsigned int vectorization_factor = 0;
79 tree scalar_type;
80 tree phi;
81 tree vectype;
82 unsigned int nunits;
83 stmt_vec_info stmt_info;
84 int i;
86 if (vect_print_dump_info (REPORT_DETAILS))
87 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
89 for (i = 0; i < nbbs; i++)
91 basic_block bb = bbs[i];
93 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
95 stmt_info = vinfo_for_stmt (phi);
96 if (vect_print_dump_info (REPORT_DETAILS))
98 fprintf (vect_dump, "==> examining phi: ");
99 print_generic_expr (vect_dump, phi, TDF_SLIM);
102 gcc_assert (stmt_info);
104 if (STMT_VINFO_RELEVANT_P (stmt_info))
106 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
107 scalar_type = TREE_TYPE (PHI_RESULT (phi));
109 if (vect_print_dump_info (REPORT_DETAILS))
111 fprintf (vect_dump, "get vectype for scalar type: ");
112 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
115 vectype = get_vectype_for_scalar_type (scalar_type);
116 if (!vectype)
118 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
120 fprintf (vect_dump,
121 "not vectorized: unsupported data-type ");
122 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
124 return false;
126 STMT_VINFO_VECTYPE (stmt_info) = vectype;
128 if (vect_print_dump_info (REPORT_DETAILS))
130 fprintf (vect_dump, "vectype: ");
131 print_generic_expr (vect_dump, vectype, TDF_SLIM);
134 nunits = TYPE_VECTOR_SUBPARTS (vectype);
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "nunits = %d", nunits);
138 if (!vectorization_factor
139 || (nunits > vectorization_factor))
140 vectorization_factor = nunits;
144 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
146 tree stmt = bsi_stmt (si);
147 stmt_info = vinfo_for_stmt (stmt);
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "==> examining statement: ");
152 print_generic_expr (vect_dump, stmt, TDF_SLIM);
155 gcc_assert (stmt_info);
157 /* skip stmts which do not need to be vectorized. */
158 if (!STMT_VINFO_RELEVANT_P (stmt_info)
159 && !STMT_VINFO_LIVE_P (stmt_info))
161 if (vect_print_dump_info (REPORT_DETAILS))
162 fprintf (vect_dump, "skip.");
163 continue;
166 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
168 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
170 fprintf (vect_dump, "not vectorized: irregular stmt.");
171 print_generic_expr (vect_dump, stmt, TDF_SLIM);
173 return false;
176 if (!GIMPLE_STMT_P (stmt)
177 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
179 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
181 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
182 print_generic_expr (vect_dump, stmt, TDF_SLIM);
184 return false;
187 if (STMT_VINFO_VECTYPE (stmt_info))
189 /* The only case when a vectype had been already set is for stmts
190 that contain a dataref, or for "pattern-stmts" (stmts generated
191 by the vectorizer to represent/replace a certain idiom). */
192 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
193 || is_pattern_stmt_p (stmt_info));
194 vectype = STMT_VINFO_VECTYPE (stmt_info);
196 else
198 tree operation;
200 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
201 && !is_pattern_stmt_p (stmt_info));
203 /* We generally set the vectype according to the type of the
204 result (lhs).
205 For stmts whose result-type is different than the type of the
206 arguments (e.g. demotion, promotion), vectype will be reset
207 appropriately (later). Note that we have to visit the smallest
208 datatype in this function, because that determines the VF.
209 If the smallest datatype in the loop is present only as the
210 rhs of a promotion operation - we'd miss it here.
211 Such a case, where a variable of this datatype does not appear
212 in the lhs anywhere in the loop, can only occur if it's an
213 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
214 to have been optimized away by invariant motion. However, we
215 cannot rely on invariant motion to always take invariants out
216 of the loop, and so in the case of promotion we also have to
217 check the rhs. */
218 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
220 operation = GIMPLE_STMT_OPERAND (stmt, 1);
221 if (TREE_CODE (operation) == NOP_EXPR
222 || TREE_CODE (operation) == CONVERT_EXPR
223 || TREE_CODE (operation) == WIDEN_MULT_EXPR
224 || TREE_CODE (operation) == FLOAT_EXPR)
226 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
227 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
228 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
229 scalar_type = rhs_type;
232 if (vect_print_dump_info (REPORT_DETAILS))
234 fprintf (vect_dump, "get vectype for scalar type: ");
235 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
238 vectype = get_vectype_for_scalar_type (scalar_type);
239 if (!vectype)
241 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
243 fprintf (vect_dump,
244 "not vectorized: unsupported data-type ");
245 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
247 return false;
249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
252 if (vect_print_dump_info (REPORT_DETAILS))
254 fprintf (vect_dump, "vectype: ");
255 print_generic_expr (vect_dump, vectype, TDF_SLIM);
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (vect_print_dump_info (REPORT_DETAILS))
260 fprintf (vect_dump, "nunits = %d", nunits);
262 if (!vectorization_factor
263 || (nunits > vectorization_factor))
264 vectorization_factor = nunits;
269 /* TODO: Analyze cost. Decide if worth while to vectorize. */
270 if (vect_print_dump_info (REPORT_DETAILS))
271 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
272 if (vectorization_factor <= 1)
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported data-type");
276 return false;
278 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
280 return true;
284 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
285 the number of created vector stmts depends on the unrolling factor). However,
286 the actual number of vector stmts for every SLP node depends on VF which is
287 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
288 In this function we assume that the inside costs calculated in
289 vect_model_xxx_cost are linear in ncopies. */
291 static void
292 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
294 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
295 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
296 slp_instance instance;
298 if (vect_print_dump_info (REPORT_SLP))
299 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
301 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
302 /* We assume that costs are linear in ncopies. */
303 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
304 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
308 /* Function vect_analyze_operations.
310 Scan the loop stmts and make sure they are all vectorizable. */
312 static bool
313 vect_analyze_operations (loop_vec_info loop_vinfo)
315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
316 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
317 int nbbs = loop->num_nodes;
318 block_stmt_iterator si;
319 unsigned int vectorization_factor = 0;
320 int i;
321 bool ok;
322 tree phi;
323 stmt_vec_info stmt_info;
324 bool need_to_vectorize = false;
325 int min_profitable_iters;
326 int min_scalar_loop_bound;
327 unsigned int th;
328 bool only_slp_in_loop = true;
330 if (vect_print_dump_info (REPORT_DETAILS))
331 fprintf (vect_dump, "=== vect_analyze_operations ===");
333 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
334 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
336 for (i = 0; i < nbbs; i++)
338 basic_block bb = bbs[i];
340 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
342 ok = true;
344 stmt_info = vinfo_for_stmt (phi);
345 if (vect_print_dump_info (REPORT_DETAILS))
347 fprintf (vect_dump, "examining phi: ");
348 print_generic_expr (vect_dump, phi, TDF_SLIM);
351 if (! is_loop_header_bb_p (bb))
353 /* inner-loop loop-closed exit phi in outer-loop vectorization
354 (i.e. a phi in the tail of the outer-loop).
355 FORNOW: we currently don't support the case that these phis
356 are not used in the outerloop, cause this case requires
357 to actually do something here. */
358 if (!STMT_VINFO_RELEVANT_P (stmt_info)
359 || STMT_VINFO_LIVE_P (stmt_info))
361 if (vect_print_dump_info (REPORT_DETAILS))
362 fprintf (vect_dump,
363 "Unsupported loop-closed phi in outer-loop.");
364 return false;
366 continue;
369 gcc_assert (stmt_info);
371 if (STMT_VINFO_LIVE_P (stmt_info))
373 /* FORNOW: not yet supported. */
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 fprintf (vect_dump, "not vectorized: value used after loop.");
376 return false;
379 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
380 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
382 /* A scalar-dependence cycle that we don't support. */
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
385 return false;
388 if (STMT_VINFO_RELEVANT_P (stmt_info))
390 need_to_vectorize = true;
391 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
392 ok = vectorizable_induction (phi, NULL, NULL);
395 if (!ok)
397 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
399 fprintf (vect_dump,
400 "not vectorized: relevant phi not supported: ");
401 print_generic_expr (vect_dump, phi, TDF_SLIM);
403 return false;
407 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
409 tree stmt = bsi_stmt (si);
410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
411 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
413 if (vect_print_dump_info (REPORT_DETAILS))
415 fprintf (vect_dump, "==> examining statement: ");
416 print_generic_expr (vect_dump, stmt, TDF_SLIM);
419 gcc_assert (stmt_info);
421 /* skip stmts which do not need to be vectorized.
422 this is expected to include:
423 - the COND_EXPR which is the loop exit condition
424 - any LABEL_EXPRs in the loop
425 - computations that are used only for array indexing or loop
426 control */
428 if (!STMT_VINFO_RELEVANT_P (stmt_info)
429 && !STMT_VINFO_LIVE_P (stmt_info))
431 if (vect_print_dump_info (REPORT_DETAILS))
432 fprintf (vect_dump, "irrelevant.");
433 continue;
436 switch (STMT_VINFO_DEF_TYPE (stmt_info))
438 case vect_loop_def:
439 break;
441 case vect_reduction_def:
442 gcc_assert (relevance == vect_used_in_outer
443 || relevance == vect_used_in_outer_by_reduction
444 || relevance == vect_unused_in_loop);
445 break;
447 case vect_induction_def:
448 case vect_constant_def:
449 case vect_invariant_def:
450 case vect_unknown_def_type:
451 default:
452 gcc_unreachable ();
455 if (STMT_VINFO_RELEVANT_P (stmt_info))
457 gcc_assert (GIMPLE_STMT_P (stmt)
458 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
459 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
460 need_to_vectorize = true;
463 ok = true;
464 if (STMT_VINFO_RELEVANT_P (stmt_info)
465 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
466 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
467 || vectorizable_type_demotion (stmt, NULL, NULL)
468 || vectorizable_conversion (stmt, NULL, NULL, NULL)
469 || vectorizable_operation (stmt, NULL, NULL, NULL)
470 || vectorizable_assignment (stmt, NULL, NULL, NULL)
471 || vectorizable_load (stmt, NULL, NULL, NULL)
472 || vectorizable_call (stmt, NULL, NULL)
473 || vectorizable_store (stmt, NULL, NULL, NULL)
474 || vectorizable_condition (stmt, NULL, NULL)
475 || vectorizable_reduction (stmt, NULL, NULL));
477 if (!ok)
479 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
481 fprintf (vect_dump, "not vectorized: relevant stmt not ");
482 fprintf (vect_dump, "supported: ");
483 print_generic_expr (vect_dump, stmt, TDF_SLIM);
485 return false;
488 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
489 need extra handling, except for vectorizable reductions. */
490 if (STMT_VINFO_LIVE_P (stmt_info)
491 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
492 ok = vectorizable_live_operation (stmt, NULL, NULL);
494 if (!ok)
496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
498 fprintf (vect_dump, "not vectorized: live stmt not ");
499 fprintf (vect_dump, "supported: ");
500 print_generic_expr (vect_dump, stmt, TDF_SLIM);
502 return false;
505 if (!PURE_SLP_STMT (stmt_info))
507 /* STMT needs loop-based vectorization. */
508 only_slp_in_loop = false;
510 /* Groups of strided accesses whose size is not a power of 2 are
511 not vectorizable yet using loop-vectorization. Therefore, if
512 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
513 both SLPed and loop-based vectorzed), the loop cannot be
514 vectorized. */
515 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
516 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
517 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
519 if (vect_print_dump_info (REPORT_DETAILS))
521 fprintf (vect_dump, "not vectorized: the size of group "
522 "of strided accesses is not a power of 2");
523 print_generic_expr (vect_dump, stmt, TDF_SLIM);
525 return false;
528 } /* stmts in bb */
529 } /* bbs */
531 /* All operations in the loop are either irrelevant (deal with loop
532 control, or dead), or only used outside the loop and can be moved
533 out of the loop (e.g. invariants, inductions). The loop can be
534 optimized away by scalar optimizations. We're better off not
535 touching this loop. */
536 if (!need_to_vectorize)
538 if (vect_print_dump_info (REPORT_DETAILS))
539 fprintf (vect_dump,
540 "All the computation can be taken out of the loop.");
541 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
542 fprintf (vect_dump,
543 "not vectorized: redundant loop. no profit to vectorize.");
544 return false;
547 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
548 vectorization factor of the loop is the unrolling factor required by the
549 SLP instances. If that unrolling factor is 1, we say, that we perform
550 pure SLP on loop - cross iteration parallelism is not exploited. */
551 if (only_slp_in_loop)
552 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
553 else
554 vectorization_factor = least_common_multiple (vectorization_factor,
555 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
557 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
559 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
560 && vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump,
562 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
563 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
569 fprintf (vect_dump, "not vectorized: iteration count too small.");
570 if (vect_print_dump_info (REPORT_DETAILS))
571 fprintf (vect_dump,"not vectorized: iteration count smaller than "
572 "vectorization factor.");
573 return false;
576 /* Analyze cost. Decide if worth while to vectorize. */
578 /* Once VF is set, SLP costs should be updated since the number of created
579 vector stmts depends on VF. */
580 vect_update_slp_costs_according_to_vf (loop_vinfo);
582 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
583 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
585 if (min_profitable_iters < 0)
587 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
588 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
589 if (vect_print_dump_info (REPORT_DETAILS))
590 fprintf (vect_dump, "not vectorized: vector version will never be "
591 "profitable.");
592 return false;
595 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
596 * vectorization_factor) - 1);
598 /* Use the cost model only if it is more conservative than user specified
599 threshold. */
601 th = (unsigned) min_scalar_loop_bound;
602 if (min_profitable_iters
603 && (!min_scalar_loop_bound
604 || min_profitable_iters > min_scalar_loop_bound))
605 th = (unsigned) min_profitable_iters;
607 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
608 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
610 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
611 fprintf (vect_dump, "not vectorized: vectorization not "
612 "profitable.");
613 if (vect_print_dump_info (REPORT_DETAILS))
614 fprintf (vect_dump, "not vectorized: iteration count smaller than "
615 "user specified loop bound parameter or minimum "
616 "profitable iterations (whichever is more conservative).");
617 return false;
620 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
621 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
622 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "epilog loop required.");
626 if (!vect_can_advance_ivs_p (loop_vinfo))
628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
629 fprintf (vect_dump,
630 "not vectorized: can't create epilog loop 1.");
631 return false;
633 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
635 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
636 fprintf (vect_dump,
637 "not vectorized: can't create epilog loop 2.");
638 return false;
642 return true;
646 /* Function exist_non_indexing_operands_for_use_p
648 USE is one of the uses attached to STMT. Check if USE is
649 used in STMT for anything other than indexing an array. */
651 static bool
652 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
654 tree operand;
655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
657 /* USE corresponds to some operand in STMT. If there is no data
658 reference in STMT, then any operand that corresponds to USE
659 is not indexing an array. */
660 if (!STMT_VINFO_DATA_REF (stmt_info))
661 return true;
663 /* STMT has a data_ref. FORNOW this means that its of one of
664 the following forms:
665 -1- ARRAY_REF = var
666 -2- var = ARRAY_REF
667 (This should have been verified in analyze_data_refs).
669 'var' in the second case corresponds to a def, not a use,
670 so USE cannot correspond to any operands that are not used
671 for array indexing.
673 Therefore, all we need to check is if STMT falls into the
674 first case, and whether var corresponds to USE. */
676 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
677 return false;
679 operand = GIMPLE_STMT_OPERAND (stmt, 1);
681 if (TREE_CODE (operand) != SSA_NAME)
682 return false;
684 if (operand == use)
685 return true;
687 return false;
691 /* Function vect_analyze_scalar_cycles_1.
693 Examine the cross iteration def-use cycles of scalar variables
694 in LOOP. LOOP_VINFO represents the loop that is noe being
695 considered for vectorization (can be LOOP, or an outer-loop
696 enclosing LOOP). */
698 static void
699 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
701 tree phi;
702 basic_block bb = loop->header;
703 tree dumy;
704 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
709 /* First - identify all inductions. */
710 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
712 tree access_fn = NULL;
713 tree def = PHI_RESULT (phi);
714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
716 if (vect_print_dump_info (REPORT_DETAILS))
718 fprintf (vect_dump, "Analyze phi: ");
719 print_generic_expr (vect_dump, phi, TDF_SLIM);
722 /* Skip virtual phi's. The data dependences that are associated with
723 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
724 if (!is_gimple_reg (SSA_NAME_VAR (def)))
725 continue;
727 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
729 /* Analyze the evolution function. */
730 access_fn = analyze_scalar_evolution (loop, def);
731 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
733 fprintf (vect_dump, "Access function of PHI: ");
734 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
737 if (!access_fn
738 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
740 VEC_safe_push (tree, heap, worklist, phi);
741 continue;
744 if (vect_print_dump_info (REPORT_DETAILS))
745 fprintf (vect_dump, "Detected induction.");
746 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
750 /* Second - identify all reductions. */
751 while (VEC_length (tree, worklist) > 0)
753 tree phi = VEC_pop (tree, worklist);
754 tree def = PHI_RESULT (phi);
755 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
756 tree reduc_stmt;
758 if (vect_print_dump_info (REPORT_DETAILS))
760 fprintf (vect_dump, "Analyze phi: ");
761 print_generic_expr (vect_dump, phi, TDF_SLIM);
764 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
765 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
767 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
768 if (reduc_stmt)
770 if (vect_print_dump_info (REPORT_DETAILS))
771 fprintf (vect_dump, "Detected reduction.");
772 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
773 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
774 vect_reduction_def;
776 else
777 if (vect_print_dump_info (REPORT_DETAILS))
778 fprintf (vect_dump, "Unknown def-use cycle pattern.");
781 VEC_free (tree, heap, worklist);
782 return;
786 /* Function vect_analyze_scalar_cycles.
788 Examine the cross iteration def-use cycles of scalar variables, by
789 analyzing the loop-header PHIs of scalar variables; Classify each
790 cycle as one of the following: invariant, induction, reduction, unknown.
791 We do that for the loop represented by LOOP_VINFO, and also to its
792 inner-loop, if exists.
793 Examples for scalar cycles:
795 Example1: reduction:
797 loop1:
798 for (i=0; i<N; i++)
799 sum += a[i];
801 Example2: induction:
803 loop2:
804 for (i=0; i<N; i++)
805 a[i] = i; */
807 static void
808 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
812 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
814 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
815 Reductions in such inner-loop therefore have different properties than
816 the reductions in the nest that gets vectorized:
817 1. When vectorized, they are executed in the same order as in the original
818 scalar loop, so we can't change the order of computation when
819 vectorizing them.
820 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
821 current checks are too strict. */
823 if (loop->inner)
824 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
828 /* Function vect_insert_into_interleaving_chain.
830 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
832 static void
833 vect_insert_into_interleaving_chain (struct data_reference *dra,
834 struct data_reference *drb)
836 tree prev, next, next_init;
837 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
838 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
840 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
841 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
842 while (next)
844 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
845 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
847 /* Insert here. */
848 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
849 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
850 return;
852 prev = next;
853 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
856 /* We got to the end of the list. Insert here. */
857 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
858 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
862 /* Function vect_update_interleaving_chain.
864 For two data-refs DRA and DRB that are a part of a chain interleaved data
865 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
867 There are four possible cases:
868 1. New stmts - both DRA and DRB are not a part of any chain:
869 FIRST_DR = DRB
870 NEXT_DR (DRB) = DRA
871 2. DRB is a part of a chain and DRA is not:
872 no need to update FIRST_DR
873 no need to insert DRB
874 insert DRA according to init
875 3. DRA is a part of a chain and DRB is not:
876 if (init of FIRST_DR > init of DRB)
877 FIRST_DR = DRB
878 NEXT(FIRST_DR) = previous FIRST_DR
879 else
880 insert DRB according to its init
881 4. both DRA and DRB are in some interleaving chains:
882 choose the chain with the smallest init of FIRST_DR
883 insert the nodes of the second chain into the first one. */
885 static void
886 vect_update_interleaving_chain (struct data_reference *drb,
887 struct data_reference *dra)
889 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
890 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
891 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
892 tree node, prev, next, node_init, first_stmt;
894 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
895 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
897 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
898 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
899 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
900 return;
903 /* 2. DRB is a part of a chain and DRA is not. */
904 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
906 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
907 /* Insert DRA into the chain of DRB. */
908 vect_insert_into_interleaving_chain (dra, drb);
909 return;
912 /* 3. DRA is a part of a chain and DRB is not. */
913 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
915 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
916 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
917 old_first_stmt)));
918 tree tmp;
920 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
922 /* DRB's init is smaller than the init of the stmt previously marked
923 as the first stmt of the interleaving chain of DRA. Therefore, we
924 update FIRST_STMT and put DRB in the head of the list. */
925 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
926 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
928 /* Update all the stmts in the list to point to the new FIRST_STMT. */
929 tmp = old_first_stmt;
930 while (tmp)
932 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
933 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
936 else
938 /* Insert DRB in the list of DRA. */
939 vect_insert_into_interleaving_chain (drb, dra);
940 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
942 return;
945 /* 4. both DRA and DRB are in some interleaving chains. */
946 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
947 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
948 if (first_a == first_b)
949 return;
950 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
951 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
953 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
955 /* Insert the nodes of DRA chain into the DRB chain.
956 After inserting a node, continue from this node of the DRB chain (don't
957 start from the beginning. */
958 node = DR_GROUP_FIRST_DR (stmtinfo_a);
959 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
960 first_stmt = first_b;
962 else
964 /* Insert the nodes of DRB chain into the DRA chain.
965 After inserting a node, continue from this node of the DRA chain (don't
966 start from the beginning. */
967 node = DR_GROUP_FIRST_DR (stmtinfo_b);
968 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
969 first_stmt = first_a;
972 while (node)
974 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
975 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
976 while (next)
978 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
979 if (tree_int_cst_compare (next_init, node_init) > 0)
981 /* Insert here. */
982 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
983 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
984 prev = node;
985 break;
987 prev = next;
988 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
990 if (!next)
992 /* We got to the end of the list. Insert here. */
993 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
994 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
995 prev = node;
997 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
998 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1003 /* Function vect_equal_offsets.
1005 Check if OFFSET1 and OFFSET2 are identical expressions. */
1007 static bool
1008 vect_equal_offsets (tree offset1, tree offset2)
1010 bool res0, res1;
1012 STRIP_NOPS (offset1);
1013 STRIP_NOPS (offset2);
1015 if (offset1 == offset2)
1016 return true;
1018 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1019 || !BINARY_CLASS_P (offset1)
1020 || !BINARY_CLASS_P (offset2))
1021 return false;
1023 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1024 TREE_OPERAND (offset2, 0));
1025 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1026 TREE_OPERAND (offset2, 1));
1028 return (res0 && res1);
1032 /* Function vect_check_interleaving.
1034 Check if DRA and DRB are a part of interleaving. In case they are, insert
1035 DRA and DRB in an interleaving chain. */
1037 static void
1038 vect_check_interleaving (struct data_reference *dra,
1039 struct data_reference *drb)
1041 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1043 /* Check that the data-refs have same first location (except init) and they
1044 are both either store or load (not load and store). */
1045 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1046 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1047 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1048 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1049 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1050 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1051 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1052 || DR_IS_READ (dra) != DR_IS_READ (drb))
1053 return;
1055 /* Check:
1056 1. data-refs are of the same type
1057 2. their steps are equal
1058 3. the step is greater than the difference between data-refs' inits */
1059 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1060 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1062 if (type_size_a != type_size_b
1063 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
1064 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
1065 TREE_TYPE (DR_REF (drb))))
1066 return;
1068 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1069 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1070 step = TREE_INT_CST_LOW (DR_STEP (dra));
1072 if (init_a > init_b)
1074 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1075 and DRB is accessed before DRA. */
1076 diff_mod_size = (init_a - init_b) % type_size_a;
1078 if ((init_a - init_b) > step)
1079 return;
1081 if (diff_mod_size == 0)
1083 vect_update_interleaving_chain (drb, dra);
1084 if (vect_print_dump_info (REPORT_DR_DETAILS))
1086 fprintf (vect_dump, "Detected interleaving ");
1087 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1088 fprintf (vect_dump, " and ");
1089 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1091 return;
1094 else
1096 /* If init_b == init_a + the size of the type * k, we have an
1097 interleaving, and DRA is accessed before DRB. */
1098 diff_mod_size = (init_b - init_a) % type_size_a;
1100 if ((init_b - init_a) > step)
1101 return;
1103 if (diff_mod_size == 0)
1105 vect_update_interleaving_chain (dra, drb);
1106 if (vect_print_dump_info (REPORT_DR_DETAILS))
1108 fprintf (vect_dump, "Detected interleaving ");
1109 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1110 fprintf (vect_dump, " and ");
1111 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1113 return;
1118 /* Check if data references pointed by DR_I and DR_J are same or
1119 belong to same interleaving group. Return FALSE if drs are
1120 different, otherwise return TRUE. */
1122 static bool
1123 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1125 tree stmt_i = DR_STMT (dr_i);
1126 tree stmt_j = DR_STMT (dr_j);
1128 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1129 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1130 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1131 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1132 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1133 return true;
1134 else
1135 return false;
1138 /* If address ranges represented by DDR_I and DDR_J are equal,
1139 return TRUE, otherwise return FALSE. */
1141 static bool
1142 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1144 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1145 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1146 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1147 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1148 return true;
1149 else
1150 return false;
1153 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1154 tested at run-time. Return TRUE if DDR was successfully inserted.
1155 Return false if versioning is not supported. */
1157 static bool
1158 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1162 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1163 return false;
1165 if (vect_print_dump_info (REPORT_DR_DETAILS))
1167 fprintf (vect_dump, "mark for run-time aliasing test between ");
1168 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1169 fprintf (vect_dump, " and ");
1170 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1173 if (optimize_size)
1175 if (vect_print_dump_info (REPORT_DR_DETAILS))
1176 fprintf (vect_dump, "versioning not supported when optimizing for size.");
1177 return false;
1180 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1181 if (loop->inner)
1183 if (vect_print_dump_info (REPORT_DR_DETAILS))
1184 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1185 return false;
1188 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1189 return true;
1192 /* Function vect_analyze_data_ref_dependence.
1194 Return TRUE if there (might) exist a dependence between a memory-reference
1195 DRA and a memory-reference DRB. When versioning for alias may check a
1196 dependence at run-time, return FALSE. */
1198 static bool
1199 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1200 loop_vec_info loop_vinfo)
1202 unsigned int i;
1203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1204 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1205 struct data_reference *dra = DDR_A (ddr);
1206 struct data_reference *drb = DDR_B (ddr);
1207 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1208 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1209 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1210 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1211 lambda_vector dist_v;
1212 unsigned int loop_depth;
1214 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1216 /* Independent data accesses. */
1217 vect_check_interleaving (dra, drb);
1218 return false;
1221 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1222 return false;
1224 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1226 if (vect_print_dump_info (REPORT_DR_DETAILS))
1228 fprintf (vect_dump,
1229 "versioning for alias required: can't determine dependence between ");
1230 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1231 fprintf (vect_dump, " and ");
1232 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1234 /* Add to list of ddrs that need to be tested at run-time. */
1235 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1238 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1240 if (vect_print_dump_info (REPORT_DR_DETAILS))
1242 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1243 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1244 fprintf (vect_dump, " and ");
1245 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1247 /* Add to list of ddrs that need to be tested at run-time. */
1248 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1251 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1252 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1254 int dist = dist_v[loop_depth];
1256 if (vect_print_dump_info (REPORT_DR_DETAILS))
1257 fprintf (vect_dump, "dependence distance = %d.", dist);
1259 /* Same loop iteration. */
1260 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1262 /* Two references with distance zero have the same alignment. */
1263 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1264 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1265 if (vect_print_dump_info (REPORT_ALIGNMENT))
1266 fprintf (vect_dump, "accesses have the same alignment.");
1267 if (vect_print_dump_info (REPORT_DR_DETAILS))
1269 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1270 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1271 fprintf (vect_dump, " and ");
1272 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1275 /* For interleaving, mark that there is a read-write dependency if
1276 necessary. We check before that one of the data-refs is store. */
1277 if (DR_IS_READ (dra))
1278 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1279 else
1281 if (DR_IS_READ (drb))
1282 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1285 continue;
1288 if (abs (dist) >= vectorization_factor
1289 || (dist > 0 && DDR_REVERSED_P (ddr)))
1291 /* Dependence distance does not create dependence, as far as
1292 vectorization is concerned, in this case. If DDR_REVERSED_P the
1293 order of the data-refs in DDR was reversed (to make distance
1294 vector positive), and the actual distance is negative. */
1295 if (vect_print_dump_info (REPORT_DR_DETAILS))
1296 fprintf (vect_dump, "dependence distance >= VF or negative.");
1297 continue;
1300 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1302 fprintf (vect_dump,
1303 "not vectorized, possible dependence "
1304 "between data-refs ");
1305 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1306 fprintf (vect_dump, " and ");
1307 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1310 return true;
1313 return false;
1316 /* Function vect_analyze_data_ref_dependences.
1318 Examine all the data references in the loop, and make sure there do not
1319 exist any data dependences between them. */
1321 static bool
1322 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1324 unsigned int i;
1325 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1326 struct data_dependence_relation *ddr;
1328 if (vect_print_dump_info (REPORT_DETAILS))
1329 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1331 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1332 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1333 return false;
1335 return true;
1339 /* Function vect_compute_data_ref_alignment
1341 Compute the misalignment of the data reference DR.
1343 Output:
1344 1. If during the misalignment computation it is found that the data reference
1345 cannot be vectorized then false is returned.
1346 2. DR_MISALIGNMENT (DR) is defined.
1348 FOR NOW: No analysis is actually performed. Misalignment is calculated
1349 only for trivial cases. TODO. */
1351 static bool
1352 vect_compute_data_ref_alignment (struct data_reference *dr)
1354 tree stmt = DR_STMT (dr);
1355 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1356 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1357 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1358 tree ref = DR_REF (dr);
1359 tree vectype;
1360 tree base, base_addr;
1361 bool base_aligned;
1362 tree misalign;
1363 tree aligned_to, alignment;
1365 if (vect_print_dump_info (REPORT_DETAILS))
1366 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1368 /* Initialize misalignment to unknown. */
1369 SET_DR_MISALIGNMENT (dr, -1);
1371 misalign = DR_INIT (dr);
1372 aligned_to = DR_ALIGNED_TO (dr);
1373 base_addr = DR_BASE_ADDRESS (dr);
1375 /* In case the dataref is in an inner-loop of the loop that is being
1376 vectorized (LOOP), we use the base and misalignment information
1377 relative to the outer-loop (LOOP). This is ok only if the misalignment
1378 stays the same throughout the execution of the inner-loop, which is why
1379 we have to check that the stride of the dataref in the inner-loop evenly
1380 divides by the vector size. */
1381 if (nested_in_vect_loop_p (loop, stmt))
1383 tree step = DR_STEP (dr);
1384 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1386 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1388 if (vect_print_dump_info (REPORT_ALIGNMENT))
1389 fprintf (vect_dump, "inner step divides the vector-size.");
1390 misalign = STMT_VINFO_DR_INIT (stmt_info);
1391 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1392 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1394 else
1396 if (vect_print_dump_info (REPORT_ALIGNMENT))
1397 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1398 misalign = NULL_TREE;
1402 base = build_fold_indirect_ref (base_addr);
1403 vectype = STMT_VINFO_VECTYPE (stmt_info);
1404 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1406 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1407 || !misalign)
1409 if (vect_print_dump_info (REPORT_ALIGNMENT))
1411 fprintf (vect_dump, "Unknown alignment for access: ");
1412 print_generic_expr (vect_dump, base, TDF_SLIM);
1414 return true;
1417 if ((DECL_P (base)
1418 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1419 alignment) >= 0)
1420 || (TREE_CODE (base_addr) == SSA_NAME
1421 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1422 TREE_TYPE (base_addr)))),
1423 alignment) >= 0))
1424 base_aligned = true;
1425 else
1426 base_aligned = false;
1428 if (!base_aligned)
1430 /* Do not change the alignment of global variables if
1431 flag_section_anchors is enabled. */
1432 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1433 || (TREE_STATIC (base) && flag_section_anchors))
1435 if (vect_print_dump_info (REPORT_DETAILS))
1437 fprintf (vect_dump, "can't force alignment of ref: ");
1438 print_generic_expr (vect_dump, ref, TDF_SLIM);
1440 return true;
1443 /* Force the alignment of the decl.
1444 NOTE: This is the only change to the code we make during
1445 the analysis phase, before deciding to vectorize the loop. */
1446 if (vect_print_dump_info (REPORT_DETAILS))
1447 fprintf (vect_dump, "force alignment");
1448 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1449 DECL_USER_ALIGN (base) = 1;
1452 /* At this point we assume that the base is aligned. */
1453 gcc_assert (base_aligned
1454 || (TREE_CODE (base) == VAR_DECL
1455 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1457 /* Modulo alignment. */
1458 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1460 if (!host_integerp (misalign, 1))
1462 /* Negative or overflowed misalignment value. */
1463 if (vect_print_dump_info (REPORT_DETAILS))
1464 fprintf (vect_dump, "unexpected misalign value");
1465 return false;
1468 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1470 if (vect_print_dump_info (REPORT_DETAILS))
1472 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1473 print_generic_expr (vect_dump, ref, TDF_SLIM);
1476 return true;
1480 /* Function vect_compute_data_refs_alignment
1482 Compute the misalignment of data references in the loop.
1483 Return FALSE if a data reference is found that cannot be vectorized. */
1485 static bool
1486 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1488 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1489 struct data_reference *dr;
1490 unsigned int i;
1492 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1493 if (!vect_compute_data_ref_alignment (dr))
1494 return false;
1496 return true;
1500 /* Function vect_update_misalignment_for_peel
1502 DR - the data reference whose misalignment is to be adjusted.
1503 DR_PEEL - the data reference whose misalignment is being made
1504 zero in the vector loop by the peel.
1505 NPEEL - the number of iterations in the peel loop if the misalignment
1506 of DR_PEEL is known at compile time. */
1508 static void
1509 vect_update_misalignment_for_peel (struct data_reference *dr,
1510 struct data_reference *dr_peel, int npeel)
1512 unsigned int i;
1513 VEC(dr_p,heap) *same_align_drs;
1514 struct data_reference *current_dr;
1515 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1516 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1517 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1518 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1520 /* For interleaved data accesses the step in the loop must be multiplied by
1521 the size of the interleaving group. */
1522 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1523 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1524 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1525 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1527 /* It can be assumed that the data refs with the same alignment as dr_peel
1528 are aligned in the vector loop. */
1529 same_align_drs
1530 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1531 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1533 if (current_dr != dr)
1534 continue;
1535 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1536 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1537 SET_DR_MISALIGNMENT (dr, 0);
1538 return;
1541 if (known_alignment_for_access_p (dr)
1542 && known_alignment_for_access_p (dr_peel))
1544 int misal = DR_MISALIGNMENT (dr);
1545 misal += npeel * dr_size;
1546 misal %= UNITS_PER_SIMD_WORD;
1547 SET_DR_MISALIGNMENT (dr, misal);
1548 return;
1551 if (vect_print_dump_info (REPORT_DETAILS))
1552 fprintf (vect_dump, "Setting misalignment to -1.");
1553 SET_DR_MISALIGNMENT (dr, -1);
1557 /* Function vect_verify_datarefs_alignment
1559 Return TRUE if all data references in the loop can be
1560 handled with respect to alignment. */
1562 static bool
1563 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1565 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1566 struct data_reference *dr;
1567 enum dr_alignment_support supportable_dr_alignment;
1568 unsigned int i;
1570 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1572 tree stmt = DR_STMT (dr);
1573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1575 /* For interleaving, only the alignment of the first access matters. */
1576 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1577 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1578 continue;
1580 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1581 if (!supportable_dr_alignment)
1583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1585 if (DR_IS_READ (dr))
1586 fprintf (vect_dump,
1587 "not vectorized: unsupported unaligned load.");
1588 else
1589 fprintf (vect_dump,
1590 "not vectorized: unsupported unaligned store.");
1592 return false;
1594 if (supportable_dr_alignment != dr_aligned
1595 && vect_print_dump_info (REPORT_ALIGNMENT))
1596 fprintf (vect_dump, "Vectorizing an unaligned access.");
1598 return true;
1602 /* Function vector_alignment_reachable_p
1604 Return true if vector alignment for DR is reachable by peeling
1605 a few loop iterations. Return false otherwise. */
1607 static bool
1608 vector_alignment_reachable_p (struct data_reference *dr)
1610 tree stmt = DR_STMT (dr);
1611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1612 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1614 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1616 /* For interleaved access we peel only if number of iterations in
1617 the prolog loop ({VF - misalignment}), is a multiple of the
1618 number of the interleaved accesses. */
1619 int elem_size, mis_in_elements;
1620 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1622 /* FORNOW: handle only known alignment. */
1623 if (!known_alignment_for_access_p (dr))
1624 return false;
1626 elem_size = UNITS_PER_SIMD_WORD / nelements;
1627 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1629 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1630 return false;
1633 /* If misalignment is known at the compile time then allow peeling
1634 only if natural alignment is reachable through peeling. */
1635 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1637 HOST_WIDE_INT elmsize =
1638 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1639 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1642 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1644 if (DR_MISALIGNMENT (dr) % elmsize)
1646 if (vect_print_dump_info (REPORT_DETAILS))
1647 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1648 return false;
1652 if (!known_alignment_for_access_p (dr))
1654 tree type = (TREE_TYPE (DR_REF (dr)));
1655 tree ba = DR_BASE_OBJECT (dr);
1656 bool is_packed = false;
1658 if (ba)
1659 is_packed = contains_packed_reference (ba);
1661 if (vect_print_dump_info (REPORT_DETAILS))
1662 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1663 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1664 return true;
1665 else
1666 return false;
1669 return true;
1672 /* Function vect_enhance_data_refs_alignment
1674 This pass will use loop versioning and loop peeling in order to enhance
1675 the alignment of data references in the loop.
1677 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1678 original loop is to be vectorized; Any other loops that are created by
1679 the transformations performed in this pass - are not supposed to be
1680 vectorized. This restriction will be relaxed.
1682 This pass will require a cost model to guide it whether to apply peeling
1683 or versioning or a combination of the two. For example, the scheme that
1684 intel uses when given a loop with several memory accesses, is as follows:
1685 choose one memory access ('p') which alignment you want to force by doing
1686 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1687 other accesses are not necessarily aligned, or (2) use loop versioning to
1688 generate one loop in which all accesses are aligned, and another loop in
1689 which only 'p' is necessarily aligned.
1691 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1692 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1693 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1695 Devising a cost model is the most critical aspect of this work. It will
1696 guide us on which access to peel for, whether to use loop versioning, how
1697 many versions to create, etc. The cost model will probably consist of
1698 generic considerations as well as target specific considerations (on
1699 powerpc for example, misaligned stores are more painful than misaligned
1700 loads).
1702 Here are the general steps involved in alignment enhancements:
1704 -- original loop, before alignment analysis:
1705 for (i=0; i<N; i++){
1706 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1707 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1710 -- After vect_compute_data_refs_alignment:
1711 for (i=0; i<N; i++){
1712 x = q[i]; # DR_MISALIGNMENT(q) = 3
1713 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1716 -- Possibility 1: we do loop versioning:
1717 if (p is aligned) {
1718 for (i=0; i<N; i++){ # loop 1A
1719 x = q[i]; # DR_MISALIGNMENT(q) = 3
1720 p[i] = y; # DR_MISALIGNMENT(p) = 0
1723 else {
1724 for (i=0; i<N; i++){ # loop 1B
1725 x = q[i]; # DR_MISALIGNMENT(q) = 3
1726 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1730 -- Possibility 2: we do loop peeling:
1731 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1732 x = q[i];
1733 p[i] = y;
1735 for (i = 3; i < N; i++){ # loop 2A
1736 x = q[i]; # DR_MISALIGNMENT(q) = 0
1737 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1740 -- Possibility 3: combination of loop peeling and versioning:
1741 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1742 x = q[i];
1743 p[i] = y;
1745 if (p is aligned) {
1746 for (i = 3; i<N; i++){ # loop 3A
1747 x = q[i]; # DR_MISALIGNMENT(q) = 0
1748 p[i] = y; # DR_MISALIGNMENT(p) = 0
1751 else {
1752 for (i = 3; i<N; i++){ # loop 3B
1753 x = q[i]; # DR_MISALIGNMENT(q) = 0
1754 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1758 These loops are later passed to loop_transform to be vectorized. The
1759 vectorizer will use the alignment information to guide the transformation
1760 (whether to generate regular loads/stores, or with special handling for
1761 misalignment). */
1763 static bool
1764 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1766 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1768 enum dr_alignment_support supportable_dr_alignment;
1769 struct data_reference *dr0 = NULL;
1770 struct data_reference *dr;
1771 unsigned int i;
1772 bool do_peeling = false;
1773 bool do_versioning = false;
1774 bool stat;
1775 tree stmt;
1776 stmt_vec_info stmt_info;
1777 int vect_versioning_for_alias_required;
1779 if (vect_print_dump_info (REPORT_DETAILS))
1780 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1782 /* While cost model enhancements are expected in the future, the high level
1783 view of the code at this time is as follows:
1785 A) If there is a misaligned write then see if peeling to align this write
1786 can make all data references satisfy vect_supportable_dr_alignment.
1787 If so, update data structures as needed and return true. Note that
1788 at this time vect_supportable_dr_alignment is known to return false
1789 for a misaligned write.
1791 B) If peeling wasn't possible and there is a data reference with an
1792 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1793 then see if loop versioning checks can be used to make all data
1794 references satisfy vect_supportable_dr_alignment. If so, update
1795 data structures as needed and return true.
1797 C) If neither peeling nor versioning were successful then return false if
1798 any data reference does not satisfy vect_supportable_dr_alignment.
1800 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1802 Note, Possibility 3 above (which is peeling and versioning together) is not
1803 being done at this time. */
1805 /* (1) Peeling to force alignment. */
1807 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1808 Considerations:
1809 + How many accesses will become aligned due to the peeling
1810 - How many accesses will become unaligned due to the peeling,
1811 and the cost of misaligned accesses.
1812 - The cost of peeling (the extra runtime checks, the increase
1813 in code size).
1815 The scheme we use FORNOW: peel to force the alignment of the first
1816 misaligned store in the loop.
1817 Rationale: misaligned stores are not yet supported.
1819 TODO: Use a cost model. */
1821 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1823 stmt = DR_STMT (dr);
1824 stmt_info = vinfo_for_stmt (stmt);
1826 /* For interleaving, only the alignment of the first access
1827 matters. */
1828 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1829 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1830 continue;
1832 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1834 do_peeling = vector_alignment_reachable_p (dr);
1835 if (do_peeling)
1836 dr0 = dr;
1837 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1838 fprintf (vect_dump, "vector alignment may not be reachable");
1839 break;
1843 vect_versioning_for_alias_required =
1844 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1846 /* Temporarily, if versioning for alias is required, we disable peeling
1847 until we support peeling and versioning. Often peeling for alignment
1848 will require peeling for loop-bound, which in turn requires that we
1849 know how to adjust the loop ivs after the loop. */
1850 if (vect_versioning_for_alias_required
1851 || !vect_can_advance_ivs_p (loop_vinfo)
1852 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1853 do_peeling = false;
1855 if (do_peeling)
1857 int mis;
1858 int npeel = 0;
1859 tree stmt = DR_STMT (dr0);
1860 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1862 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1864 if (known_alignment_for_access_p (dr0))
1866 /* Since it's known at compile time, compute the number of iterations
1867 in the peeled loop (the peeling factor) for use in updating
1868 DR_MISALIGNMENT values. The peeling factor is the vectorization
1869 factor minus the misalignment as an element count. */
1870 mis = DR_MISALIGNMENT (dr0);
1871 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1872 npeel = nelements - mis;
1874 /* For interleaved data access every iteration accesses all the
1875 members of the group, therefore we divide the number of iterations
1876 by the group size. */
1877 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1878 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1879 npeel /= DR_GROUP_SIZE (stmt_info);
1881 if (vect_print_dump_info (REPORT_DETAILS))
1882 fprintf (vect_dump, "Try peeling by %d", npeel);
1885 /* Ensure that all data refs can be vectorized after the peel. */
1886 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 int save_misalignment;
1890 if (dr == dr0)
1891 continue;
1893 stmt = DR_STMT (dr);
1894 stmt_info = vinfo_for_stmt (stmt);
1895 /* For interleaving, only the alignment of the first access
1896 matters. */
1897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1898 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1899 continue;
1901 save_misalignment = DR_MISALIGNMENT (dr);
1902 vect_update_misalignment_for_peel (dr, dr0, npeel);
1903 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1904 SET_DR_MISALIGNMENT (dr, save_misalignment);
1906 if (!supportable_dr_alignment)
1908 do_peeling = false;
1909 break;
1913 if (do_peeling)
1915 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1916 If the misalignment of DR_i is identical to that of dr0 then set
1917 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1918 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1919 by the peeling factor times the element size of DR_i (MOD the
1920 vectorization factor times the size). Otherwise, the
1921 misalignment of DR_i must be set to unknown. */
1922 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1923 if (dr != dr0)
1924 vect_update_misalignment_for_peel (dr, dr0, npeel);
1926 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1927 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1928 SET_DR_MISALIGNMENT (dr0, 0);
1929 if (vect_print_dump_info (REPORT_ALIGNMENT))
1930 fprintf (vect_dump, "Alignment of access forced using peeling.");
1932 if (vect_print_dump_info (REPORT_DETAILS))
1933 fprintf (vect_dump, "Peeling for alignment will be applied.");
1935 stat = vect_verify_datarefs_alignment (loop_vinfo);
1936 gcc_assert (stat);
1937 return stat;
1942 /* (2) Versioning to force alignment. */
1944 /* Try versioning if:
1945 1) flag_tree_vect_loop_version is TRUE
1946 2) optimize_size is FALSE
1947 3) there is at least one unsupported misaligned data ref with an unknown
1948 misalignment, and
1949 4) all misaligned data refs with a known misalignment are supported, and
1950 5) the number of runtime alignment checks is within reason. */
1952 do_versioning =
1953 flag_tree_vect_loop_version
1954 && (!optimize_size)
1955 && (!loop->inner); /* FORNOW */
1957 if (do_versioning)
1959 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1961 stmt = DR_STMT (dr);
1962 stmt_info = vinfo_for_stmt (stmt);
1964 /* For interleaving, only the alignment of the first access
1965 matters. */
1966 if (aligned_access_p (dr)
1967 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1968 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1969 continue;
1971 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1973 if (!supportable_dr_alignment)
1975 tree stmt;
1976 int mask;
1977 tree vectype;
1979 if (known_alignment_for_access_p (dr)
1980 || VEC_length (tree,
1981 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1982 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1984 do_versioning = false;
1985 break;
1988 stmt = DR_STMT (dr);
1989 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1990 gcc_assert (vectype);
1992 /* The rightmost bits of an aligned address must be zeros.
1993 Construct the mask needed for this test. For example,
1994 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1995 mask must be 15 = 0xf. */
1996 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1998 /* FORNOW: use the same mask to test all potentially unaligned
1999 references in the loop. The vectorizer currently supports
2000 a single vector size, see the reference to
2001 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2002 vectorization factor is computed. */
2003 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2004 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2005 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2006 VEC_safe_push (tree, heap,
2007 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2008 DR_STMT (dr));
2012 /* Versioning requires at least one misaligned data reference. */
2013 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2014 do_versioning = false;
2015 else if (!do_versioning)
2016 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2019 if (do_versioning)
2021 VEC(tree,heap) *may_misalign_stmts
2022 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2023 tree stmt;
2025 /* It can now be assumed that the data references in the statements
2026 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2027 of the loop being vectorized. */
2028 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2031 dr = STMT_VINFO_DATA_REF (stmt_info);
2032 SET_DR_MISALIGNMENT (dr, 0);
2033 if (vect_print_dump_info (REPORT_ALIGNMENT))
2034 fprintf (vect_dump, "Alignment of access forced using versioning.");
2037 if (vect_print_dump_info (REPORT_DETAILS))
2038 fprintf (vect_dump, "Versioning for alignment will be applied.");
2040 /* Peeling and versioning can't be done together at this time. */
2041 gcc_assert (! (do_peeling && do_versioning));
2043 stat = vect_verify_datarefs_alignment (loop_vinfo);
2044 gcc_assert (stat);
2045 return stat;
2048 /* This point is reached if neither peeling nor versioning is being done. */
2049 gcc_assert (! (do_peeling || do_versioning));
2051 stat = vect_verify_datarefs_alignment (loop_vinfo);
2052 return stat;
2056 /* Function vect_analyze_data_refs_alignment
2058 Analyze the alignment of the data-references in the loop.
2059 Return FALSE if a data reference is found that cannot be vectorized. */
2061 static bool
2062 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2064 if (vect_print_dump_info (REPORT_DETAILS))
2065 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2067 if (!vect_compute_data_refs_alignment (loop_vinfo))
2069 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2070 fprintf (vect_dump,
2071 "not vectorized: can't calculate alignment for data ref.");
2072 return false;
2075 return true;
2079 /* Analyze groups of strided accesses: check that DR belongs to a group of
2080 strided accesses of legal size, step, etc. Detect gaps, single element
2081 interleaving, and other special cases. Set strided access info.
2082 Collect groups of strided stores for further use in SLP analysis. */
2084 static bool
2085 vect_analyze_group_access (struct data_reference *dr)
2087 tree step = DR_STEP (dr);
2088 tree scalar_type = TREE_TYPE (DR_REF (dr));
2089 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2090 tree stmt = DR_STMT (dr);
2091 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2092 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2093 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2094 HOST_WIDE_INT stride;
2095 bool slp_impossible = false;
2097 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2098 interleaving group (including gaps). */
2099 stride = dr_step / type_size;
2101 /* Not consecutive access is possible only if it is a part of interleaving. */
2102 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2104 /* Check if it this DR is a part of interleaving, and is a single
2105 element of the group that is accessed in the loop. */
2107 /* Gaps are supported only for loads. STEP must be a multiple of the type
2108 size. The size of the group must be a power of 2. */
2109 if (DR_IS_READ (dr)
2110 && (dr_step % type_size) == 0
2111 && stride > 0
2112 && exact_log2 (stride) != -1)
2114 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2115 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2116 if (vect_print_dump_info (REPORT_DR_DETAILS))
2118 fprintf (vect_dump, "Detected single element interleaving %d ",
2119 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2120 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2121 fprintf (vect_dump, " step ");
2122 print_generic_expr (vect_dump, step, TDF_SLIM);
2124 return true;
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump, "not consecutive access");
2128 return false;
2131 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2133 /* First stmt in the interleaving chain. Check the chain. */
2134 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2135 struct data_reference *data_ref = dr;
2136 unsigned int count = 1;
2137 tree next_step;
2138 tree prev_init = DR_INIT (data_ref);
2139 tree prev = stmt;
2140 HOST_WIDE_INT diff, count_in_bytes;
2142 while (next)
2144 /* Skip same data-refs. In case that two or more stmts share data-ref
2145 (supported only for loads), we vectorize only the first stmt, and
2146 the rest get their vectorized loads from the first one. */
2147 if (!tree_int_cst_compare (DR_INIT (data_ref),
2148 DR_INIT (STMT_VINFO_DATA_REF (
2149 vinfo_for_stmt (next)))))
2151 if (!DR_IS_READ (data_ref))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "Two store stmts share the same dr.");
2155 return false;
2158 /* Check that there is no load-store dependencies for this loads
2159 to prevent a case of load-store-load to the same location. */
2160 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2161 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump,
2165 "READ_WRITE dependence in interleaving.");
2166 return false;
2169 /* For load use the same data-ref load. */
2170 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2172 prev = next;
2173 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2174 continue;
2176 prev = next;
2178 /* Check that all the accesses have the same STEP. */
2179 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2180 if (tree_int_cst_compare (step, next_step))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "not consecutive access in interleaving");
2184 return false;
2187 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2188 /* Check that the distance between two accesses is equal to the type
2189 size. Otherwise, we have gaps. */
2190 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2191 - TREE_INT_CST_LOW (prev_init)) / type_size;
2192 if (diff != 1)
2194 /* FORNOW: SLP of accesses with gaps is not supported. */
2195 slp_impossible = true;
2196 if (!DR_IS_READ (data_ref))
2198 if (vect_print_dump_info (REPORT_DETAILS))
2199 fprintf (vect_dump, "interleaved store with gaps");
2200 return false;
2204 /* Store the gap from the previous member of the group. If there is no
2205 gap in the access, DR_GROUP_GAP is always 1. */
2206 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2208 prev_init = DR_INIT (data_ref);
2209 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2210 /* Count the number of data-refs in the chain. */
2211 count++;
2214 /* COUNT is the number of accesses found, we multiply it by the size of
2215 the type to get COUNT_IN_BYTES. */
2216 count_in_bytes = type_size * count;
2218 /* Check that the size of the interleaving is not greater than STEP. */
2219 if (dr_step < count_in_bytes)
2221 if (vect_print_dump_info (REPORT_DETAILS))
2223 fprintf (vect_dump, "interleaving size is greater than step for ");
2224 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2226 return false;
2229 /* Check that the size of the interleaving is equal to STEP for stores,
2230 i.e., that there are no gaps. */
2231 if (dr_step != count_in_bytes)
2233 if (DR_IS_READ (dr))
2234 slp_impossible = true;
2235 else
2237 if (vect_print_dump_info (REPORT_DETAILS))
2238 fprintf (vect_dump, "interleaved store with gaps");
2239 return false;
2243 /* Check that STEP is a multiple of type size. */
2244 if ((dr_step % type_size) != 0)
2246 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "step is not a multiple of type size: step ");
2249 print_generic_expr (vect_dump, step, TDF_SLIM);
2250 fprintf (vect_dump, " size ");
2251 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2252 TDF_SLIM);
2254 return false;
2257 /* FORNOW: we handle only interleaving that is a power of 2.
2258 We don't fail here if it may be still possible to vectorize the
2259 group using SLP. If not, the size of the group will be checked in
2260 vect_analyze_operations, and the vectorization will fail. */
2261 if (exact_log2 (stride) == -1)
2263 if (vect_print_dump_info (REPORT_DETAILS))
2264 fprintf (vect_dump, "interleaving is not a power of 2");
2266 if (slp_impossible)
2267 return false;
2269 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2270 if (vect_print_dump_info (REPORT_DETAILS))
2271 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2273 /* SLP: create an SLP data structure for every interleaving group of
2274 stores for further analysis in vect_analyse_slp. */
2275 if (!DR_IS_READ (dr) && !slp_impossible)
2276 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2279 return true;
2283 /* Analyze the access pattern of the data-reference DR.
2284 In case of non-consecutive accesses call vect_analyze_group_access() to
2285 analyze groups of strided accesses. */
2287 static bool
2288 vect_analyze_data_ref_access (struct data_reference *dr)
2290 tree step = DR_STEP (dr);
2291 tree scalar_type = TREE_TYPE (DR_REF (dr));
2292 tree stmt = DR_STMT (dr);
2293 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2294 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2295 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2296 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2298 if (!step)
2300 if (vect_print_dump_info (REPORT_DETAILS))
2301 fprintf (vect_dump, "bad data-ref access");
2302 return false;
2305 /* Don't allow invariant accesses. */
2306 if (dr_step == 0)
2307 return false;
2309 if (nested_in_vect_loop_p (loop, stmt))
2311 /* Interleaved accesses are not yet supported within outer-loop
2312 vectorization for references in the inner-loop. */
2313 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2315 /* For the rest of the analysis we use the outer-loop step. */
2316 step = STMT_VINFO_DR_STEP (stmt_info);
2317 dr_step = TREE_INT_CST_LOW (step);
2319 if (dr_step == 0)
2321 if (vect_print_dump_info (REPORT_ALIGNMENT))
2322 fprintf (vect_dump, "zero step in outer loop.");
2323 if (DR_IS_READ (dr))
2324 return true;
2325 else
2326 return false;
2330 /* Consecutive? */
2331 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2333 /* Mark that it is not interleaving. */
2334 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2335 return true;
2338 if (nested_in_vect_loop_p (loop, stmt))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT))
2341 fprintf (vect_dump, "strided access in outer loop.");
2342 return false;
2345 /* Not consecutive access - check if it's a part of interleaving group. */
2346 return vect_analyze_group_access (dr);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
2359 static bool
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2362 unsigned int i;
2363 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2364 struct data_reference *dr;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2369 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2370 if (!vect_analyze_data_ref_access (dr))
2372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2373 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2374 return false;
2377 return true;
2380 /* Function vect_prune_runtime_alias_test_list.
2382 Prune a list of ddrs to be tested at run-time by versioning for alias.
2383 Return FALSE if resulting list of ddrs is longer then allowed by
2384 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2386 static bool
2387 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2389 VEC (ddr_p, heap) * ddrs =
2390 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2391 unsigned i, j;
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2396 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2398 bool found;
2399 ddr_p ddr_i;
2401 ddr_i = VEC_index (ddr_p, ddrs, i);
2402 found = false;
2404 for (j = 0; j < i; j++)
2406 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2408 if (vect_vfa_range_equal (ddr_i, ddr_j))
2410 if (vect_print_dump_info (REPORT_DR_DETAILS))
2412 fprintf (vect_dump, "found equal ranges ");
2413 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2414 fprintf (vect_dump, ", ");
2415 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2416 fprintf (vect_dump, " and ");
2417 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2418 fprintf (vect_dump, ", ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2421 found = true;
2422 break;
2426 if (found)
2428 VEC_ordered_remove (ddr_p, ddrs, i);
2429 continue;
2431 i++;
2434 if (VEC_length (ddr_p, ddrs) >
2435 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2437 if (vect_print_dump_info (REPORT_DR_DETAILS))
2439 fprintf (vect_dump,
2440 "disable versioning for alias - max number of generated "
2441 "checks exceeded.");
2444 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2446 return false;
2449 return true;
2452 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2454 void
2455 vect_free_slp_tree (slp_tree node)
2457 if (!node)
2458 return;
2460 if (SLP_TREE_LEFT (node))
2461 vect_free_slp_tree (SLP_TREE_LEFT (node));
2463 if (SLP_TREE_RIGHT (node))
2464 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2466 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2468 if (SLP_TREE_VEC_STMTS (node))
2469 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2471 free (node);
2475 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2476 of a legal type and that they match the defs of the first stmt of the SLP
2477 group (stored in FIRST_STMT_...). */
2479 static bool
2480 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2481 tree rhs, VEC (tree, heap) **def_stmts0,
2482 VEC (tree, heap) **def_stmts1,
2483 enum vect_def_type *first_stmt_dt0,
2484 enum vect_def_type *first_stmt_dt1,
2485 tree *first_stmt_def0_type,
2486 tree *first_stmt_def1_type,
2487 tree *first_stmt_const_oprnd,
2488 int ncopies_for_cost)
2490 tree oprnd;
2491 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2492 unsigned int i, number_of_oprnds = op_type;
2493 tree def, def_stmt;
2494 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2495 stmt_vec_info stmt_info =
2496 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2498 /* Store. */
2499 if (!op_type)
2500 number_of_oprnds = 1;
2501 else
2502 gcc_assert (op_type == unary_op || op_type == binary_op);
2504 for (i = 0; i < number_of_oprnds; i++)
2506 if (op_type)
2507 oprnd = TREE_OPERAND (rhs, i);
2508 else
2509 oprnd = rhs;
2511 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2512 || (!def_stmt && dt[i] != vect_constant_def))
2514 if (vect_print_dump_info (REPORT_SLP))
2516 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2517 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2520 return false;
2523 if (!*first_stmt_dt0)
2525 /* op0 of the first stmt of the group - store its info. */
2526 *first_stmt_dt0 = dt[i];
2527 if (def)
2528 *first_stmt_def0_type = TREE_TYPE (def);
2529 else
2530 *first_stmt_const_oprnd = oprnd;
2532 /* Analyze costs (for the first stmt of the group only). */
2533 if (op_type)
2534 /* Not memory operation (we don't call this functions for loads). */
2535 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2536 else
2537 /* Store. */
2538 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2541 else
2543 if (!*first_stmt_dt1 && i == 1)
2545 /* op1 of the first stmt of the group - store its info. */
2546 *first_stmt_dt1 = dt[i];
2547 if (def)
2548 *first_stmt_def1_type = TREE_TYPE (def);
2549 else
2551 /* We assume that the stmt contains only one constant
2552 operand. We fail otherwise, to be on the safe side. */
2553 if (*first_stmt_const_oprnd)
2555 if (vect_print_dump_info (REPORT_SLP))
2556 fprintf (vect_dump, "Build SLP failed: two constant "
2557 "oprnds in stmt");
2558 return false;
2560 *first_stmt_const_oprnd = oprnd;
2563 else
2565 /* Not first stmt of the group, check that the def-stmt/s match
2566 the def-stmt/s of the first stmt. */
2567 if ((i == 0
2568 && (*first_stmt_dt0 != dt[i]
2569 || (*first_stmt_def0_type && def
2570 && *first_stmt_def0_type != TREE_TYPE (def))))
2571 || (i == 1
2572 && (*first_stmt_dt1 != dt[i]
2573 || (*first_stmt_def1_type && def
2574 && *first_stmt_def1_type != TREE_TYPE (def))))
2575 || (!def
2576 && TREE_TYPE (*first_stmt_const_oprnd)
2577 != TREE_TYPE (oprnd)))
2579 if (vect_print_dump_info (REPORT_SLP))
2580 fprintf (vect_dump, "Build SLP failed: different types ");
2582 return false;
2587 /* Check the types of the definitions. */
2588 switch (dt[i])
2590 case vect_constant_def:
2591 case vect_invariant_def:
2592 break;
2594 case vect_loop_def:
2595 if (i == 0)
2596 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2597 else
2598 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2599 break;
2601 default:
2602 /* FORNOW: Not supported. */
2603 if (vect_print_dump_info (REPORT_SLP))
2605 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2606 print_generic_expr (vect_dump, def, TDF_SLIM);
2609 return false;
2613 return true;
2617 /* Recursively build an SLP tree starting from NODE.
2618 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2619 permutation or are of unsupported types of operation. Otherwise, return
2620 TRUE.
2621 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2622 in the case of multiple types for now. */
2624 static bool
2625 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2626 unsigned int group_size, bool *slp_impossible,
2627 int *inside_cost, int *outside_cost,
2628 int ncopies_for_cost)
2630 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2631 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2632 unsigned int i;
2633 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2634 tree stmt = VEC_index (tree, stmts, 0);
2635 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2636 enum tree_code first_stmt_code = 0;
2637 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2638 tree lhs, rhs, prev_stmt = NULL_TREE;
2639 bool stop_recursion = false, need_same_oprnds = false;
2640 tree vectype, scalar_type, first_op1 = NULL_TREE;
2641 unsigned int vectorization_factor = 0, ncopies;
2642 optab optab;
2643 int icode;
2644 enum machine_mode optab_op2_mode;
2645 enum machine_mode vec_mode;
2646 tree first_stmt_const_oprnd = NULL_TREE;
2647 struct data_reference *first_dr;
2649 /* For every stmt in NODE find its def stmt/s. */
2650 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2652 if (vect_print_dump_info (REPORT_SLP))
2654 fprintf (vect_dump, "Build SLP for ");
2655 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2658 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2660 if (vect_print_dump_info (REPORT_SLP))
2662 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2663 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2666 return false;
2669 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2670 vectype = get_vectype_for_scalar_type (scalar_type);
2671 if (!vectype)
2673 if (vect_print_dump_info (REPORT_SLP))
2675 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2676 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2678 return false;
2681 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2682 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2683 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2684 if (ncopies > 1)
2686 /* FORNOW. */
2687 if (vect_print_dump_info (REPORT_SLP))
2688 fprintf (vect_dump, "SLP failed - multiple types ");
2690 *slp_impossible = true;
2691 return false;
2694 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2695 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2697 /* Check the operation. */
2698 if (i == 0)
2700 first_stmt_code = TREE_CODE (rhs);
2702 /* Shift arguments should be equal in all the packed stmts for a
2703 vector shift with scalar shift operand. */
2704 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2706 vec_mode = TYPE_MODE (vectype);
2707 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2708 if (!optab)
2710 if (vect_print_dump_info (REPORT_SLP))
2711 fprintf (vect_dump, "Build SLP failed: no optab.");
2712 return false;
2714 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2715 if (icode == CODE_FOR_nothing)
2717 if (vect_print_dump_info (REPORT_SLP))
2718 fprintf (vect_dump,
2719 "Build SLP failed: op not supported by target.");
2720 return false;
2722 optab_op2_mode = insn_data[icode].operand[2].mode;
2723 if (!VECTOR_MODE_P (optab_op2_mode))
2725 need_same_oprnds = true;
2726 first_op1 = TREE_OPERAND (rhs, 1);
2730 else
2732 if (first_stmt_code != TREE_CODE (rhs))
2734 if (vect_print_dump_info (REPORT_SLP))
2736 fprintf (vect_dump,
2737 "Build SLP failed: different operation in stmt ");
2738 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2741 return false;
2744 if (need_same_oprnds
2745 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2747 if (vect_print_dump_info (REPORT_SLP))
2749 fprintf (vect_dump,
2750 "Build SLP failed: different shift arguments in ");
2751 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2754 return false;
2758 /* Strided store or load. */
2759 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2761 if (REFERENCE_CLASS_P (lhs))
2763 /* Store. */
2764 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2765 &def_stmts0, &def_stmts1,
2766 &first_stmt_dt0,
2767 &first_stmt_dt1,
2768 &first_stmt_def0_type,
2769 &first_stmt_def1_type,
2770 &first_stmt_const_oprnd,
2771 ncopies_for_cost))
2772 return false;
2774 else
2776 /* Load. */
2777 if (i == 0)
2779 /* First stmt of the SLP group should be the first load of
2780 the interleaving loop if data permutation is not
2781 allowed. */
2782 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2784 /* FORNOW: data permutations are not supported. */
2785 if (vect_print_dump_info (REPORT_SLP))
2787 fprintf (vect_dump, "Build SLP failed: strided "
2788 " loads need permutation ");
2789 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2792 return false;
2795 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2796 if (vect_supportable_dr_alignment (first_dr)
2797 == dr_unaligned_unsupported)
2799 if (vect_print_dump_info (REPORT_SLP))
2801 fprintf (vect_dump, "Build SLP failed: unsupported "
2802 " unaligned load ");
2803 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2806 return false;
2809 /* Analyze costs (for the first stmt in the group). */
2810 vect_model_load_cost (vinfo_for_stmt (stmt),
2811 ncopies_for_cost, *node);
2813 else
2815 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2817 /* FORNOW: data permutations are not supported. */
2818 if (vect_print_dump_info (REPORT_SLP))
2820 fprintf (vect_dump, "Build SLP failed: strided "
2821 " loads need permutation ");
2822 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2824 return false;
2828 prev_stmt = stmt;
2830 /* We stop the tree when we reach a group of loads. */
2831 stop_recursion = true;
2832 continue;
2834 } /* Strided access. */
2835 else
2837 if (REFERENCE_CLASS_P (rhs))
2839 /* Not strided load. */
2840 if (vect_print_dump_info (REPORT_SLP))
2842 fprintf (vect_dump, "Build SLP failed: not strided load ");
2843 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2846 /* FORNOW: Not strided loads are not supported. */
2847 return false;
2850 /* Not memory operation. */
2851 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2853 if (vect_print_dump_info (REPORT_SLP))
2855 fprintf (vect_dump, "Build SLP failed: operation");
2856 fprintf (vect_dump, " unsupported ");
2857 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2860 return false;
2863 /* Find the def-stmts. */
2864 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2865 &def_stmts1, &first_stmt_dt0,
2866 &first_stmt_dt1,
2867 &first_stmt_def0_type,
2868 &first_stmt_def1_type,
2869 &first_stmt_const_oprnd,
2870 ncopies_for_cost))
2871 return false;
2875 /* Add the costs of the node to the overall instance costs. */
2876 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2877 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2879 /* Strided loads were reached - stop the recursion. */
2880 if (stop_recursion)
2881 return true;
2883 /* Create SLP_TREE nodes for the definition node/s. */
2884 if (first_stmt_dt0 == vect_loop_def)
2886 slp_tree left_node = XNEW (struct _slp_tree);
2887 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2888 SLP_TREE_VEC_STMTS (left_node) = NULL;
2889 SLP_TREE_LEFT (left_node) = NULL;
2890 SLP_TREE_RIGHT (left_node) = NULL;
2891 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2892 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2893 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2894 slp_impossible, inside_cost, outside_cost,
2895 ncopies_for_cost))
2896 return false;
2898 SLP_TREE_LEFT (*node) = left_node;
2901 if (first_stmt_dt1 == vect_loop_def)
2903 slp_tree right_node = XNEW (struct _slp_tree);
2904 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2905 SLP_TREE_VEC_STMTS (right_node) = NULL;
2906 SLP_TREE_LEFT (right_node) = NULL;
2907 SLP_TREE_RIGHT (right_node) = NULL;
2908 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2909 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2910 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2911 slp_impossible, inside_cost, outside_cost,
2912 ncopies_for_cost))
2913 return false;
2915 SLP_TREE_RIGHT (*node) = right_node;
2918 return true;
2922 static void
2923 vect_print_slp_tree (slp_tree node)
2925 int i;
2926 tree stmt;
2928 if (!node)
2929 return;
2931 fprintf (vect_dump, "node ");
2932 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2934 fprintf (vect_dump, "\n\tstmt %d ", i);
2935 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2937 fprintf (vect_dump, "\n");
2939 vect_print_slp_tree (SLP_TREE_LEFT (node));
2940 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2944 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2945 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2946 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2947 stmts in NODE are to be marked. */
2949 static void
2950 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2952 int i;
2953 tree stmt;
2955 if (!node)
2956 return;
2958 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2959 if (j < 0 || i == j)
2960 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2962 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2963 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2967 /* Analyze an SLP instance starting from a group of strided stores. Call
2968 vect_build_slp_tree to build a tree of packed stmts if possible.
2969 Return FALSE if it's impossible to SLP any stmt in the loop. */
2971 static bool
2972 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2974 slp_instance new_instance;
2975 slp_tree node = XNEW (struct _slp_tree);
2976 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2977 unsigned int unrolling_factor = 1, nunits;
2978 tree vectype, scalar_type, next;
2979 unsigned int vectorization_factor = 0, ncopies;
2980 bool slp_impossible = false;
2981 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2983 /* FORNOW: multiple types are not supported. */
2984 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2985 vectype = get_vectype_for_scalar_type (scalar_type);
2986 if (!vectype)
2988 if (vect_print_dump_info (REPORT_SLP))
2990 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2991 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2993 return false;
2996 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2997 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2998 ncopies = vectorization_factor / nunits;
2999 if (ncopies > 1)
3001 if (vect_print_dump_info (REPORT_SLP))
3002 fprintf (vect_dump, "SLP failed - multiple types ");
3004 return false;
3007 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3008 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3009 next = stmt;
3010 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3011 while (next)
3013 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3014 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3017 SLP_TREE_VEC_STMTS (node) = NULL;
3018 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3019 SLP_TREE_LEFT (node) = NULL;
3020 SLP_TREE_RIGHT (node) = NULL;
3021 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3022 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3024 /* Calculate the unrolling factor. */
3025 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3027 /* Calculate the number of vector stmts to create based on the unrolling
3028 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3029 GROUP_SIZE / NUNITS otherwise. */
3030 ncopies_for_cost = unrolling_factor * group_size / nunits;
3032 /* Build the tree for the SLP instance. */
3033 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3034 &inside_cost, &outside_cost, ncopies_for_cost))
3036 /* Create a new SLP instance. */
3037 new_instance = XNEW (struct _slp_instance);
3038 SLP_INSTANCE_TREE (new_instance) = node;
3039 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3040 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3041 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3042 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3043 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3044 new_instance);
3045 if (vect_print_dump_info (REPORT_SLP))
3046 vect_print_slp_tree (node);
3048 return true;
3051 /* Failed to SLP. */
3052 /* Free the allocated memory. */
3053 vect_free_slp_tree (node);
3055 if (slp_impossible)
3056 return false;
3058 /* SLP failed for this instance, but it is still possible to SLP other stmts
3059 in the loop. */
3060 return true;
3064 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3065 trees of packed scalar stmts if SLP is possible. */
3067 static bool
3068 vect_analyze_slp (loop_vec_info loop_vinfo)
3070 unsigned int i;
3071 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3072 tree store;
3074 if (vect_print_dump_info (REPORT_SLP))
3075 fprintf (vect_dump, "=== vect_analyze_slp ===");
3077 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3078 if (!vect_analyze_slp_instance (loop_vinfo, store))
3080 /* SLP failed. No instance can be SLPed in the loop. */
3081 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3082 fprintf (vect_dump, "SLP failed.");
3084 return false;
3087 return true;
3091 /* For each possible SLP instance decide whether to SLP it and calculate overall
3092 unrolling factor needed to SLP the loop. */
3094 static void
3095 vect_make_slp_decision (loop_vec_info loop_vinfo)
3097 unsigned int i, unrolling_factor = 1;
3098 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3099 slp_instance instance;
3100 int decided_to_slp = 0;
3102 if (vect_print_dump_info (REPORT_SLP))
3103 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3105 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3107 /* FORNOW: SLP if you can. */
3108 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3109 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3111 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3112 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3113 loop-based vectorization. Such stmts will be marked as HYBRID. */
3114 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3115 decided_to_slp++;
3118 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3120 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3121 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3122 decided_to_slp, unrolling_factor);
3126 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3127 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3129 static void
3130 vect_detect_hybrid_slp_stmts (slp_tree node)
3132 int i;
3133 tree stmt;
3134 imm_use_iterator imm_iter;
3135 tree use_stmt;
3137 if (!node)
3138 return;
3140 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3141 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3142 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3143 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3144 if (vinfo_for_stmt (use_stmt)
3145 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3146 vect_mark_slp_stmts (node, hybrid, i);
3148 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3149 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3153 /* Find stmts that must be both vectorized and SLPed. */
3155 static void
3156 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3158 unsigned int i;
3159 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3160 slp_instance instance;
3162 if (vect_print_dump_info (REPORT_SLP))
3163 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3165 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3166 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3170 /* Function vect_analyze_data_refs.
3172 Find all the data references in the loop.
3174 The general structure of the analysis of data refs in the vectorizer is as
3175 follows:
3176 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3177 find and analyze all data-refs in the loop and their dependences.
3178 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3179 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3180 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3184 static bool
3185 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3188 unsigned int i;
3189 VEC (data_reference_p, heap) *datarefs;
3190 struct data_reference *dr;
3191 tree scalar_type;
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3196 compute_data_dependences_for_loop (loop, true,
3197 &LOOP_VINFO_DATAREFS (loop_vinfo),
3198 &LOOP_VINFO_DDRS (loop_vinfo));
3200 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3201 from stmt_vec_info struct to DR and vectype. */
3202 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3204 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3206 tree stmt;
3207 stmt_vec_info stmt_info;
3208 basic_block bb;
3209 tree base, offset, init;
3211 if (!dr || !DR_REF (dr))
3213 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3214 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3215 return false;
3218 stmt = DR_STMT (dr);
3219 stmt_info = vinfo_for_stmt (stmt);
3221 /* Check that analysis of the data-ref succeeded. */
3222 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3223 || !DR_STEP (dr))
3225 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3227 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3228 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3230 return false;
3233 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3236 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3237 "constant");
3238 return false;
3241 if (!DR_SYMBOL_TAG (dr))
3243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3245 fprintf (vect_dump, "not vectorized: no memory tag for ");
3246 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3248 return false;
3251 base = unshare_expr (DR_BASE_ADDRESS (dr));
3252 offset = unshare_expr (DR_OFFSET (dr));
3253 init = unshare_expr (DR_INIT (dr));
3255 /* Update DR field in stmt_vec_info struct. */
3256 bb = bb_for_stmt (stmt);
3258 /* If the dataref is in an inner-loop of the loop that is considered for
3259 for vectorization, we also want to analyze the access relative to
3260 the outer-loop (DR contains information only relative to the
3261 inner-most enclosing loop). We do that by building a reference to the
3262 first location accessed by the inner-loop, and analyze it relative to
3263 the outer-loop. */
3264 if (nested_in_vect_loop_p (loop, stmt))
3266 tree outer_step, outer_base, outer_init;
3267 HOST_WIDE_INT pbitsize, pbitpos;
3268 tree poffset;
3269 enum machine_mode pmode;
3270 int punsignedp, pvolatilep;
3271 affine_iv base_iv, offset_iv;
3272 tree dinit;
3274 /* Build a reference to the first location accessed by the
3275 inner-loop: *(BASE+INIT). (The first location is actually
3276 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3277 tree inner_base = build_fold_indirect_ref
3278 (fold_build2 (POINTER_PLUS_EXPR,
3279 TREE_TYPE (base), base,
3280 fold_convert (sizetype, init)));
3282 if (vect_print_dump_info (REPORT_DETAILS))
3284 fprintf (dump_file, "analyze in outer-loop: ");
3285 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3288 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3289 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3290 gcc_assert (outer_base != NULL_TREE);
3292 if (pbitpos % BITS_PER_UNIT != 0)
3294 if (vect_print_dump_info (REPORT_DETAILS))
3295 fprintf (dump_file, "failed: bit offset alignment.\n");
3296 return false;
3299 outer_base = build_fold_addr_expr (outer_base);
3300 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3302 if (vect_print_dump_info (REPORT_DETAILS))
3303 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3304 return false;
3307 if (offset)
3309 if (poffset)
3310 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3311 else
3312 poffset = offset;
3315 if (!poffset)
3317 offset_iv.base = ssize_int (0);
3318 offset_iv.step = ssize_int (0);
3320 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (dump_file, "evolution of offset is not affine.\n");
3324 return false;
3327 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3328 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3329 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3330 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3331 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3333 outer_step = size_binop (PLUS_EXPR,
3334 fold_convert (ssizetype, base_iv.step),
3335 fold_convert (ssizetype, offset_iv.step));
3337 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3338 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3339 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3340 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3341 STMT_VINFO_DR_OFFSET (stmt_info) =
3342 fold_convert (ssizetype, offset_iv.base);
3343 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3344 size_int (highest_pow2_factor (offset_iv.base));
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3348 fprintf (dump_file, "\touter base_address: ");
3349 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3350 fprintf (dump_file, "\n\touter offset from base address: ");
3351 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3352 fprintf (dump_file, "\n\touter constant offset from base address: ");
3353 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3354 fprintf (dump_file, "\n\touter step: ");
3355 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3356 fprintf (dump_file, "\n\touter aligned to: ");
3357 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3361 if (STMT_VINFO_DATA_REF (stmt_info))
3363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3365 fprintf (vect_dump,
3366 "not vectorized: more than one data ref in stmt: ");
3367 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3369 return false;
3371 STMT_VINFO_DATA_REF (stmt_info) = dr;
3373 /* Set vectype for STMT. */
3374 scalar_type = TREE_TYPE (DR_REF (dr));
3375 STMT_VINFO_VECTYPE (stmt_info) =
3376 get_vectype_for_scalar_type (scalar_type);
3377 if (!STMT_VINFO_VECTYPE (stmt_info))
3379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3381 fprintf (vect_dump,
3382 "not vectorized: no vectype for stmt: ");
3383 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3384 fprintf (vect_dump, " scalar_type: ");
3385 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3387 return false;
3391 return true;
3395 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3397 /* Function vect_mark_relevant.
3399 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3401 static void
3402 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3403 enum vect_relevant relevant, bool live_p)
3405 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3406 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3407 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3409 if (vect_print_dump_info (REPORT_DETAILS))
3410 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3412 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3414 tree pattern_stmt;
3416 /* This is the last stmt in a sequence that was detected as a
3417 pattern that can potentially be vectorized. Don't mark the stmt
3418 as relevant/live because it's not going to be vectorized.
3419 Instead mark the pattern-stmt that replaces it. */
3421 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3425 stmt_info = vinfo_for_stmt (pattern_stmt);
3426 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3427 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3428 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3429 stmt = pattern_stmt;
3432 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3433 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3434 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3436 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3437 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3439 if (vect_print_dump_info (REPORT_DETAILS))
3440 fprintf (vect_dump, "already marked relevant/live.");
3441 return;
3444 VEC_safe_push (tree, heap, *worklist, stmt);
3448 /* Function vect_stmt_relevant_p.
3450 Return true if STMT in loop that is represented by LOOP_VINFO is
3451 "relevant for vectorization".
3453 A stmt is considered "relevant for vectorization" if:
3454 - it has uses outside the loop.
3455 - it has vdefs (it alters memory).
3456 - control stmts in the loop (except for the exit condition).
3458 CHECKME: what other side effects would the vectorizer allow? */
3460 static bool
3461 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3462 enum vect_relevant *relevant, bool *live_p)
3464 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3465 ssa_op_iter op_iter;
3466 imm_use_iterator imm_iter;
3467 use_operand_p use_p;
3468 def_operand_p def_p;
3470 *relevant = vect_unused_in_loop;
3471 *live_p = false;
3473 /* cond stmt other than loop exit cond. */
3474 if (is_ctrl_stmt (stmt)
3475 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3476 *relevant = vect_used_in_loop;
3478 /* changing memory. */
3479 if (TREE_CODE (stmt) != PHI_NODE)
3480 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3482 if (vect_print_dump_info (REPORT_DETAILS))
3483 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3484 *relevant = vect_used_in_loop;
3487 /* uses outside the loop. */
3488 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3490 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3492 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3493 if (!flow_bb_inside_loop_p (loop, bb))
3495 if (vect_print_dump_info (REPORT_DETAILS))
3496 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3498 /* We expect all such uses to be in the loop exit phis
3499 (because of loop closed form) */
3500 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3501 gcc_assert (bb == single_exit (loop)->dest);
3503 *live_p = true;
3508 return (*live_p || *relevant);
3513 Function process_use.
3515 Inputs:
3516 - a USE in STMT in a loop represented by LOOP_VINFO
3517 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3518 that defined USE. This is dont by calling mark_relevant and passing it
3519 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3521 Outputs:
3522 Generally, LIVE_P and RELEVANT are used to define the liveness and
3523 relevance info of the DEF_STMT of this USE:
3524 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3525 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3526 Exceptions:
3527 - case 1: If USE is used only for address computations (e.g. array indexing),
3528 which does not need to be directly vectorized, then the liveness/relevance
3529 of the respective DEF_STMT is left unchanged.
3530 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3531 skip DEF_STMT cause it had already been processed.
3532 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3533 be modified accordingly.
3535 Return true if everything is as expected. Return false otherwise. */
3537 static bool
3538 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3539 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3541 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3542 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3543 stmt_vec_info dstmt_vinfo;
3544 basic_block bb, def_bb;
3545 tree def, def_stmt;
3546 enum vect_def_type dt;
3548 /* case 1: we are only interested in uses that need to be vectorized. Uses
3549 that are used for address computation are not considered relevant. */
3550 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3551 return true;
3553 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3555 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3556 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3557 return false;
3560 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3561 return true;
3563 def_bb = bb_for_stmt (def_stmt);
3564 if (!flow_bb_inside_loop_p (loop, def_bb))
3566 if (vect_print_dump_info (REPORT_DETAILS))
3567 fprintf (vect_dump, "def_stmt is out of loop.");
3568 return true;
3571 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3572 DEF_STMT must have already been processed, because this should be the
3573 only way that STMT, which is a reduction-phi, was put in the worklist,
3574 as there should be no other uses for DEF_STMT in the loop. So we just
3575 check that everything is as expected, and we are done. */
3576 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3577 bb = bb_for_stmt (stmt);
3578 if (TREE_CODE (stmt) == PHI_NODE
3579 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3580 && TREE_CODE (def_stmt) != PHI_NODE
3581 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3582 && bb->loop_father == def_bb->loop_father)
3584 if (vect_print_dump_info (REPORT_DETAILS))
3585 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3586 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3587 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3588 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3589 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3590 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3591 return true;
3594 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3595 outer-loop-header-bb:
3596 d = def_stmt
3597 inner-loop:
3598 stmt # use (d)
3599 outer-loop-tail-bb:
3600 ... */
3601 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3603 if (vect_print_dump_info (REPORT_DETAILS))
3604 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3605 switch (relevant)
3607 case vect_unused_in_loop:
3608 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3609 vect_used_by_reduction : vect_unused_in_loop;
3610 break;
3611 case vect_used_in_outer_by_reduction:
3612 relevant = vect_used_by_reduction;
3613 break;
3614 case vect_used_in_outer:
3615 relevant = vect_used_in_loop;
3616 break;
3617 case vect_used_by_reduction:
3618 case vect_used_in_loop:
3619 break;
3621 default:
3622 gcc_unreachable ();
3626 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3627 outer-loop-header-bb:
3629 inner-loop:
3630 d = def_stmt
3631 outer-loop-tail-bb:
3632 stmt # use (d) */
3633 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3635 if (vect_print_dump_info (REPORT_DETAILS))
3636 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3637 switch (relevant)
3639 case vect_unused_in_loop:
3640 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3641 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3642 break;
3644 case vect_used_in_outer_by_reduction:
3645 case vect_used_in_outer:
3646 break;
3648 case vect_used_by_reduction:
3649 relevant = vect_used_in_outer_by_reduction;
3650 break;
3652 case vect_used_in_loop:
3653 relevant = vect_used_in_outer;
3654 break;
3656 default:
3657 gcc_unreachable ();
3661 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3662 return true;
3666 /* Function vect_mark_stmts_to_be_vectorized.
3668 Not all stmts in the loop need to be vectorized. For example:
3670 for i...
3671 for j...
3672 1. T0 = i + j
3673 2. T1 = a[T0]
3675 3. j = j + 1
3677 Stmt 1 and 3 do not need to be vectorized, because loop control and
3678 addressing of vectorized data-refs are handled differently.
3680 This pass detects such stmts. */
3682 static bool
3683 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3685 VEC(tree,heap) *worklist;
3686 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3687 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3688 unsigned int nbbs = loop->num_nodes;
3689 block_stmt_iterator si;
3690 tree stmt;
3691 stmt_ann_t ann;
3692 unsigned int i;
3693 stmt_vec_info stmt_vinfo;
3694 basic_block bb;
3695 tree phi;
3696 bool live_p;
3697 enum vect_relevant relevant;
3699 if (vect_print_dump_info (REPORT_DETAILS))
3700 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3702 worklist = VEC_alloc (tree, heap, 64);
3704 /* 1. Init worklist. */
3705 for (i = 0; i < nbbs; i++)
3707 bb = bbs[i];
3708 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3710 if (vect_print_dump_info (REPORT_DETAILS))
3712 fprintf (vect_dump, "init: phi relevant? ");
3713 print_generic_expr (vect_dump, phi, TDF_SLIM);
3716 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3717 vect_mark_relevant (&worklist, phi, relevant, live_p);
3719 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3721 stmt = bsi_stmt (si);
3722 if (vect_print_dump_info (REPORT_DETAILS))
3724 fprintf (vect_dump, "init: stmt relevant? ");
3725 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3728 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3729 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3733 /* 2. Process_worklist */
3734 while (VEC_length (tree, worklist) > 0)
3736 use_operand_p use_p;
3737 ssa_op_iter iter;
3739 stmt = VEC_pop (tree, worklist);
3740 if (vect_print_dump_info (REPORT_DETAILS))
3742 fprintf (vect_dump, "worklist: examine stmt: ");
3743 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3746 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3747 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3748 liveness and relevance properties of STMT. */
3749 ann = stmt_ann (stmt);
3750 stmt_vinfo = vinfo_for_stmt (stmt);
3751 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3752 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3754 /* Generally, the liveness and relevance properties of STMT are
3755 propagated as is to the DEF_STMTs of its USEs:
3756 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3757 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3759 One exception is when STMT has been identified as defining a reduction
3760 variable; in this case we set the liveness/relevance as follows:
3761 live_p = false
3762 relevant = vect_used_by_reduction
3763 This is because we distinguish between two kinds of relevant stmts -
3764 those that are used by a reduction computation, and those that are
3765 (also) used by a regular computation. This allows us later on to
3766 identify stmts that are used solely by a reduction, and therefore the
3767 order of the results that they produce does not have to be kept.
3769 Reduction phis are expected to be used by a reduction stmt, or by
3770 in an outer loop; Other reduction stmts are expected to be
3771 in the loop, and possibly used by a stmt in an outer loop.
3772 Here are the expected values of "relevant" for reduction phis/stmts:
3774 relevance: phi stmt
3775 vect_unused_in_loop ok
3776 vect_used_in_outer_by_reduction ok ok
3777 vect_used_in_outer ok ok
3778 vect_used_by_reduction ok
3779 vect_used_in_loop */
3781 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3783 enum vect_relevant tmp_relevant = relevant;
3784 switch (tmp_relevant)
3786 case vect_unused_in_loop:
3787 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3788 relevant = vect_used_by_reduction;
3789 break;
3791 case vect_used_in_outer_by_reduction:
3792 case vect_used_in_outer:
3793 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3794 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3795 break;
3797 case vect_used_by_reduction:
3798 if (TREE_CODE (stmt) == PHI_NODE)
3799 break;
3800 /* fall through */
3801 case vect_used_in_loop:
3802 default:
3803 if (vect_print_dump_info (REPORT_DETAILS))
3804 fprintf (vect_dump, "unsupported use of reduction.");
3805 VEC_free (tree, heap, worklist);
3806 return false;
3808 live_p = false;
3811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3813 tree op = USE_FROM_PTR (use_p);
3814 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3816 VEC_free (tree, heap, worklist);
3817 return false;
3820 } /* while worklist */
3822 VEC_free (tree, heap, worklist);
3823 return true;
3827 /* Function vect_can_advance_ivs_p
3829 In case the number of iterations that LOOP iterates is unknown at compile
3830 time, an epilog loop will be generated, and the loop induction variables
3831 (IVs) will be "advanced" to the value they are supposed to take just before
3832 the epilog loop. Here we check that the access function of the loop IVs
3833 and the expression that represents the loop bound are simple enough.
3834 These restrictions will be relaxed in the future. */
3836 static bool
3837 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3839 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3840 basic_block bb = loop->header;
3841 tree phi;
3843 /* Analyze phi functions of the loop header. */
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3848 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3850 tree access_fn = NULL;
3851 tree evolution_part;
3853 if (vect_print_dump_info (REPORT_DETAILS))
3855 fprintf (vect_dump, "Analyze phi: ");
3856 print_generic_expr (vect_dump, phi, TDF_SLIM);
3859 /* Skip virtual phi's. The data dependences that are associated with
3860 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3862 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3864 if (vect_print_dump_info (REPORT_DETAILS))
3865 fprintf (vect_dump, "virtual phi. skip.");
3866 continue;
3869 /* Skip reduction phis. */
3871 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3873 if (vect_print_dump_info (REPORT_DETAILS))
3874 fprintf (vect_dump, "reduc phi. skip.");
3875 continue;
3878 /* Analyze the evolution function. */
3880 access_fn = instantiate_parameters
3881 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3883 if (!access_fn)
3885 if (vect_print_dump_info (REPORT_DETAILS))
3886 fprintf (vect_dump, "No Access function.");
3887 return false;
3890 if (vect_print_dump_info (REPORT_DETAILS))
3892 fprintf (vect_dump, "Access function of PHI: ");
3893 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3896 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3898 if (evolution_part == NULL_TREE)
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "No evolution.");
3902 return false;
3905 /* FORNOW: We do not transform initial conditions of IVs
3906 which evolution functions are a polynomial of degree >= 2. */
3908 if (tree_is_chrec (evolution_part))
3909 return false;
3912 return true;
3916 /* Function vect_get_loop_niters.
3918 Determine how many iterations the loop is executed.
3919 If an expression that represents the number of iterations
3920 can be constructed, place it in NUMBER_OF_ITERATIONS.
3921 Return the loop exit condition. */
3923 static tree
3924 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3926 tree niters;
3928 if (vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "=== get_loop_niters ===");
3931 niters = number_of_exit_cond_executions (loop);
3933 if (niters != NULL_TREE
3934 && niters != chrec_dont_know)
3936 *number_of_iterations = niters;
3938 if (vect_print_dump_info (REPORT_DETAILS))
3940 fprintf (vect_dump, "==> get_loop_niters:" );
3941 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3945 return get_loop_exit_condition (loop);
3949 /* Function vect_analyze_loop_1.
3951 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3952 for it. The different analyses will record information in the
3953 loop_vec_info struct. This is a subset of the analyses applied in
3954 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3955 that is now considered for (outer-loop) vectorization. */
3957 static loop_vec_info
3958 vect_analyze_loop_1 (struct loop *loop)
3960 loop_vec_info loop_vinfo;
3962 if (vect_print_dump_info (REPORT_DETAILS))
3963 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3965 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3967 loop_vinfo = vect_analyze_loop_form (loop);
3968 if (!loop_vinfo)
3970 if (vect_print_dump_info (REPORT_DETAILS))
3971 fprintf (vect_dump, "bad inner-loop form.");
3972 return NULL;
3975 return loop_vinfo;
3979 /* Function vect_analyze_loop_form.
3981 Verify that certain CFG restrictions hold, including:
3982 - the loop has a pre-header
3983 - the loop has a single entry and exit
3984 - the loop exit condition is simple enough, and the number of iterations
3985 can be analyzed (a countable loop). */
3987 loop_vec_info
3988 vect_analyze_loop_form (struct loop *loop)
3990 loop_vec_info loop_vinfo;
3991 tree loop_cond;
3992 tree number_of_iterations = NULL;
3993 loop_vec_info inner_loop_vinfo = NULL;
3995 if (vect_print_dump_info (REPORT_DETAILS))
3996 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3998 /* Different restrictions apply when we are considering an inner-most loop,
3999 vs. an outer (nested) loop.
4000 (FORNOW. May want to relax some of these restrictions in the future). */
4002 if (!loop->inner)
4004 /* Inner-most loop. We currently require that the number of BBs is
4005 exactly 2 (the header and latch). Vectorizable inner-most loops
4006 look like this:
4008 (pre-header)
4010 header <--------+
4011 | | |
4012 | +--> latch --+
4014 (exit-bb) */
4016 if (loop->num_nodes != 2)
4018 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4019 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4020 return NULL;
4023 if (empty_block_p (loop->header))
4025 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4026 fprintf (vect_dump, "not vectorized: empty loop.");
4027 return NULL;
4030 else
4032 struct loop *innerloop = loop->inner;
4033 edge backedge, entryedge;
4035 /* Nested loop. We currently require that the loop is doubly-nested,
4036 contains a single inner loop, and the number of BBs is exactly 5.
4037 Vectorizable outer-loops look like this:
4039 (pre-header)
4041 header <---+
4043 inner-loop |
4045 tail ------+
4047 (exit-bb)
4049 The inner-loop has the properties expected of inner-most loops
4050 as described above. */
4052 if ((loop->inner)->inner || (loop->inner)->next)
4054 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4055 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4056 return NULL;
4059 /* Analyze the inner-loop. */
4060 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4061 if (!inner_loop_vinfo)
4063 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4064 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4065 return NULL;
4068 if (!expr_invariant_in_loop_p (loop,
4069 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4071 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4072 fprintf (vect_dump,
4073 "not vectorized: inner-loop count not invariant.");
4074 destroy_loop_vec_info (inner_loop_vinfo, true);
4075 return NULL;
4078 if (loop->num_nodes != 5)
4080 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4081 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4082 destroy_loop_vec_info (inner_loop_vinfo, true);
4083 return NULL;
4086 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4087 backedge = EDGE_PRED (innerloop->header, 1);
4088 entryedge = EDGE_PRED (innerloop->header, 0);
4089 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4091 backedge = EDGE_PRED (innerloop->header, 0);
4092 entryedge = EDGE_PRED (innerloop->header, 1);
4095 if (entryedge->src != loop->header
4096 || !single_exit (innerloop)
4097 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4099 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4100 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4101 destroy_loop_vec_info (inner_loop_vinfo, true);
4102 return NULL;
4105 if (vect_print_dump_info (REPORT_DETAILS))
4106 fprintf (vect_dump, "Considering outer-loop vectorization.");
4109 if (!single_exit (loop)
4110 || EDGE_COUNT (loop->header->preds) != 2)
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4114 if (!single_exit (loop))
4115 fprintf (vect_dump, "not vectorized: multiple exits.");
4116 else if (EDGE_COUNT (loop->header->preds) != 2)
4117 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4119 if (inner_loop_vinfo)
4120 destroy_loop_vec_info (inner_loop_vinfo, true);
4121 return NULL;
4124 /* We assume that the loop exit condition is at the end of the loop. i.e,
4125 that the loop is represented as a do-while (with a proper if-guard
4126 before the loop if needed), where the loop header contains all the
4127 executable statements, and the latch is empty. */
4128 if (!empty_block_p (loop->latch)
4129 || phi_nodes (loop->latch))
4131 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4132 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4133 if (inner_loop_vinfo)
4134 destroy_loop_vec_info (inner_loop_vinfo, true);
4135 return NULL;
4138 /* Make sure there exists a single-predecessor exit bb: */
4139 if (!single_pred_p (single_exit (loop)->dest))
4141 edge e = single_exit (loop);
4142 if (!(e->flags & EDGE_ABNORMAL))
4144 split_loop_exit_edge (e);
4145 if (vect_print_dump_info (REPORT_DETAILS))
4146 fprintf (vect_dump, "split exit edge.");
4148 else
4150 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4151 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4152 if (inner_loop_vinfo)
4153 destroy_loop_vec_info (inner_loop_vinfo, true);
4154 return NULL;
4158 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4159 if (!loop_cond)
4161 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4162 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4163 if (inner_loop_vinfo)
4164 destroy_loop_vec_info (inner_loop_vinfo, true);
4165 return NULL;
4168 if (!number_of_iterations)
4170 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4171 fprintf (vect_dump,
4172 "not vectorized: number of iterations cannot be computed.");
4173 if (inner_loop_vinfo)
4174 destroy_loop_vec_info (inner_loop_vinfo, true);
4175 return NULL;
4178 if (chrec_contains_undetermined (number_of_iterations))
4180 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4181 fprintf (vect_dump, "Infinite number of iterations.");
4182 if (inner_loop_vinfo)
4183 destroy_loop_vec_info (inner_loop_vinfo, true);
4184 return NULL;
4187 if (!NITERS_KNOWN_P (number_of_iterations))
4189 if (vect_print_dump_info (REPORT_DETAILS))
4191 fprintf (vect_dump, "Symbolic number of iterations is ");
4192 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4195 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4198 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4199 if (inner_loop_vinfo)
4200 destroy_loop_vec_info (inner_loop_vinfo, false);
4201 return NULL;
4204 loop_vinfo = new_loop_vec_info (loop);
4205 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4206 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4208 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4210 /* CHECKME: May want to keep it around it in the future. */
4211 if (inner_loop_vinfo)
4212 destroy_loop_vec_info (inner_loop_vinfo, false);
4214 gcc_assert (!loop->aux);
4215 loop->aux = loop_vinfo;
4216 return loop_vinfo;
4220 /* Function vect_analyze_loop.
4222 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4223 for it. The different analyses will record information in the
4224 loop_vec_info struct. */
4225 loop_vec_info
4226 vect_analyze_loop (struct loop *loop)
4228 bool ok;
4229 loop_vec_info loop_vinfo;
4231 if (vect_print_dump_info (REPORT_DETAILS))
4232 fprintf (vect_dump, "===== analyze_loop_nest =====");
4234 if (loop_outer (loop)
4235 && loop_vec_info_for_loop (loop_outer (loop))
4236 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4238 if (vect_print_dump_info (REPORT_DETAILS))
4239 fprintf (vect_dump, "outer-loop already vectorized.");
4240 return NULL;
4243 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4245 loop_vinfo = vect_analyze_loop_form (loop);
4246 if (!loop_vinfo)
4248 if (vect_print_dump_info (REPORT_DETAILS))
4249 fprintf (vect_dump, "bad loop form.");
4250 return NULL;
4253 /* Find all data references in the loop (which correspond to vdefs/vuses)
4254 and analyze their evolution in the loop.
4256 FORNOW: Handle only simple, array references, which
4257 alignment can be forced, and aligned pointer-references. */
4259 ok = vect_analyze_data_refs (loop_vinfo);
4260 if (!ok)
4262 if (vect_print_dump_info (REPORT_DETAILS))
4263 fprintf (vect_dump, "bad data references.");
4264 destroy_loop_vec_info (loop_vinfo, true);
4265 return NULL;
4268 /* Classify all cross-iteration scalar data-flow cycles.
4269 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4271 vect_analyze_scalar_cycles (loop_vinfo);
4273 vect_pattern_recog (loop_vinfo);
4275 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4277 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4278 if (!ok)
4280 if (vect_print_dump_info (REPORT_DETAILS))
4281 fprintf (vect_dump, "unexpected pattern.");
4282 destroy_loop_vec_info (loop_vinfo, true);
4283 return NULL;
4286 /* Analyze the alignment of the data-refs in the loop.
4287 Fail if a data reference is found that cannot be vectorized. */
4289 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4290 if (!ok)
4292 if (vect_print_dump_info (REPORT_DETAILS))
4293 fprintf (vect_dump, "bad data alignment.");
4294 destroy_loop_vec_info (loop_vinfo, true);
4295 return NULL;
4298 ok = vect_determine_vectorization_factor (loop_vinfo);
4299 if (!ok)
4301 if (vect_print_dump_info (REPORT_DETAILS))
4302 fprintf (vect_dump, "can't determine vectorization factor.");
4303 destroy_loop_vec_info (loop_vinfo, true);
4304 return NULL;
4307 /* Analyze data dependences between the data-refs in the loop.
4308 FORNOW: fail at the first data dependence that we encounter. */
4310 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4311 if (!ok)
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "bad data dependence.");
4315 destroy_loop_vec_info (loop_vinfo, true);
4316 return NULL;
4319 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4320 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4322 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4323 if (!ok)
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "bad data access.");
4327 destroy_loop_vec_info (loop_vinfo, true);
4328 return NULL;
4331 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4332 It is important to call pruning after vect_analyze_data_ref_accesses,
4333 since we use grouping information gathered by interleaving analysis. */
4334 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4335 if (!ok)
4337 if (vect_print_dump_info (REPORT_DETAILS))
4338 fprintf (vect_dump, "too long list of versioning for alias "
4339 "run-time tests.");
4340 destroy_loop_vec_info (loop_vinfo, true);
4341 return NULL;
4344 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4345 ok = vect_analyze_slp (loop_vinfo);
4346 if (ok)
4348 /* Decide which possible SLP instances to SLP. */
4349 vect_make_slp_decision (loop_vinfo);
4351 /* Find stmts that need to be both vectorized and SLPed. */
4352 vect_detect_hybrid_slp (loop_vinfo);
4355 /* This pass will decide on using loop versioning and/or loop peeling in
4356 order to enhance the alignment of data references in the loop. */
4358 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4359 if (!ok)
4361 if (vect_print_dump_info (REPORT_DETAILS))
4362 fprintf (vect_dump, "bad data alignment.");
4363 destroy_loop_vec_info (loop_vinfo, true);
4364 return NULL;
4367 /* Scan all the operations in the loop and make sure they are
4368 vectorizable. */
4370 ok = vect_analyze_operations (loop_vinfo);
4371 if (!ok)
4373 if (vect_print_dump_info (REPORT_DETAILS))
4374 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4375 destroy_loop_vec_info (loop_vinfo, true);
4376 return NULL;
4379 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4381 return loop_vinfo;