/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

static bool vect_can_advance_ivs_p (loop_vec_info);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              tree operation;

              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the
                 smallest datatype in this function, because that determines
                 the VF.  If the smallest datatype in the loop is present only
                 as the rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              operation = GIMPLE_STMT_OPERAND (stmt, 1);
              if (CONVERT_EXPR_P (operation)
                  || TREE_CODE (operation) == WIDEN_MULT_EXPR
                  || TREE_CODE (operation) == FLOAT_EXPR)
                {
                  tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type))
                      < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }
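              /* Illustration (added comment, not in the original source):
                 for 'int_x = (int) short_y' on a target with 16-byte
                 vectors, the lhs type (int, 4 bytes) would give nunits = 4,
                 but the rhs type (short, 2 bytes) gives nunits = 8; picking
                 the smaller rhs type makes the VF 8, as required.  */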
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
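
/* Worked example (added for exposition): if an SLP instance was built with
   unrolling factor 2 and an inside-of-loop cost of 10, and the loop VF is
   later set to 8, the instance is replicated 8/2 = 4 times as often as
   assumed when the cost was computed, so the cost above is scaled to 40.  */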
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* Inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. - that are used out of the
             loop) need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
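              /* For instance (illustrative, not from the original source):
                 a group of 3 interleaved loads has DR_GROUP_SIZE == 3 and
                 exact_log2 (3) == -1, so it fails the check below.  */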
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                       DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */
  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross-iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                             LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
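  /* Worked example (added for exposition): if the loop's own VF is 4 and
     the SLP instances require an unrolling factor of 2, the combined
     vectorization factor is least_common_multiple (4, 2) = 4; with factors
     4 and 6 it would be 12.  */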
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
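  /* Worked example (added for exposition): with --param min-vect-loop-bound=2
     and VF = 4, min_scalar_loop_bound is 2*4 - 1 = 7; if the cost model
     returns min_profitable_iters = 10, the more conservative threshold
     th = 10 is used below.  */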
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */
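  /* Illustration (added comment): for 'a[i] = x', a use of 'x' matches the
     'operand == use' test below and the function returns true, while a use
     of the index 'i' does not match the rhs and the function returns
     false.  */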
  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dumy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
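
/* Worked example for case 2 (added for exposition): if DRB's chain already
   holds accesses with inits {0, 8} and DRA has init 4, DRA is linked in
   between them, giving the chain {0, 4, 8}.  */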
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
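  /* Worked example (added for exposition): two 4-byte accesses with equal
     step 8 and inits 0 and 4 satisfy (4 - 0) % 4 == 0 and 4 - 0 <= 8 below,
     so they form an interleaving group of the form a[2*i], a[2*i+1].  */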
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  tree stmt_i = DR_STMT (dr_i);
  tree stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
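          /* Worked example (added for exposition): with VF = 4, a distance
             of 5 puts the dependent iterations in different vector
             iterations, which still execute in order, so vectorization is
             safe; a distance of 3 would put both endpoints in the same
             vector iteration and falls through to the rejection below.  */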
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
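  /* Worked example (added for exposition): for a 16-byte vector type and
     DR_INIT = 20 bytes, the computed misalignment is 20 % 16 = 4 bytes.  */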
  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
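
/* Worked example (added for exposition): peeling NPEEL = 2 iterations over
   4-byte elements advances DR by 8 bytes, so a DR that started 8 bytes
   misaligned modulo a 16-byte vector becomes (8 + 2*4) % 16 = 0, i.e.
   aligned.  */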
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the number
         of the interleaved accesses.  */
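      /* Worked example (added for exposition): with 8 elements per vector,
         a misalignment of 2 elements and a group of size 3, the prolog
         would run 8 - 2 = 6 iterations, and 6 % 3 == 0, so peeling is
         allowed.  */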
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

   -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

   -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

   -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   -- Possibility 2: we do loop peeling:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        for (i = 3; i < N; i++){        # loop 2A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

   -- Possibility 3: combination of loop peeling and versioning:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        if (p is aligned) {
          for (i = 3; i<N; i++){        # loop 3A
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = 0
          }
        }
        else {
          for (i = 3; i<N; i++){        # loop 3B
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = unaligned
          }
        }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  tree stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is
     not being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          do_peeling = vector_alignment_reachable_p (dr);
          if (do_peeling)
            dr0 = dr;
          if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector alignment may not be reachable");
          break;
        }
    }

  vect_versioning_for_alias_required =
    (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;
      tree stmt = DR_STMT (dr0);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = nelements - mis;
1875 /* For interleaved data access every iteration accesses all the
1876 members of the group; therefore we divide the number of iterations
1877 by the group size. */
1878 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1879 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1880 npeel /= DR_GROUP_SIZE (stmt_info);
1882 if (vect_print_dump_info (REPORT_DETAILS))
1883 fprintf (vect_dump, "Try peeling by %d", npeel);
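/* A worked example (values hypothetical, for illustration only): with
   16-byte V4SI vectors (nelements == 4) and a 4-byte-int store dr0
   whose DR_MISALIGNMENT is 12 bytes, mis = 12/4 = 3 elements and
   npeel = 4 - 3 = 1, i.e. peeling one scalar iteration brings the
   store to a vector-size boundary. */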
1886 /* Ensure that all data refs can be vectorized after the peel. */
1887 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1889 int save_misalignment;
1891 if (dr == dr0)
1892 continue;
1894 stmt = DR_STMT (dr);
1895 stmt_info = vinfo_for_stmt (stmt);
1896 /* For interleaving, only the alignment of the first access
1897 matters. */
1898 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1899 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1900 continue;
1902 save_misalignment = DR_MISALIGNMENT (dr);
1903 vect_update_misalignment_for_peel (dr, dr0, npeel);
1904 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1905 SET_DR_MISALIGNMENT (dr, save_misalignment);
1907 if (!supportable_dr_alignment)
1909 do_peeling = false;
1910 break;
1914 if (do_peeling)
1916 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1917 If the misalignment of DR_i is identical to that of dr0 then set
1918 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1919 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1920 by the peeling factor times the element size of DR_i (MOD the
1921 vectorization factor times the size). Otherwise, the
1922 misalignment of DR_i must be set to unknown. */
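/* E.g. (hypothetical values): with a 16-byte vector size, 4-byte
   elements and npeel == 1, a DR_i whose known misalignment was 12
   becomes (12 + 1*4) % 16 == 0, whereas a DR_i with unknown
   misalignment remains unknown after the peel. */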
1923 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1924 if (dr != dr0)
1925 vect_update_misalignment_for_peel (dr, dr0, npeel);
1927 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1928 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1929 SET_DR_MISALIGNMENT (dr0, 0);
1930 if (vect_print_dump_info (REPORT_ALIGNMENT))
1931 fprintf (vect_dump, "Alignment of access forced using peeling.");
1933 if (vect_print_dump_info (REPORT_DETAILS))
1934 fprintf (vect_dump, "Peeling for alignment will be applied.");
1936 stat = vect_verify_datarefs_alignment (loop_vinfo);
1937 gcc_assert (stat);
1938 return stat;
1943 /* (2) Versioning to force alignment. */
1945 /* Try versioning if:
1946 1) flag_tree_vect_loop_version is TRUE
1947 2) optimize_size is FALSE
1948 3) there is at least one unsupported misaligned data ref with an unknown
1949 misalignment, and
1950 4) all misaligned data refs with a known misalignment are supported, and
1951 5) the number of runtime alignment checks is within reason. */
1953 do_versioning =
1954 flag_tree_vect_loop_version
1955 && (!optimize_size)
1956 && (!loop->inner); /* FORNOW */
1958 if (do_versioning)
1960 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1962 stmt = DR_STMT (dr);
1963 stmt_info = vinfo_for_stmt (stmt);
1965 /* For interleaving, only the alignment of the first access
1966 matters. */
1967 if (aligned_access_p (dr)
1968 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1969 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1970 continue;
1972 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1974 if (!supportable_dr_alignment)
1976 tree stmt;
1977 int mask;
1978 tree vectype;
1980 if (known_alignment_for_access_p (dr)
1981 || VEC_length (tree,
1982 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1983 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1985 do_versioning = false;
1986 break;
1989 stmt = DR_STMT (dr);
1990 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1991 gcc_assert (vectype);
1993 /* The rightmost bits of an aligned address must be zeros.
1994 Construct the mask needed for this test. For example,
1995 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1996 mask must be 15 = 0xf. */
1997 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
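/* Illustrative sketch of the runtime test this mask feeds (the actual
   condition is generated later, at transformation time): the addresses
   of the statements collected in LOOP_VINFO_MAY_MISALIGN_STMTS are
   OR-ed together and tested against the mask, roughly:
     if (((addr1 | addr2 | ...) & mask) == 0)
       ... aligned (vectorized) loop version ...
     else
       ... scalar loop version ... */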
1999 /* FORNOW: use the same mask to test all potentially unaligned
2000 references in the loop. The vectorizer currently supports
2001 a single vector size, see the reference to
2002 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2003 vectorization factor is computed. */
2004 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2005 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2006 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2007 VEC_safe_push (tree, heap,
2008 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2009 DR_STMT (dr));
2013 /* Versioning requires at least one misaligned data reference. */
2014 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2015 do_versioning = false;
2016 else if (!do_versioning)
2017 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2020 if (do_versioning)
2022 VEC(tree,heap) *may_misalign_stmts
2023 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2024 tree stmt;
2026 /* It can now be assumed that the data references in the statements
2027 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2028 of the loop being vectorized. */
2029 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2032 dr = STMT_VINFO_DATA_REF (stmt_info);
2033 SET_DR_MISALIGNMENT (dr, 0);
2034 if (vect_print_dump_info (REPORT_ALIGNMENT))
2035 fprintf (vect_dump, "Alignment of access forced using versioning.");
2038 if (vect_print_dump_info (REPORT_DETAILS))
2039 fprintf (vect_dump, "Versioning for alignment will be applied.");
2041 /* Peeling and versioning can't be done together at this time. */
2042 gcc_assert (! (do_peeling && do_versioning));
2044 stat = vect_verify_datarefs_alignment (loop_vinfo);
2045 gcc_assert (stat);
2046 return stat;
2049 /* This point is reached if neither peeling nor versioning is being done. */
2050 gcc_assert (! (do_peeling || do_versioning));
2052 stat = vect_verify_datarefs_alignment (loop_vinfo);
2053 return stat;
2057 /* Function vect_analyze_data_refs_alignment
2059 Analyze the alignment of the data-references in the loop.
2060 Return FALSE if a data reference is found that cannot be vectorized. */
2062 static bool
2063 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2065 if (vect_print_dump_info (REPORT_DETAILS))
2066 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2068 if (!vect_compute_data_refs_alignment (loop_vinfo))
2070 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2071 fprintf (vect_dump,
2072 "not vectorized: can't calculate alignment for data ref.");
2073 return false;
2076 return true;
2080 /* Analyze groups of strided accesses: check that DR belongs to a group of
2081 strided accesses of legal size, step, etc. Detect gaps, single element
2082 interleaving, and other special cases. Set strided access info.
2083 Collect groups of strided stores for further use in SLP analysis. */
2085 static bool
2086 vect_analyze_group_access (struct data_reference *dr)
2088 tree step = DR_STEP (dr);
2089 tree scalar_type = TREE_TYPE (DR_REF (dr));
2090 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2091 tree stmt = DR_STMT (dr);
2092 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2093 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2094 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2095 HOST_WIDE_INT stride;
2096 bool slp_impossible = false;
2098 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2099 interleaving group (including gaps). */
2100 stride = dr_step / type_size;
2102 /* A non-consecutive access is possible only if it is part of interleaving. */
2103 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2105 /* Check if this DR is a part of interleaving, and is a single
2106 element of the group that is accessed in the loop. */
2108 /* Gaps are supported only for loads. STEP must be a multiple of the type
2109 size. The size of the group must be a power of 2. */
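/* E.g. (illustrative):
     for (i = 0; i < N; i++)
       x += a[4*i];
   is single element interleaving: only one member of each group is
   accessed, so stride == 4 and the group has gaps, which is
   acceptable here because the access is a read. */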
2110 if (DR_IS_READ (dr)
2111 && (dr_step % type_size) == 0
2112 && stride > 0
2113 && exact_log2 (stride) != -1)
2115 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2116 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2117 if (vect_print_dump_info (REPORT_DR_DETAILS))
2119 fprintf (vect_dump, "Detected single element interleaving %d ",
2120 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2121 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2122 fprintf (vect_dump, " step ");
2123 print_generic_expr (vect_dump, step, TDF_SLIM);
2125 return true;
2127 if (vect_print_dump_info (REPORT_DETAILS))
2128 fprintf (vect_dump, "not consecutive access");
2129 return false;
2132 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2134 /* First stmt in the interleaving chain. Check the chain. */
2135 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2136 struct data_reference *data_ref = dr;
2137 unsigned int count = 1;
2138 tree next_step;
2139 tree prev_init = DR_INIT (data_ref);
2140 tree prev = stmt;
2141 HOST_WIDE_INT diff, count_in_bytes;
2143 while (next)
2145 /* Skip identical data-refs. If two or more stmts share a data-ref
2146 (supported only for loads), we vectorize only the first stmt, and
2147 the rest get their vectorized loads from the first one. */
2148 if (!tree_int_cst_compare (DR_INIT (data_ref),
2149 DR_INIT (STMT_VINFO_DATA_REF (
2150 vinfo_for_stmt (next)))))
2152 if (!DR_IS_READ (data_ref))
2154 if (vect_print_dump_info (REPORT_DETAILS))
2155 fprintf (vect_dump, "Two store stmts share the same dr.");
2156 return false;
2159 /* Check that there are no load-store dependencies for these loads,
2160 to prevent a load-store-load sequence to the same location. */
2161 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2162 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump,
2166 "READ_WRITE dependence in interleaving.");
2167 return false;
2170 /* For loads, reuse the same data-ref load. */
2171 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2173 prev = next;
2174 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2175 continue;
2177 prev = next;
2179 /* Check that all the accesses have the same STEP. */
2180 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2181 if (tree_int_cst_compare (step, next_step))
2183 if (vect_print_dump_info (REPORT_DETAILS))
2184 fprintf (vect_dump, "not consecutive access in interleaving");
2185 return false;
2188 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2189 /* Check that the distance between two accesses is equal to the type
2190 size. Otherwise, we have gaps. */
2191 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2192 - TREE_INT_CST_LOW (prev_init)) / type_size;
2193 if (diff != 1)
2195 /* FORNOW: SLP of accesses with gaps is not supported. */
2196 slp_impossible = true;
2197 if (!DR_IS_READ (data_ref))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "interleaved store with gaps");
2201 return false;
2205 /* Store the gap from the previous member of the group. If there is no
2206 gap in the access, DR_GROUP_GAP is always 1. */
2207 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2209 prev_init = DR_INIT (data_ref);
2210 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2211 /* Count the number of data-refs in the chain. */
2212 count++;
2215 /* COUNT is the number of accesses found; we multiply it by the size of
2216 the type to get COUNT_IN_BYTES. */
2217 count_in_bytes = type_size * count;
2219 /* Check that the size of the interleaving is not greater than STEP. */
2220 if (dr_step < count_in_bytes)
2222 if (vect_print_dump_info (REPORT_DETAILS))
2224 fprintf (vect_dump, "interleaving size is greater than step for ");
2225 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2227 return false;
2230 /* Check that the size of the interleaving is equal to STEP for stores,
2231 i.e., that there are no gaps. */
2232 if (dr_step != count_in_bytes)
2234 if (DR_IS_READ (dr))
2236 slp_impossible = true;
2237 /* There is a gap after the last load in the group. This gap is the
2238 difference between the stride and the number of elements. When
2239 there is no gap, this difference should be 0. */
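/* E.g. (hypothetical): a load group with stride == 4 of which only
   count == 3 members are accessed gets DR_GROUP_GAP == 4 - 3 == 1. */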
2240 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2242 else
2244 if (vect_print_dump_info (REPORT_DETAILS))
2245 fprintf (vect_dump, "interleaved store with gaps");
2246 return false;
2250 /* Check that STEP is a multiple of type size. */
2251 if ((dr_step % type_size) != 0)
2253 if (vect_print_dump_info (REPORT_DETAILS))
2255 fprintf (vect_dump, "step is not a multiple of type size: step ");
2256 print_generic_expr (vect_dump, step, TDF_SLIM);
2257 fprintf (vect_dump, " size ");
2258 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2259 TDF_SLIM);
2261 return false;
2264 /* FORNOW: we handle only interleaving that is a power of 2.
2265 We don't fail here if it may still be possible to vectorize the
2266 group using SLP. If not, the size of the group will be checked in
2267 vect_analyze_operations, and the vectorization will fail. */
2268 if (exact_log2 (stride) == -1)
2270 if (vect_print_dump_info (REPORT_DETAILS))
2271 fprintf (vect_dump, "interleaving is not a power of 2");
2273 if (slp_impossible)
2274 return false;
2276 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2277 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2280 /* SLP: create an SLP data structure for every interleaving group of
2281 stores for further analysis in vect_analyze_slp. */
2282 if (!DR_IS_READ (dr) && !slp_impossible)
2283 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2286 return true;
2290 /* Analyze the access pattern of the data-reference DR.
2291 In case of non-consecutive accesses call vect_analyze_group_access() to
2292 analyze groups of strided accesses. */
2294 static bool
2295 vect_analyze_data_ref_access (struct data_reference *dr)
2297 tree step = DR_STEP (dr);
2298 tree scalar_type = TREE_TYPE (DR_REF (dr));
2299 tree stmt = DR_STMT (dr);
2300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2303 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2305 if (!step)
2307 if (vect_print_dump_info (REPORT_DETAILS))
2308 fprintf (vect_dump, "bad data-ref access");
2309 return false;
2312 /* Don't allow invariant accesses. */
2313 if (dr_step == 0)
2314 return false;
2316 if (nested_in_vect_loop_p (loop, stmt))
2318 /* Interleaved accesses are not yet supported within outer-loop
2319 vectorization for references in the inner-loop. */
2320 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2322 /* For the rest of the analysis we use the outer-loop step. */
2323 step = STMT_VINFO_DR_STEP (stmt_info);
2324 dr_step = TREE_INT_CST_LOW (step);
2326 if (dr_step == 0)
2328 if (vect_print_dump_info (REPORT_ALIGNMENT))
2329 fprintf (vect_dump, "zero step in outer loop.");
2330 if (DR_IS_READ (dr))
2331 return true;
2332 else
2333 return false;
2337 /* Consecutive? */
2338 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2340 /* Mark that it is not interleaving. */
2341 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2342 return true;
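/* E.g. (illustrative): for `int a[N]', the access a[i] has step 4,
   equal to TYPE_SIZE_UNIT of int, and is treated as consecutive here;
   a[2*i] has step 8 and falls through to the interleaving analysis
   below. */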
2345 if (nested_in_vect_loop_p (loop, stmt))
2347 if (vect_print_dump_info (REPORT_ALIGNMENT))
2348 fprintf (vect_dump, "strided access in outer loop.");
2349 return false;
2352 /* Non-consecutive access - check if it is a part of an interleaving group. */
2353 return vect_analyze_group_access (dr);
2357 /* Function vect_analyze_data_ref_accesses.
2359 Analyze the access pattern of all the data references in the loop.
2361 FORNOW: the only access pattern that is considered vectorizable is a
2362 simple step 1 (consecutive) access.
2364 FORNOW: handle only arrays and pointer accesses. */
2366 static bool
2367 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2369 unsigned int i;
2370 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2371 struct data_reference *dr;
2373 if (vect_print_dump_info (REPORT_DETAILS))
2374 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2376 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2377 if (!vect_analyze_data_ref_access (dr))
2379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2380 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2381 return false;
2384 return true;
2387 /* Function vect_prune_runtime_alias_test_list.
2389 Prune a list of ddrs to be tested at run-time by versioning for alias.
2390 Return FALSE if the resulting list of ddrs is longer than allowed by
2391 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
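/* E.g. (illustrative): two ddrs that pair the same store a[i] with two
   reads of the same reference b[i] would test identical address ranges
   at run time, so only one of them needs to be kept. */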
2393 static bool
2394 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2396 VEC (ddr_p, heap) * ddrs =
2397 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2398 unsigned i, j;
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2403 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2405 bool found;
2406 ddr_p ddr_i;
2408 ddr_i = VEC_index (ddr_p, ddrs, i);
2409 found = false;
2411 for (j = 0; j < i; j++)
2413 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2415 if (vect_vfa_range_equal (ddr_i, ddr_j))
2417 if (vect_print_dump_info (REPORT_DR_DETAILS))
2419 fprintf (vect_dump, "found equal ranges ");
2420 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2421 fprintf (vect_dump, ", ");
2422 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2423 fprintf (vect_dump, " and ");
2424 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2425 fprintf (vect_dump, ", ");
2426 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2428 found = true;
2429 break;
2433 if (found)
2435 VEC_ordered_remove (ddr_p, ddrs, i);
2436 continue;
2438 i++;
2441 if (VEC_length (ddr_p, ddrs) >
2442 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2444 if (vect_print_dump_info (REPORT_DR_DETAILS))
2446 fprintf (vect_dump,
2447 "disable versioning for alias - max number of generated "
2448 "checks exceeded.");
2451 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2453 return false;
2456 return true;
2459 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2461 void
2462 vect_free_slp_tree (slp_tree node)
2464 if (!node)
2465 return;
2467 if (SLP_TREE_LEFT (node))
2468 vect_free_slp_tree (SLP_TREE_LEFT (node));
2470 if (SLP_TREE_RIGHT (node))
2471 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2473 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2475 if (SLP_TREE_VEC_STMTS (node))
2476 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2478 free (node);
2482 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2483 of a legal type and that they match the defs of the first stmt of the SLP
2484 group (stored in FIRST_STMT_...). */
2486 static bool
2487 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2488 tree rhs, VEC (tree, heap) **def_stmts0,
2489 VEC (tree, heap) **def_stmts1,
2490 enum vect_def_type *first_stmt_dt0,
2491 enum vect_def_type *first_stmt_dt1,
2492 tree *first_stmt_def0_type,
2493 tree *first_stmt_def1_type,
2494 tree *first_stmt_const_oprnd,
2495 int ncopies_for_cost)
2497 tree oprnd;
2498 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2499 unsigned int i, number_of_oprnds = op_type;
2500 tree def, def_stmt;
2501 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2502 stmt_vec_info stmt_info =
2503 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2505 /* Store. */
2506 if (!op_type)
2507 number_of_oprnds = 1;
2508 else
2509 gcc_assert (op_type == unary_op || op_type == binary_op);
2511 for (i = 0; i < number_of_oprnds; i++)
2513 if (op_type)
2514 oprnd = TREE_OPERAND (rhs, i);
2515 else
2516 oprnd = rhs;
2518 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2519 || (!def_stmt && dt[i] != vect_constant_def))
2521 if (vect_print_dump_info (REPORT_SLP))
2523 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2524 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2527 return false;
2530 if (!*first_stmt_dt0)
2532 /* op0 of the first stmt of the group - store its info. */
2533 *first_stmt_dt0 = dt[i];
2534 if (def)
2535 *first_stmt_def0_type = TREE_TYPE (def);
2536 else
2537 *first_stmt_const_oprnd = oprnd;
2539 /* Analyze costs (for the first stmt of the group only). */
2540 if (op_type)
2541 /* Not a memory operation (we don't call this function for loads). */
2542 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2543 else
2544 /* Store. */
2545 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2548 else
2550 if (!*first_stmt_dt1 && i == 1)
2552 /* op1 of the first stmt of the group - store its info. */
2553 *first_stmt_dt1 = dt[i];
2554 if (def)
2555 *first_stmt_def1_type = TREE_TYPE (def);
2556 else
2558 /* We assume that the stmt contains only one constant
2559 operand. We fail otherwise, to be on the safe side. */
2560 if (*first_stmt_const_oprnd)
2562 if (vect_print_dump_info (REPORT_SLP))
2563 fprintf (vect_dump, "Build SLP failed: two constant "
2564 "oprnds in stmt");
2565 return false;
2567 *first_stmt_const_oprnd = oprnd;
2570 else
2572 /* Not first stmt of the group, check that the def-stmt/s match
2573 the def-stmt/s of the first stmt. */
2574 if ((i == 0
2575 && (*first_stmt_dt0 != dt[i]
2576 || (*first_stmt_def0_type && def
2577 && *first_stmt_def0_type != TREE_TYPE (def))))
2578 || (i == 1
2579 && (*first_stmt_dt1 != dt[i]
2580 || (*first_stmt_def1_type && def
2581 && *first_stmt_def1_type != TREE_TYPE (def))))
2582 || (!def
2583 && TREE_TYPE (*first_stmt_const_oprnd)
2584 != TREE_TYPE (oprnd)))
2586 if (vect_print_dump_info (REPORT_SLP))
2587 fprintf (vect_dump, "Build SLP failed: different types ");
2589 return false;
2594 /* Check the types of the definitions. */
2595 switch (dt[i])
2597 case vect_constant_def:
2598 case vect_invariant_def:
2599 break;
2601 case vect_loop_def:
2602 if (i == 0)
2603 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2604 else
2605 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2606 break;
2608 default:
2609 /* FORNOW: Not supported. */
2610 if (vect_print_dump_info (REPORT_SLP))
2612 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2613 print_generic_expr (vect_dump, def, TDF_SLIM);
2616 return false;
2620 return true;
2624 /* Recursively build an SLP tree starting from NODE.
2625 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2626 permutation, or are of unsupported operation types. Otherwise, return
2627 TRUE.
2628 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2629 in the case of multiple types for now. */
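/* E.g. (illustrative), a strided store group such as
     a[2*i] = b[2*i] + x;
     a[2*i+1] = b[2*i+1] + x;
   is isomorphic (same operation code, matching def types), so the
   resulting SLP tree has the two stores at its root and a single
   child node holding the two loads. */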
2631 static bool
2632 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2633 unsigned int group_size, bool *slp_impossible,
2634 int *inside_cost, int *outside_cost,
2635 int ncopies_for_cost)
2637 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2638 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2639 unsigned int i;
2640 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2641 tree stmt = VEC_index (tree, stmts, 0);
2642 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2643 enum tree_code first_stmt_code = 0;
2644 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2645 tree lhs, rhs, prev_stmt = NULL_TREE;
2646 bool stop_recursion = false, need_same_oprnds = false;
2647 tree vectype, scalar_type, first_op1 = NULL_TREE;
2648 unsigned int vectorization_factor = 0, ncopies;
2649 optab optab;
2650 int icode;
2651 enum machine_mode optab_op2_mode;
2652 enum machine_mode vec_mode;
2653 tree first_stmt_const_oprnd = NULL_TREE;
2654 struct data_reference *first_dr;
2656 /* For every stmt in NODE find its def stmt/s. */
2657 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2659 if (vect_print_dump_info (REPORT_SLP))
2661 fprintf (vect_dump, "Build SLP for ");
2662 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2665 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2667 if (vect_print_dump_info (REPORT_SLP))
2669 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2670 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2673 return false;
2676 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2677 vectype = get_vectype_for_scalar_type (scalar_type);
2678 if (!vectype)
2680 if (vect_print_dump_info (REPORT_SLP))
2682 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2683 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2685 return false;
2688 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2689 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2690 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2691 if (ncopies > 1)
2693 /* FORNOW. */
2694 if (vect_print_dump_info (REPORT_SLP))
2695 fprintf (vect_dump, "SLP failed - multiple types ");
2697 *slp_impossible = true;
2698 return false;
2701 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2702 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2704 /* Check the operation. */
2705 if (i == 0)
2707 first_stmt_code = TREE_CODE (rhs);
2709 /* Shift arguments should be equal in all the packed stmts for a
2710 vector shift with scalar shift operand. */
2711 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR
2712 || TREE_CODE (rhs) == LROTATE_EXPR
2713 || TREE_CODE (rhs) == RROTATE_EXPR)
2715 vec_mode = TYPE_MODE (vectype);
2717 /* First see if we have a vector/vector shift. */
2718 optab = optab_for_tree_code (TREE_CODE (rhs), vectype,
2719 optab_vector);
2721 if (!optab
2722 || (optab->handlers[(int) vec_mode].insn_code
2723 == CODE_FOR_nothing))
2725 /* No vector/vector shift, try for a vector/scalar shift. */
2726 optab = optab_for_tree_code (TREE_CODE (rhs), vectype,
2727 optab_scalar);
2729 if (!optab)
2731 if (vect_print_dump_info (REPORT_SLP))
2732 fprintf (vect_dump, "Build SLP failed: no optab.");
2733 return false;
2735 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2736 if (icode == CODE_FOR_nothing)
2738 if (vect_print_dump_info (REPORT_SLP))
2739 fprintf (vect_dump,
2740 "Build SLP failed: op not supported by target.");
2741 return false;
2743 optab_op2_mode = insn_data[icode].operand[2].mode;
2744 if (!VECTOR_MODE_P (optab_op2_mode))
2746 need_same_oprnds = true;
2747 first_op1 = TREE_OPERAND (rhs, 1);
2752 else
2754 if (first_stmt_code != TREE_CODE (rhs))
2756 if (vect_print_dump_info (REPORT_SLP))
2758 fprintf (vect_dump,
2759 "Build SLP failed: different operation in stmt ");
2760 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2763 return false;
2766 if (need_same_oprnds
2767 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2769 if (vect_print_dump_info (REPORT_SLP))
2771 fprintf (vect_dump,
2772 "Build SLP failed: different shift arguments in ");
2773 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2776 return false;
2780 /* Strided store or load. */
2781 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2783 if (REFERENCE_CLASS_P (lhs))
2785 /* Store. */
2786 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2787 &def_stmts0, &def_stmts1,
2788 &first_stmt_dt0,
2789 &first_stmt_dt1,
2790 &first_stmt_def0_type,
2791 &first_stmt_def1_type,
2792 &first_stmt_const_oprnd,
2793 ncopies_for_cost))
2794 return false;
2796 else
2798 /* Load. */
2799 if (i == 0)
2801 /* First stmt of the SLP group should be the first load of
2802 the interleaving chain if data permutation is not allowed.
2803 Check that there is no gap between the loads. */
2804 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2805 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2807 /* FORNOW: data permutations and gaps in loads are not
2808 supported. */
2809 if (vect_print_dump_info (REPORT_SLP))
2811 fprintf (vect_dump, "Build SLP failed: strided "
2812 "loads need permutation or have gaps ");
2813 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2816 return false;
2819 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2820 if (vect_supportable_dr_alignment (first_dr)
2821 == dr_unaligned_unsupported)
2823 if (vect_print_dump_info (REPORT_SLP))
2825 fprintf (vect_dump, "Build SLP failed: unsupported "
2826 "unaligned load ");
2827 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2830 return false;
2833 /* Analyze costs (for the first stmt in the group). */
2834 vect_model_load_cost (vinfo_for_stmt (stmt),
2835 ncopies_for_cost, *node);
2837 else
2839 /* Check that we have consecutive loads from the interleaving
2840 chain and that there is no gap between the loads. */
2841 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
2842 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
2844 /* FORNOW: data permutations and gaps in loads are not
2845 supported. */
2846 if (vect_print_dump_info (REPORT_SLP))
2848 fprintf (vect_dump, "Build SLP failed: strided "
2849 "loads need permutation or have gaps ");
2850 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2852 return false;
2856 prev_stmt = stmt;
2858 /* We stop building the tree when we reach a group of loads. */
2859 stop_recursion = true;
2860 continue;
2862 } /* Strided access. */
2863 else
2865 if (REFERENCE_CLASS_P (rhs))
2867 /* Not a strided load. */
2868 if (vect_print_dump_info (REPORT_SLP))
2870 fprintf (vect_dump, "Build SLP failed: not strided load ");
2871 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2874 /* FORNOW: non-strided loads are not supported. */
2875 return false;
2878 /* Not a memory operation. */
2879 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2881 if (vect_print_dump_info (REPORT_SLP))
2883 fprintf (vect_dump, "Build SLP failed: operation");
2884 fprintf (vect_dump, " unsupported ");
2885 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2888 return false;
2891 /* Find the def-stmts. */
2892 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2893 &def_stmts1, &first_stmt_dt0,
2894 &first_stmt_dt1,
2895 &first_stmt_def0_type,
2896 &first_stmt_def1_type,
2897 &first_stmt_const_oprnd,
2898 ncopies_for_cost))
2899 return false;
2903 /* Add the costs of the node to the overall instance costs. */
2904 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2905 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2907 /* Strided loads were reached - stop the recursion. */
2908 if (stop_recursion)
2909 return true;
2911 /* Create SLP_TREE nodes for the definition node/s. */
2912 if (first_stmt_dt0 == vect_loop_def)
2914 slp_tree left_node = XNEW (struct _slp_tree);
2915 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2916 SLP_TREE_VEC_STMTS (left_node) = NULL;
2917 SLP_TREE_LEFT (left_node) = NULL;
2918 SLP_TREE_RIGHT (left_node) = NULL;
2919 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2920 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2921 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2922 slp_impossible, inside_cost, outside_cost,
2923 ncopies_for_cost))
2924 return false;
2926 SLP_TREE_LEFT (*node) = left_node;
2929 if (first_stmt_dt1 == vect_loop_def)
2931 slp_tree right_node = XNEW (struct _slp_tree);
2932 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2933 SLP_TREE_VEC_STMTS (right_node) = NULL;
2934 SLP_TREE_LEFT (right_node) = NULL;
2935 SLP_TREE_RIGHT (right_node) = NULL;
2936 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2937 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2938 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2939 slp_impossible, inside_cost, outside_cost,
2940 ncopies_for_cost))
2941 return false;
2943 SLP_TREE_RIGHT (*node) = right_node;
2946 return true;
2950 static void
2951 vect_print_slp_tree (slp_tree node)
2953 int i;
2954 tree stmt;
2956 if (!node)
2957 return;
2959 fprintf (vect_dump, "node ");
2960 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2962 fprintf (vect_dump, "\n\tstmt %d ", i);
2963 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2965 fprintf (vect_dump, "\n");
2967 vect_print_slp_tree (SLP_TREE_LEFT (node));
2968 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2972 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2973 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2974 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2975 stmts in NODE are to be marked. */
2977 static void
2978 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2980 int i;
2981 tree stmt;
2983 if (!node)
2984 return;
2986 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2987 if (j < 0 || i == j)
2988 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2990 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2991 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2995 /* Analyze an SLP instance starting from a group of strided stores. Call
2996 vect_build_slp_tree to build a tree of packed stmts if possible.
2997 Return FALSE if it's impossible to SLP any stmt in the loop. */
2999 static bool
3000 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
3002 slp_instance new_instance;
3003 slp_tree node = XNEW (struct _slp_tree);
3004 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3005 unsigned int unrolling_factor = 1, nunits;
3006 tree vectype, scalar_type, next;
3007 unsigned int vectorization_factor = 0, ncopies;
3008 bool slp_impossible = false;
3009 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3011 /* FORNOW: multiple types are not supported. */
3012 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
3013 vectype = get_vectype_for_scalar_type (scalar_type);
3014 if (!vectype)
3016 if (vect_print_dump_info (REPORT_SLP))
3018 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3019 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3021 return false;
3024 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3025 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3026 ncopies = vectorization_factor / nunits;
3027 if (ncopies > 1)
3029 if (vect_print_dump_info (REPORT_SLP))
3030 fprintf (vect_dump, "SLP failed - multiple types ");
3032 return false;
3035 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3036 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3037 next = stmt;
3038 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3039 while (next)
3041 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3042 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3045 SLP_TREE_VEC_STMTS (node) = NULL;
3046 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3047 SLP_TREE_LEFT (node) = NULL;
3048 SLP_TREE_RIGHT (node) = NULL;
3049 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3050 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3052 /* Calculate the unrolling factor. */
3053 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3055 /* Calculate the number of vector stmts to create based on the unrolling
3056 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3057 GROUP_SIZE / NUNITS otherwise). */
3058 ncopies_for_cost = unrolling_factor * group_size / nunits;
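/* A worked example (values hypothetical): with V4SI vectors
   (nunits == 4) and a store group of size 2, unrolling_factor ==
   least_common_multiple (4, 2) / 2 == 2 and ncopies_for_cost ==
   2 * 2 / 4 == 1, i.e. unrolling the loop twice packs two group
   instances into a single vector stmt. */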
3060 /* Build the tree for the SLP instance. */
3061 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3062 &inside_cost, &outside_cost, ncopies_for_cost))
3064 /* Create a new SLP instance. */
3065 new_instance = XNEW (struct _slp_instance);
3066 SLP_INSTANCE_TREE (new_instance) = node;
3067 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3068 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3069 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3070 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3071 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3072 new_instance);
3073 if (vect_print_dump_info (REPORT_SLP))
3074 vect_print_slp_tree (node);
3076 return true;
3079 /* Failed to SLP. */
3080 /* Free the allocated memory. */
3081 vect_free_slp_tree (node);
3083 if (slp_impossible)
3084 return false;
3086 /* SLP failed for this instance, but it is still possible to SLP other stmts
3087 in the loop. */
3088 return true;
3092 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
3093 trees of packed scalar stmts if SLP is possible. */
3095 static bool
3096 vect_analyze_slp (loop_vec_info loop_vinfo)
3098 unsigned int i;
3099 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3100 tree store;
3102 if (vect_print_dump_info (REPORT_SLP))
3103 fprintf (vect_dump, "=== vect_analyze_slp ===");
3105 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3106 if (!vect_analyze_slp_instance (loop_vinfo, store))
3108 /* SLP failed. No instance can be SLPed in the loop. */
3109 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3110 fprintf (vect_dump, "SLP failed.");
3112 return false;
3115 return true;
3119 /* For each possible SLP instance decide whether to SLP it and calculate the overall
3120 unrolling factor needed to SLP the loop. */
3122 static void
3123 vect_make_slp_decision (loop_vec_info loop_vinfo)
3125 unsigned int i, unrolling_factor = 1;
3126 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3127 slp_instance instance;
3128 int decided_to_slp = 0;
3130 if (vect_print_dump_info (REPORT_SLP))
3131 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3133 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3135 /* FORNOW: SLP if you can. */
3136 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3137 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3139 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3140 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3141 loop-based vectorization. Such stmts will be marked as HYBRID. */
3142 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3143 decided_to_slp++;
3146 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3148 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3149 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3150 decided_to_slp, unrolling_factor);
3154 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3155 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
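/* E.g. (illustrative): if a PURE_SLP stmt t = b[i] + c[i] also feeds a
   stmt that is not part of any SLP instance, such as a reduction
   s += t, then t must additionally be vectorized by loop-based
   vectorization, and is therefore marked HYBRID. */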
3157 static void
3158 vect_detect_hybrid_slp_stmts (slp_tree node)
3160 int i;
3161 tree stmt;
3162 imm_use_iterator imm_iter;
3163 tree use_stmt;
3165 if (!node)
3166 return;
3168 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3169 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3170 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3171 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3172 if (vinfo_for_stmt (use_stmt)
3173 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3174 vect_mark_slp_stmts (node, hybrid, i);
3176 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3177 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3181 /* Find stmts that must be both vectorized and SLPed. */
3183 static void
3184 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3186 unsigned int i;
3187 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3188 slp_instance instance;
3190 if (vect_print_dump_info (REPORT_SLP))
3191 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3193 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3194 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3198 /* Function vect_analyze_data_refs.
3200 Find all the data references in the loop.
3202 The general structure of the analysis of data refs in the vectorizer is as
3203 follows:
3204 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3205 find and analyze all data-refs in the loop and their dependences.
3206 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3207 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3208 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3212 static bool
3213 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3216 unsigned int i;
3217 VEC (data_reference_p, heap) *datarefs;
3218 struct data_reference *dr;
3219 tree scalar_type;
3221 if (vect_print_dump_info (REPORT_DETAILS))
3222 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3224 compute_data_dependences_for_loop (loop, true,
3225 &LOOP_VINFO_DATAREFS (loop_vinfo),
3226 &LOOP_VINFO_DDRS (loop_vinfo));
3228 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3229 from stmt_vec_info struct to DR and vectype. */
3230 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3232 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3234 tree stmt;
3235 stmt_vec_info stmt_info;
3236 basic_block bb;
3237 tree base, offset, init;
3239 if (!dr || !DR_REF (dr))
3241 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3242 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3243 return false;
3246 stmt = DR_STMT (dr);
3247 stmt_info = vinfo_for_stmt (stmt);
3249 /* Check that analysis of the data-ref succeeded. */
3250 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3251 || !DR_STEP (dr))
3253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3255 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3256 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3258 return false;
3261 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3264 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3265 "constant");
3266 return false;
3269 if (!DR_SYMBOL_TAG (dr))
3271 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3273 fprintf (vect_dump, "not vectorized: no memory tag for ");
3274 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3276 return false;
3279 base = unshare_expr (DR_BASE_ADDRESS (dr));
3280 offset = unshare_expr (DR_OFFSET (dr));
3281 init = unshare_expr (DR_INIT (dr));
3283 /* Update DR field in stmt_vec_info struct. */
3284 bb = bb_for_stmt (stmt);
3286 /* If the dataref is in an inner-loop of the loop that is considered
3287 for vectorization, we also want to analyze the access relative to
3288 the outer-loop (DR contains information only relative to the
3289 inner-most enclosing loop). We do that by building a reference to the
3290 first location accessed by the inner-loop, and analyze it relative to
3291 the outer-loop. */
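/* E.g. (illustrative): for an access a[i][j] in an inner j-loop, the
   first location accessed by the inner-loop is a[i][0]; analyzing
   &a[i][0] relative to the outer i-loop yields an outer step equal to
   the size of a row of a. */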
3292 if (nested_in_vect_loop_p (loop, stmt))
3294 tree outer_step, outer_base, outer_init;
3295 HOST_WIDE_INT pbitsize, pbitpos;
3296 tree poffset;
3297 enum machine_mode pmode;
3298 int punsignedp, pvolatilep;
3299 affine_iv base_iv, offset_iv;
3300 tree dinit;
3302 /* Build a reference to the first location accessed by the
3303 inner-loop: *(BASE+INIT). (The first location is actually
3304 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3305 tree inner_base = build_fold_indirect_ref
3306 (fold_build2 (POINTER_PLUS_EXPR,
3307 TREE_TYPE (base), base,
3308 fold_convert (sizetype, init)));
3310 if (vect_print_dump_info (REPORT_DETAILS))
3312 fprintf (dump_file, "analyze in outer-loop: ");
3313 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3316 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3317 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3318 gcc_assert (outer_base != NULL_TREE);
3320 if (pbitpos % BITS_PER_UNIT != 0)
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (dump_file, "failed: bit offset alignment.\n");
3324 return false;
3327 outer_base = build_fold_addr_expr (outer_base);
3328 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3330 if (vect_print_dump_info (REPORT_DETAILS))
3331 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3332 return false;
3335 if (offset)
3337 if (poffset)
3338 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3339 else
3340 poffset = offset;
3343 if (!poffset)
3345 offset_iv.base = ssize_int (0);
3346 offset_iv.step = ssize_int (0);
3348 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3350 if (vect_print_dump_info (REPORT_DETAILS))
3351 fprintf (dump_file, "evolution of offset is not affine.\n");
3352 return false;
3355 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3356 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3357 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3358 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3359 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3361 outer_step = size_binop (PLUS_EXPR,
3362 fold_convert (ssizetype, base_iv.step),
3363 fold_convert (ssizetype, offset_iv.step));
3365 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3366 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3367 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3368 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3369 STMT_VINFO_DR_OFFSET (stmt_info) =
3370 fold_convert (ssizetype, offset_iv.base);
3371 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3372 size_int (highest_pow2_factor (offset_iv.base));
3374 if (dump_file && (dump_flags & TDF_DETAILS))
3376 fprintf (dump_file, "\touter base_address: ");
3377 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3378 fprintf (dump_file, "\n\touter offset from base address: ");
3379 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3380 fprintf (dump_file, "\n\touter constant offset from base address: ");
3381 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3382 fprintf (dump_file, "\n\touter step: ");
3383 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3384 fprintf (dump_file, "\n\touter aligned to: ");
3385 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3389 if (STMT_VINFO_DATA_REF (stmt_info))
3391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3393 fprintf (vect_dump,
3394 "not vectorized: more than one data ref in stmt: ");
3395 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3397 return false;
3399 STMT_VINFO_DATA_REF (stmt_info) = dr;
3401 /* Set vectype for STMT. */
3402 scalar_type = TREE_TYPE (DR_REF (dr));
3403 STMT_VINFO_VECTYPE (stmt_info) =
3404 get_vectype_for_scalar_type (scalar_type);
3405 if (!STMT_VINFO_VECTYPE (stmt_info))
3407 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3409 fprintf (vect_dump,
3410 "not vectorized: no vectype for stmt: ");
3411 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3412 fprintf (vect_dump, " scalar_type: ");
3413 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3415 return false;
3419 return true;
3423 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3425 /* Function vect_mark_relevant.
3427 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3429 static void
3430 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3431 enum vect_relevant relevant, bool live_p)
3433 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3434 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3435 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3440 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3442 tree pattern_stmt;
3444 /* This is the last stmt in a sequence that was detected as a
3445 pattern that can potentially be vectorized. Don't mark the stmt
3446 as relevant/live because it's not going to be vectorized.
3447 Instead mark the pattern-stmt that replaces it. */
3449 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3451 if (vect_print_dump_info (REPORT_DETAILS))
3452 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3453 stmt_info = vinfo_for_stmt (pattern_stmt);
3454 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3455 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3456 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3457 stmt = pattern_stmt;
3460 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3461 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3462 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3464 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3465 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3467 if (vect_print_dump_info (REPORT_DETAILS))
3468 fprintf (vect_dump, "already marked relevant/live.");
3469 return;
3472 VEC_safe_push (tree, heap, *worklist, stmt);
3476 /* Function vect_stmt_relevant_p.
3478 Return true if STMT in loop that is represented by LOOP_VINFO is
3479 "relevant for vectorization".
3481 A stmt is considered "relevant for vectorization" if:
3482 - it has uses outside the loop.
3483 - it has vdefs (it alters memory).
3484 - it is a control stmt in the loop (except for the exit condition).
3486 CHECKME: what other side effects would the vectorizer allow? */
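/* E.g. (illustrative):
     for (i = 0; i < N; i++)
       { a[i] = x; s += b[i]; }
     ... = s;
   The store to a[i] is relevant because it has a vdef; the stmt
   defining s is live because s is used after the loop. */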
3488 static bool
3489 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3490 enum vect_relevant *relevant, bool *live_p)
3492 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3493 ssa_op_iter op_iter;
3494 imm_use_iterator imm_iter;
3495 use_operand_p use_p;
3496 def_operand_p def_p;
3498 *relevant = vect_unused_in_loop;
3499 *live_p = false;
3501 /* cond stmt other than loop exit cond. */
3502 if (is_ctrl_stmt (stmt)
3503 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3504 *relevant = vect_used_in_loop;
3506 /* changing memory. */
3507 if (TREE_CODE (stmt) != PHI_NODE)
3508 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3510 if (vect_print_dump_info (REPORT_DETAILS))
3511 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3512 *relevant = vect_used_in_loop;
3515 /* uses outside the loop. */
3516 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3518 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3520 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3521 if (!flow_bb_inside_loop_p (loop, bb))
3523 if (vect_print_dump_info (REPORT_DETAILS))
3524 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3526 /* We expect all such uses to be in the loop exit phis
3527 (because of loop closed form). */
3528 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3529 gcc_assert (bb == single_exit (loop)->dest);
3531 *live_p = true;
3536 return (*live_p || *relevant);
3541 /* Function process_use.
3543 Inputs:
3544 - a USE in STMT in a loop represented by LOOP_VINFO
3545 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3546 that defined USE. This is done by calling mark_relevant and passing it
3547 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3549 Outputs:
3550 Generally, LIVE_P and RELEVANT are used to define the liveness and
3551 relevance info of the DEF_STMT of this USE:
3552 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3553 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3554 Exceptions:
3555 - case 1: If USE is used only for address computations (e.g. array indexing),
3556 which does not need to be directly vectorized, then the liveness/relevance
3557 of the respective DEF_STMT is left unchanged.
3558 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3559 skip DEF_STMT because it has already been processed.
3560 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3561 be modified accordingly.
3563 Return true if everything is as expected. Return false otherwise. */
3565 static bool
3566 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3567 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3570 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3571 stmt_vec_info dstmt_vinfo;
3572 basic_block bb, def_bb;
3573 tree def, def_stmt;
3574 enum vect_def_type dt;
3576 /* case 1: we are only interested in uses that need to be vectorized. Uses
3577 that are used for address computation are not considered relevant. */
3578 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3579 return true;
3581 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3584 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3585 return false;
3588 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3589 return true;
3591 def_bb = bb_for_stmt (def_stmt);
3592 if (!flow_bb_inside_loop_p (loop, def_bb))
3594 if (vect_print_dump_info (REPORT_DETAILS))
3595 fprintf (vect_dump, "def_stmt is out of loop.");
3596 return true;
3599 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3600 DEF_STMT must have already been processed, because this should be the
3601 only way that STMT, which is a reduction-phi, was put in the worklist,
3602 as there should be no other uses for DEF_STMT in the loop. So we just
3603 check that everything is as expected, and we are done. */
3604 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3605 bb = bb_for_stmt (stmt);
3606 if (TREE_CODE (stmt) == PHI_NODE
3607 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3608 && TREE_CODE (def_stmt) != PHI_NODE
3609 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3610 && bb->loop_father == def_bb->loop_father)
3612 if (vect_print_dump_info (REPORT_DETAILS))
3613 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3614 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3615 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3616 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3617 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3618 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3619 return true;
3622 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3623 outer-loop-header-bb:
3624 d = def_stmt
3625 inner-loop:
3626 stmt # use (d)
3627 outer-loop-tail-bb:
3628 ... */
3629 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3631 if (vect_print_dump_info (REPORT_DETAILS))
3632 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3633 switch (relevant)
3635 case vect_unused_in_loop:
3636 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3637 vect_used_by_reduction : vect_unused_in_loop;
3638 break;
3639 case vect_used_in_outer_by_reduction:
3640 relevant = vect_used_by_reduction;
3641 break;
3642 case vect_used_in_outer:
3643 relevant = vect_used_in_loop;
3644 break;
3645 case vect_used_by_reduction:
3646 case vect_used_in_loop:
3647 break;
3649 default:
3650 gcc_unreachable ();
3654 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3655 outer-loop-header-bb:
3657 inner-loop:
3658 d = def_stmt
3659 outer-loop-tail-bb:
3660 stmt # use (d) */
3661 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3663 if (vect_print_dump_info (REPORT_DETAILS))
3664 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3665 switch (relevant)
3667 case vect_unused_in_loop:
3668 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3669 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3670 break;
3672 case vect_used_in_outer_by_reduction:
3673 case vect_used_in_outer:
3674 break;
3676 case vect_used_by_reduction:
3677 relevant = vect_used_in_outer_by_reduction;
3678 break;
3680 case vect_used_in_loop:
3681 relevant = vect_used_in_outer;
3682 break;
3684 default:
3685 gcc_unreachable ();
3689 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3690 return true;
3694 /* Function vect_mark_stmts_to_be_vectorized.
3696 Not all stmts in the loop need to be vectorized. For example:
3698 for i...
3699 for j...
3700 1. T0 = i + j
3701 2. T1 = a[T0]
3703 3. j = j + 1
3705 Stmts 1 and 3 do not need to be vectorized, because loop control and
3706 addressing of vectorized data-refs are handled differently.
3708 This pass detects such stmts. */
3710 static bool
3711 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3713 VEC(tree,heap) *worklist;
3714 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3715 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3716 unsigned int nbbs = loop->num_nodes;
3717 block_stmt_iterator si;
3718 tree stmt;
3719 stmt_ann_t ann;
3720 unsigned int i;
3721 stmt_vec_info stmt_vinfo;
3722 basic_block bb;
3723 tree phi;
3724 bool live_p;
3725 enum vect_relevant relevant;
3727 if (vect_print_dump_info (REPORT_DETAILS))
3728 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3730 worklist = VEC_alloc (tree, heap, 64);
3732 /* 1. Init worklist. */
3733 for (i = 0; i < nbbs; i++)
3735 bb = bbs[i];
3736 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3738 if (vect_print_dump_info (REPORT_DETAILS))
3740 fprintf (vect_dump, "init: phi relevant? ");
3741 print_generic_expr (vect_dump, phi, TDF_SLIM);
3744 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3745 vect_mark_relevant (&worklist, phi, relevant, live_p);
3747 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3749 stmt = bsi_stmt (si);
3750 if (vect_print_dump_info (REPORT_DETAILS))
3752 fprintf (vect_dump, "init: stmt relevant? ");
3753 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3756 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3757 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3761 /* 2. Process the worklist. */
3762 while (VEC_length (tree, worklist) > 0)
3764 use_operand_p use_p;
3765 ssa_op_iter iter;
3767 stmt = VEC_pop (tree, worklist);
3768 if (vect_print_dump_info (REPORT_DETAILS))
3770 fprintf (vect_dump, "worklist: examine stmt: ");
3771 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3774 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3775 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3776 liveness and relevance properties of STMT. */
3777 ann = stmt_ann (stmt);
3778 stmt_vinfo = vinfo_for_stmt (stmt);
3779 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3780 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3782 /* Generally, the liveness and relevance properties of STMT are
3783 propagated as is to the DEF_STMTs of its USEs:
3784 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3785 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3787 One exception is when STMT has been identified as defining a reduction
3788 variable; in this case we set the liveness/relevance as follows:
3789 live_p = false
3790 relevant = vect_used_by_reduction
3791 This is because we distinguish between two kinds of relevant stmts -
3792 those that are used by a reduction computation, and those that are
3793 (also) used by a regular computation. This allows us later on to
3794 identify stmts that are used solely by a reduction, and therefore the
3795 order of the results that they produce does not have to be kept.
3797 Reduction phis are expected to be used by a reduction stmt or by a
3798 stmt in an outer loop; other reduction stmts are expected to be used
3799 only by the reduction phi in the loop, and possibly by a stmt in an
3800 outer loop. Here are the expected values of "relevant" for reduction phis/stmts:
3802 relevance:                             phi     stmt
3803 vect_unused_in_loop                            ok
3804 vect_used_in_outer_by_reduction        ok      ok
3805 vect_used_in_outer                     ok      ok
3806 vect_used_by_reduction                 ok
3807 vect_used_in_loop                                    */
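/* Editorial example (hypothetical user code): for the sum reduction

     s = 0;
     for (i = 0; i < n; i++)
       s += a[i];

   the reduction phi of 's' is used by the reduction stmt, so it is
   expected to be vect_used_by_reduction, while the reduction stmt's
   result is used only by the phi (and by the exit phi, which makes it
   live), so its relevance may remain vect_unused_in_loop; both match
   the table above.  */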
3809 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3811 enum vect_relevant tmp_relevant = relevant;
3812 switch (tmp_relevant)
3814 case vect_unused_in_loop:
3815 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3816 relevant = vect_used_by_reduction;
3817 break;
3819 case vect_used_in_outer_by_reduction:
3820 case vect_used_in_outer:
3821 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3822 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3823 break;
3825 case vect_used_by_reduction:
3826 if (TREE_CODE (stmt) == PHI_NODE)
3827 break;
3828 /* fall through */
3829 case vect_used_in_loop:
3830 default:
3831 if (vect_print_dump_info (REPORT_DETAILS))
3832 fprintf (vect_dump, "unsupported use of reduction.");
3833 VEC_free (tree, heap, worklist);
3834 return false;
3836 live_p = false;
3839 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3841 tree op = USE_FROM_PTR (use_p);
3842 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3844 VEC_free (tree, heap, worklist);
3845 return false;
3848 } /* while worklist */
3850 VEC_free (tree, heap, worklist);
3851 return true;
3855 /* Function vect_can_advance_ivs_p
3857 In case the number of times that LOOP iterates is unknown at compile
3858 time, an epilog loop will be generated, and the loop induction variables
3859 (IVs) will be "advanced" to the value they are supposed to take just before
3860 the epilog loop. Here we check that the access functions of the loop IVs
3861 and the expression that represents the loop bound are simple enough.
3862 These restrictions will be relaxed in the future. */
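/* Editorial sketch (hypothetical user code; the VF of 4 is illustrative):
   the effect of advancing the IVs is that the scalar epilog loop starts
   exactly where the vectorized loop stopped:  */
#if 0   /* illustration only */
static void
saxpy (float *y, const float *x, float a, int n)
{
  int i;
  for (i = 0; i + 4 <= n; i += 4)
    ;   /* vectorized loop: y[i:4] += a * x[i:4]  */
  for (; i < n; i++)   /* epilog: IV 'i' was advanced past the vector loop  */
    y[i] += a * x[i];
}
#endif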
3864 static bool
3865 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3867 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3868 basic_block bb = loop->header;
3869 tree phi;
3871 /* Analyze phi functions of the loop header. */
3873 if (vect_print_dump_info (REPORT_DETAILS))
3874 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3876 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3878 tree access_fn = NULL;
3879 tree evolution_part;
3881 if (vect_print_dump_info (REPORT_DETAILS))
3883 fprintf (vect_dump, "Analyze phi: ");
3884 print_generic_expr (vect_dump, phi, TDF_SLIM);
3887 /* Skip virtual phis. The data dependences that are associated with
3888 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3890 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3892 if (vect_print_dump_info (REPORT_DETAILS))
3893 fprintf (vect_dump, "virtual phi. skip.");
3894 continue;
3897 /* Skip reduction phis. */
3899 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3901 if (vect_print_dump_info (REPORT_DETAILS))
3902 fprintf (vect_dump, "reduc phi. skip.");
3903 continue;
3906 /* Analyze the evolution function. */
3908 access_fn = instantiate_parameters
3909 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3911 if (!access_fn)
3913 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "No Access function.");
3915 return false;
3918 if (vect_print_dump_info (REPORT_DETAILS))
3920 fprintf (vect_dump, "Access function of PHI: ");
3921 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3924 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3926 if (evolution_part == NULL_TREE)
3928 if (vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "No evolution.");
3930 return false;
3933 /* FORNOW: We do not transform initial conditions of IVs
3934 whose evolution functions are polynomials of degree >= 2. */
3936 if (tree_is_chrec (evolution_part))
3937 return false;
3940 return true;
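/* Editorial example (hypothetical user code): an IV whose evolution part
   is itself a chrec.  Below, 'j' evolves as {0, +, {0, +, 1}_1}_1, a
   polynomial of degree 2, so tree_is_chrec holds for its evolution part
   and vect_can_advance_ivs_p rejects the loop (the FORNOW restriction
   above).  */
#if 0   /* illustration only */
static int
triangular (int n)
{
  int i, j = 0;
  for (i = 0; i < n; i++)
    j += i;   /* step of 'j' grows with 'i': degree-2 evolution  */
  return j;
}
#endif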
3944 /* Function vect_get_loop_niters.
3946 Determine the number of iterations the loop executes.
3947 If an expression that represents the number of iterations
3948 can be constructed, place it in NUMBER_OF_ITERATIONS.
3949 Return the loop exit condition. */
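/* Editorial examples (hypothetical user code): the first loop is
   countable, so an expression for the iteration count ('n') is expected
   from number_of_exit_cond_executions; the second exits on loaded data,
   so no such expression can be constructed.  */
#if 0   /* illustration only */
static void
niters_examples (int *a, int n)
{
  int i;
  for (i = 0; i < n; i++)   /* countable: niters is 'n'  */
    a[i] = 0;
  for (i = 0; a[i] != 0; i++)   /* data-dependent exit: not countable  */
    ;
}
#endif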
3951 static tree
3952 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3954 tree niters;
3956 if (vect_print_dump_info (REPORT_DETAILS))
3957 fprintf (vect_dump, "=== get_loop_niters ===");
3959 niters = number_of_exit_cond_executions (loop);
3961 if (niters != NULL_TREE
3962 && niters != chrec_dont_know)
3964 *number_of_iterations = niters;
3966 if (vect_print_dump_info (REPORT_DETAILS))
3968 fprintf (vect_dump, "==> get_loop_niters:" );
3969 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3973 return get_loop_exit_condition (loop);
3977 /* Function vect_analyze_loop_1.
3979 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3980 for it. The different analyses will record information in the
3981 loop_vec_info struct. This is a subset of the analyses applied in
3982 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3983 that is now considered for (outer-loop) vectorization. */
3985 static loop_vec_info
3986 vect_analyze_loop_1 (struct loop *loop)
3988 loop_vec_info loop_vinfo;
3990 if (vect_print_dump_info (REPORT_DETAILS))
3991 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3993 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3995 loop_vinfo = vect_analyze_loop_form (loop);
3996 if (!loop_vinfo)
3998 if (vect_print_dump_info (REPORT_DETAILS))
3999 fprintf (vect_dump, "bad inner-loop form.");
4000 return NULL;
4003 return loop_vinfo;
4007 /* Function vect_analyze_loop_form.
4009 Verify that certain CFG restrictions hold, including:
4010 - the loop has a pre-header
4011 - the loop has a single entry and exit
4012 - the loop exit condition is simple enough, and the number of iterations
4013 can be analyzed (a countable loop). */
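/* Editorial sketch (hypothetical user code): the first loop below has a
   single entry and exit and a computable iteration count, so it passes
   the form checks; the 'break' in the second loop creates an additional
   exit, so it is rejected.  */
#if 0   /* illustration only */
static void
form_examples (int *a, const int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)   /* single entry/exit, countable: accepted  */
    a[i] = b[i];
  for (i = 0; i < n; i++)   /* 'break' adds a second exit: rejected  */
    {
      if (b[i] < 0)
        break;
      a[i] = b[i];
    }
}
#endif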
4015 loop_vec_info
4016 vect_analyze_loop_form (struct loop *loop)
4018 loop_vec_info loop_vinfo;
4019 tree loop_cond;
4020 tree number_of_iterations = NULL;
4021 loop_vec_info inner_loop_vinfo = NULL;
4023 if (vect_print_dump_info (REPORT_DETAILS))
4024 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4026 /* Different restrictions apply when we are considering an inner-most loop,
4027 vs. an outer (nested) loop.
4028 (FORNOW; some of these restrictions may be relaxed in the future.) */
4030 if (!loop->inner)
4032 /* Inner-most loop. We currently require that the number of BBs is
4033 exactly 2 (the header and latch). Vectorizable inner-most loops
4034 look like this:
4036 (pre-header)
4038 header <--------+
4039 | | |
4040 | +--> latch --+
4042 (exit-bb) */
4044 if (loop->num_nodes != 2)
4046 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4047 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4048 return NULL;
4051 if (empty_block_p (loop->header))
4053 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4054 fprintf (vect_dump, "not vectorized: empty loop.");
4055 return NULL;
4058 else
4060 struct loop *innerloop = loop->inner;
4061 edge backedge, entryedge;
4063 /* Nested loop. We currently require that the loop is doubly-nested,
4064 contains a single inner loop, and the number of BBs is exactly 5.
4065 Vectorizable outer-loops look like this:
4067 (pre-header)
4069 header <---+
4071 inner-loop |
4073 tail ------+
4075 (exit-bb)
4077 The inner-loop has the properties expected of inner-most loops
4078 as described above. */
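/* Editorial example (hypothetical user code): the accepted shape is a
   doubly-nested loop such as

     for (i = 0; i < n; i++)      <-- outer loop considered here
       for (j = 0; j < m; j++)    <-- single inner-most loop
         a[i][j] = 0;

   where the inner-loop count 'm' is invariant in the outer loop, as
   verified below.  */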
4080 if ((loop->inner)->inner || (loop->inner)->next)
4082 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4083 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4084 return NULL;
4087 /* Analyze the inner-loop. */
4088 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4089 if (!inner_loop_vinfo)
4091 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4092 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4093 return NULL;
4096 if (!expr_invariant_in_loop_p (loop,
4097 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4099 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4100 fprintf (vect_dump,
4101 "not vectorized: inner-loop count not invariant.");
4102 destroy_loop_vec_info (inner_loop_vinfo, true);
4103 return NULL;
4106 if (loop->num_nodes != 5)
4108 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4109 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4110 destroy_loop_vec_info (inner_loop_vinfo, true);
4111 return NULL;
4114 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4115 backedge = EDGE_PRED (innerloop->header, 1);
4116 entryedge = EDGE_PRED (innerloop->header, 0);
4117 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4119 backedge = EDGE_PRED (innerloop->header, 0);
4120 entryedge = EDGE_PRED (innerloop->header, 1);
4123 if (entryedge->src != loop->header
4124 || !single_exit (innerloop)
4125 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4127 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4128 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4129 destroy_loop_vec_info (inner_loop_vinfo, true);
4130 return NULL;
4133 if (vect_print_dump_info (REPORT_DETAILS))
4134 fprintf (vect_dump, "Considering outer-loop vectorization.");
4137 if (!single_exit (loop)
4138 || EDGE_COUNT (loop->header->preds) != 2)
4140 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4142 if (!single_exit (loop))
4143 fprintf (vect_dump, "not vectorized: multiple exits.");
4144 else if (EDGE_COUNT (loop->header->preds) != 2)
4145 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4147 if (inner_loop_vinfo)
4148 destroy_loop_vec_info (inner_loop_vinfo, true);
4149 return NULL;
4152 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4153 that the loop is represented as a do-while (with a proper if-guard
4154 before the loop if needed), where the loop header contains all the
4155 executable statements, and the latch is empty. */
4156 if (!empty_block_p (loop->latch)
4157 || phi_nodes (loop->latch))
4159 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4160 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4161 if (inner_loop_vinfo)
4162 destroy_loop_vec_info (inner_loop_vinfo, true);
4163 return NULL;
4166 /* Make sure there exists a single-predecessor exit bb: */
4167 if (!single_pred_p (single_exit (loop)->dest))
4169 edge e = single_exit (loop);
4170 if (!(e->flags & EDGE_ABNORMAL))
4172 split_loop_exit_edge (e);
4173 if (vect_print_dump_info (REPORT_DETAILS))
4174 fprintf (vect_dump, "split exit edge.");
4176 else
4178 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4179 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4180 if (inner_loop_vinfo)
4181 destroy_loop_vec_info (inner_loop_vinfo, true);
4182 return NULL;
4186 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4187 if (!loop_cond)
4189 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4190 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4191 if (inner_loop_vinfo)
4192 destroy_loop_vec_info (inner_loop_vinfo, true);
4193 return NULL;
4196 if (!number_of_iterations)
4198 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4199 fprintf (vect_dump,
4200 "not vectorized: number of iterations cannot be computed.");
4201 if (inner_loop_vinfo)
4202 destroy_loop_vec_info (inner_loop_vinfo, true);
4203 return NULL;
4206 if (chrec_contains_undetermined (number_of_iterations))
4208 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4209 fprintf (vect_dump, "Infinite number of iterations.");
4210 if (inner_loop_vinfo)
4211 destroy_loop_vec_info (inner_loop_vinfo, true);
4212 return NULL;
4215 if (!NITERS_KNOWN_P (number_of_iterations))
4217 if (vect_print_dump_info (REPORT_DETAILS))
4219 fprintf (vect_dump, "Symbolic number of iterations is ");
4220 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4223 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4225 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4226 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4227 if (inner_loop_vinfo)
4228 destroy_loop_vec_info (inner_loop_vinfo, false);
4229 return NULL;
4232 loop_vinfo = new_loop_vec_info (loop);
4233 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4234 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4236 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4238 /* CHECKME: May want to keep it around in the future. */
4239 if (inner_loop_vinfo)
4240 destroy_loop_vec_info (inner_loop_vinfo, false);
4242 gcc_assert (!loop->aux);
4243 loop->aux = loop_vinfo;
4244 return loop_vinfo;
4248 /* Function vect_analyze_loop.
4250 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4251 for it. The different analyses will record information in the
4252 loop_vec_info struct. */
4253 loop_vec_info
4254 vect_analyze_loop (struct loop *loop)
4256 bool ok;
4257 loop_vec_info loop_vinfo;
4259 if (vect_print_dump_info (REPORT_DETAILS))
4260 fprintf (vect_dump, "===== analyze_loop_nest =====");
4262 if (loop_outer (loop)
4263 && loop_vec_info_for_loop (loop_outer (loop))
4264 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4266 if (vect_print_dump_info (REPORT_DETAILS))
4267 fprintf (vect_dump, "outer-loop already vectorized.");
4268 return NULL;
4271 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4273 loop_vinfo = vect_analyze_loop_form (loop);
4274 if (!loop_vinfo)
4276 if (vect_print_dump_info (REPORT_DETAILS))
4277 fprintf (vect_dump, "bad loop form.");
4278 return NULL;
4281 /* Find all data references in the loop (which correspond to vdefs/vuses)
4282 and analyze their evolution in the loop.
4284 FORNOW: Handle only simple array references whose
4285 alignment can be forced, and aligned pointer references. */
4287 ok = vect_analyze_data_refs (loop_vinfo);
4288 if (!ok)
4290 if (vect_print_dump_info (REPORT_DETAILS))
4291 fprintf (vect_dump, "bad data references.");
4292 destroy_loop_vec_info (loop_vinfo, true);
4293 return NULL;
4296 /* Classify all cross-iteration scalar data-flow cycles.
4297 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4299 vect_analyze_scalar_cycles (loop_vinfo);
4301 vect_pattern_recog (loop_vinfo);
4303 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4305 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4306 if (!ok)
4308 if (vect_print_dump_info (REPORT_DETAILS))
4309 fprintf (vect_dump, "unexpected pattern.");
4310 destroy_loop_vec_info (loop_vinfo, true);
4311 return NULL;
4314 /* Analyze the alignment of the data-refs in the loop.
4315 Fail if a data reference is found that cannot be vectorized. */
4317 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4318 if (!ok)
4320 if (vect_print_dump_info (REPORT_DETAILS))
4321 fprintf (vect_dump, "bad data alignment.");
4322 destroy_loop_vec_info (loop_vinfo, true);
4323 return NULL;
4326 ok = vect_determine_vectorization_factor (loop_vinfo);
4327 if (!ok)
4329 if (vect_print_dump_info (REPORT_DETAILS))
4330 fprintf (vect_dump, "can't determine vectorization factor.");
4331 destroy_loop_vec_info (loop_vinfo, true);
4332 return NULL;
4335 /* Analyze data dependences between the data-refs in the loop.
4336 FORNOW: fail at the first data dependence that we encounter. */
4338 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4339 if (!ok)
4341 if (vect_print_dump_info (REPORT_DETAILS))
4342 fprintf (vect_dump, "bad data dependence.");
4343 destroy_loop_vec_info (loop_vinfo, true);
4344 return NULL;
4347 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4348 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4350 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4351 if (!ok)
4353 if (vect_print_dump_info (REPORT_DETAILS))
4354 fprintf (vect_dump, "bad data access.");
4355 destroy_loop_vec_info (loop_vinfo, true);
4356 return NULL;
4359 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4360 It is important to call pruning after vect_analyze_data_ref_accesses,
4361 since we use grouping information gathered by interleaving analysis. */
4362 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4363 if (!ok)
4365 if (vect_print_dump_info (REPORT_DETAILS))
4366 fprintf (vect_dump, "too long list of versioning for alias "
4367 "run-time tests.");
4368 destroy_loop_vec_info (loop_vinfo, true);
4369 return NULL;
4372 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4373 ok = vect_analyze_slp (loop_vinfo);
4374 if (ok)
4376 /* Decide which possible SLP instances to SLP. */
4377 vect_make_slp_decision (loop_vinfo);
4379 /* Find stmts that need to be both vectorized and SLPed. */
4380 vect_detect_hybrid_slp (loop_vinfo);
4383 /* This pass will decide on using loop versioning and/or loop peeling in
4384 order to enhance the alignment of data references in the loop. */
4386 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4387 if (!ok)
4389 if (vect_print_dump_info (REPORT_DETAILS))
4390 fprintf (vect_dump, "bad data alignment.");
4391 destroy_loop_vec_info (loop_vinfo, true);
4392 return NULL;
4395 /* Scan all the operations in the loop and make sure they are
4396 vectorizable. */
4398 ok = vect_analyze_operations (loop_vinfo);
4399 if (!ok)
4401 if (vect_print_dump_info (REPORT_DETAILS))
4402 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4403 destroy_loop_vec_info (loop_vinfo, true);
4404 return NULL;
4407 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4409 return loop_vinfo;
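/* Editorial usage sketch (assumed driver, modeled on the vectorizer entry
   point in tree-vectorizer.c): analysis is run first, and transformation
   is attempted only if the loop was found vectorizable.  */
#if 0   /* illustration only */
  loop_vec_info loop_vinfo = vect_analyze_loop (loop);
  if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    vect_transform_loop (loop_vinfo);   /* transform entry point (assumed) */
#endif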