/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

/* Main analysis functions.  */
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with vector size (VS) of 16 bytes, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              tree operation;

              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the smallest
                 datatype in this function, because that determines the VF.
                 If the smallest datatype in the loop is present only as the
                 rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              operation = GIMPLE_STMT_OPERAND (stmt, 1);
              if (TREE_CODE (operation) == NOP_EXPR
                  || TREE_CODE (operation) == CONVERT_EXPR
                  || TREE_CODE (operation) == WIDEN_MULT_EXPR
                  || TREE_CODE (operation) == FLOAT_EXPR)
                {
                  tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
                      TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

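/* A worked example, with illustrative numbers: under the linearity
   assumption above, an SLP instance whose inside-of-loop cost was computed
   with SLP_INSTANCE_UNROLLING_FACTOR == 2 gets that cost multiplied by
   8/2 == 4 if the final VF turns out to be 8, matching the fourfold
   increase in the number of vector stmts that will actually be created
   for each of its nodes.  */
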
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* Inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. used out of the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the user specified
     threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

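/* A worked example of the profitability threshold above, with illustrative
   numbers: for PARAM_MIN_VECT_LOOP_BOUND == 1 and VF == 4, the user-derived
   bound is min_scalar_loop_bound == 1*4 - 1 == 3; if the cost model returns
   min_profitable_iters == 10, the more conservative value th == 10 is used,
   and a loop with a known iteration count of 8 <= 10 is rejected as not
   profitable.  */
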
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

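/* A worked example of the check above: for the stmt 'a[i] = x_1' (which
   has a data reference), the lhs is an ARRAY_REF rather than an SSA_NAME,
   and the only candidate non-indexing operand is the rhs 'x_1'.  So the
   function returns true for USE == 'x_1', and false for USE == 'i', which
   is used only to index the array.  */
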
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dumy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT (FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

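/* A worked example, with illustrative inits: if drs with inits 0 and 4
   arrive as new stmts (case 1 above, DRB being the dr with the smaller
   init), DRB becomes FIRST_DR and DRA is linked after it; when a third dr
   with init 8 that is not yet in any chain is later paired with DRB
   (case 2), it is inserted by init order, giving the chain 0 -> 4 -> 8.  */
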
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have the same first location (except init) and
     that they are both either stores or loads (not a load and a store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

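/* A worked example, with illustrative numbers: for the two stores in

     for (i=0; i<N; i++)
       {
         a[i].x = ...;    # DR_INIT == 0
         a[i].y = ...;    # DR_INIT == 4
       }

   with 4-byte fields and DR_STEP == 8, we get init_b - init_a == 4 <= step
   and (init_b - init_a) % type_size == 0, so the two drs are recorded as
   one interleaving chain, with the access of a[i].x first.  */
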
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  tree stmt_i = DR_STMT (dr_i);
  tree stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run-time by versioning for alias, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to the list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to the list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* The dependence distance does not create a dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make the distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

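/* A worked example, with illustrative numbers: for 'a[i] = a[i-4] + 1' and
   VF == 4, the dependence distance is 4, so dist % vectorization_factor == 0
   and both refs are recorded as having the same alignment.  With distance 3
   (and DDR_REVERSED_P false), dist is neither a multiple of VF nor >= VF,
   so the loop is rejected with "possible dependence between data-refs".  */
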
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % UNITS_PER_SIMD_WORD == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                     TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

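/* A worked example, with illustrative numbers: for a 16-byte vectype and a
   dr whose base is known to be sufficiently aligned, DR_INIT == 20 gives
   misalign == 20 % 16 == 4, so DR_MISALIGNMENT (dr) is set to 4 bytes.  */
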
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      misal += npeel * dr_size;
      misal %= UNITS_PER_SIMD_WORD;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

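/* A worked example, with illustrative numbers: peeling npeel == 3 iterations
   for a dr of 4-byte elements with known misalignment 4 on a 16-byte vector
   word gives (4 + 3*4) % 16 == 0, i.e. this dr also becomes aligned in the
   vector loop.  */
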
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = UNITS_PER_SIMD_WORD / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

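/* A worked example of the strided case, with illustrative numbers: for a
   16-byte vector word, a vectype with 4 elements (elem_size == 4) and a
   known misalignment of 8 bytes, mis_in_elements == 2, so the prolog of
   4 - 2 == 2 iterations is a multiple of an interleaving group of size 2
   and alignment is reachable; for a group of size 3 it is not.  */
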
1690 /* Function vect_enhance_data_refs_alignment
1692 This pass will use loop versioning and loop peeling in order to enhance
1693 the alignment of data references in the loop.
1695 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1696 original loop is to be vectorized; Any other loops that are created by
1697 the transformations performed in this pass - are not supposed to be
1698 vectorized. This restriction will be relaxed.
1700 This pass will require a cost model to guide it whether to apply peeling
1701 or versioning or a combination of the two. For example, the scheme that
1702 intel uses when given a loop with several memory accesses, is as follows:
1703 choose one memory access ('p') which alignment you want to force by doing
1704 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1705 other accesses are not necessarily aligned, or (2) use loop versioning to
1706 generate one loop in which all accesses are aligned, and another loop in
1707 which only 'p' is necessarily aligned.
1709 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1710 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1711 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1713 Devising a cost model is the most critical aspect of this work. It will
1714 guide us on which access to peel for, whether to use loop versioning, how
1715 many versions to create, etc. The cost model will probably consist of
1716 generic considerations as well as target specific considerations (on
1717 powerpc for example, misaligned stores are more painful than misaligned
1718 loads).
1720 Here are the general steps involved in alignment enhancements:
1722 -- original loop, before alignment analysis:
1723 for (i=0; i<N; i++){
1724 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1725 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1728 -- After vect_compute_data_refs_alignment:
1729 for (i=0; i<N; i++){
1730 x = q[i]; # DR_MISALIGNMENT(q) = 3
1731 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1734 -- Possibility 1: we do loop versioning:
1735 if (p is aligned) {
1736 for (i=0; i<N; i++){ # loop 1A
1737 x = q[i]; # DR_MISALIGNMENT(q) = 3
1738 p[i] = y; # DR_MISALIGNMENT(p) = 0
1741 else {
1742 for (i=0; i<N; i++){ # loop 1B
1743 x = q[i]; # DR_MISALIGNMENT(q) = 3
1744 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1748 -- Possibility 2: we do loop peeling:
1749 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1750 x = q[i];
1751 p[i] = y;
1753 for (i = 3; i < N; i++){ # loop 2A
1754 x = q[i]; # DR_MISALIGNMENT(q) = 0
1755 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1758 -- Possibility 3: combination of loop peeling and versioning:
1759 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1760 x = q[i];
1761 p[i] = y;
1763 if (p is aligned) {
1764 for (i = 3; i<N; i++){ # loop 3A
1765 x = q[i]; # DR_MISALIGNMENT(q) = 0
1766 p[i] = y; # DR_MISALIGNMENT(p) = 0
1769 else {
1770 for (i = 3; i<N; i++){ # loop 3B
1771 x = q[i]; # DR_MISALIGNMENT(q) = 0
1772 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1776 These loops are later passed to loop_transform to be vectorized. The
1777 vectorizer will use the alignment information to guide the transformation
1778 (whether to generate regular loads/stores, or with special handling for
1779 misalignment). */
1781 static bool
1782 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1784 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1785 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1786 enum dr_alignment_support supportable_dr_alignment;
1787 struct data_reference *dr0 = NULL;
1788 struct data_reference *dr;
1789 unsigned int i;
1790 bool do_peeling = false;
1791 bool do_versioning = false;
1792 bool stat;
1793 tree stmt;
1794 stmt_vec_info stmt_info;
1795 int vect_versioning_for_alias_required;
1797 if (vect_print_dump_info (REPORT_DETAILS))
1798 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1800 /* While cost model enhancements are expected in the future, the high level
1801 view of the code at this time is as follows:
1803 A) If there is a misaligned write then see if peeling to align this write
1804 can make all data references satisfy vect_supportable_dr_alignment.
1805 If so, update data structures as needed and return true. Note that
1806 at this time vect_supportable_dr_alignment is known to return false
1807 for a misaligned write.
1809 B) If peeling wasn't possible and there is a data reference with an
1810 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1811 then see if loop versioning checks can be used to make all data
1812 references satisfy vect_supportable_dr_alignment. If so, update
1813 data structures as needed and return true.
1815 C) If neither peeling nor versioning were successful then return false if
1816 any data reference does not satisfy vect_supportable_dr_alignment.
1818 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1820 Note that Possibility 3 above (peeling and versioning together) is not
1821 being done at this time. */
1823 /* (1) Peeling to force alignment. */
1825 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1826 Considerations:
1827 + How many accesses will become aligned due to the peeling
1828 - How many accesses will become unaligned due to the peeling,
1829 and the cost of misaligned accesses.
1830 - The cost of peeling (the extra runtime checks, the increase
1831 in code size).
1833 The scheme we use FORNOW: peel to force the alignment of the first
1834 misaligned store in the loop.
1835 Rationale: misaligned stores are not yet supported.
1837 TODO: Use a cost model. */
1839 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1841 stmt = DR_STMT (dr);
1842 stmt_info = vinfo_for_stmt (stmt);
1844 /* For interleaving, only the alignment of the first access
1845 matters. */
1846 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1847 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1848 continue;
1850 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1852 do_peeling = vector_alignment_reachable_p (dr);
1853 if (do_peeling)
1854 dr0 = dr;
1855 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1856 fprintf (vect_dump, "vector alignment may not be reachable");
1857 break;
1861 vect_versioning_for_alias_required =
1862 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1864 /* Temporarily, if versioning for alias is required, we disable peeling
1865 until we support peeling and versioning. Often peeling for alignment
1866 will require peeling for loop-bound, which in turn requires that we
1867 know how to adjust the loop ivs after the loop. */
1868 if (vect_versioning_for_alias_required
1869 || !vect_can_advance_ivs_p (loop_vinfo)
1870 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1871 do_peeling = false;
1873 if (do_peeling)
1875 int mis;
1876 int npeel = 0;
1877 tree stmt = DR_STMT (dr0);
1878 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1879 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1880 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1882 if (known_alignment_for_access_p (dr0))
1884 /* Since it's known at compile time, compute the number of iterations
1885 in the peeled loop (the peeling factor) for use in updating
1886 DR_MISALIGNMENT values. The peeling factor is the vectorization
1887 factor minus the misalignment as an element count. */
1888 mis = DR_MISALIGNMENT (dr0);
1889 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1890 npeel = nelements - mis;
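	  /* For example, with V4SI (NELEMENTS = 4, 4-byte elements) and
	     DR_MISALIGNMENT (dr0) = 8 bytes: MIS = 8 / 4 = 2 elements and
	     NPEEL = 4 - 2 = 2, i.e. peeling two scalar iterations advances
	     the access by 8 bytes and makes it vector-size aligned.  */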
1892 /* For interleaved data access every iteration accesses all the
1893 members of the group, therefore we divide the number of iterations
1894 by the group size. */
1895 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1896 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1897 npeel /= DR_GROUP_SIZE (stmt_info);
1899 if (vect_print_dump_info (REPORT_DETAILS))
1900 fprintf (vect_dump, "Try peeling by %d", npeel);
1903 /* Ensure that all data refs can be vectorized after the peel. */
1904 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1906 int save_misalignment;
1908 if (dr == dr0)
1909 continue;
1911 stmt = DR_STMT (dr);
1912 stmt_info = vinfo_for_stmt (stmt);
1913 /* For interleaving, only the alignment of the first access
1914 matters. */
1915 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1916 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1917 continue;
1919 save_misalignment = DR_MISALIGNMENT (dr);
1920 vect_update_misalignment_for_peel (dr, dr0, npeel);
1921 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1922 SET_DR_MISALIGNMENT (dr, save_misalignment);
1924 if (!supportable_dr_alignment)
1926 do_peeling = false;
1927 break;
1931 if (do_peeling)
1933 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1934 If the misalignment of DR_i is identical to that of dr0 then set
1935 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1936 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1937 by the peeling factor times the element size of DR_i (MOD the
1938 vectorization factor times the size). Otherwise, the
1939 misalignment of DR_i must be set to unknown. */
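	  /* For example, with 4-byte elements and vectorization factor 4
	     (so the modulus is 16): if dr0 has misalignment 8 and NPEEL = 2,
	     a DR_i also misaligned by 8 becomes aligned (misalignment 0),
	     while a DR_i with known misalignment 4 becomes
	     (4 + 2*4) % 16 = 12.  */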
1940 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1941 if (dr != dr0)
1942 vect_update_misalignment_for_peel (dr, dr0, npeel);
1944 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1945 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1946 SET_DR_MISALIGNMENT (dr0, 0);
1947 if (vect_print_dump_info (REPORT_ALIGNMENT))
1948 fprintf (vect_dump, "Alignment of access forced using peeling.");
1950 if (vect_print_dump_info (REPORT_DETAILS))
1951 fprintf (vect_dump, "Peeling for alignment will be applied.");
1953 stat = vect_verify_datarefs_alignment (loop_vinfo);
1954 gcc_assert (stat);
1955 return stat;
1960 /* (2) Versioning to force alignment. */
1962 /* Try versioning if:
1963 1) flag_tree_vect_loop_version is TRUE
1964 2) optimize_size is FALSE
1965 3) there is at least one unsupported misaligned data ref with an unknown
1966 misalignment, and
1967 4) all misaligned data refs with a known misalignment are supported, and
1968 5) the number of runtime alignment checks is within reason. */
1970 do_versioning =
1971 flag_tree_vect_loop_version
1972 && (!optimize_size)
1973 && (!loop->inner); /* FORNOW */
1975 if (do_versioning)
1977 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1979 stmt = DR_STMT (dr);
1980 stmt_info = vinfo_for_stmt (stmt);
1982 /* For interleaving, only the alignment of the first access
1983 matters. */
1984 if (aligned_access_p (dr)
1985 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1986 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1987 continue;
1989 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1991 if (!supportable_dr_alignment)
1993 tree stmt;
1994 int mask;
1995 tree vectype;
1997 if (known_alignment_for_access_p (dr)
1998 || VEC_length (tree,
1999 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2000 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2002 do_versioning = false;
2003 break;
2006 stmt = DR_STMT (dr);
2007 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2008 gcc_assert (vectype);
2010 /* The rightmost bits of an aligned address must be zeros.
2011 Construct the mask needed for this test. For example,
2012 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2013 mask must be 15 = 0xf. */
2014 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
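	  /* E.g., for V4SI the mask is 15, so an address such as 0x1004
	     fails the runtime test ((0x1004 & 15) == 4), while 0x1010
	     passes; the same mask is applied to every address recorded
	     in LOOP_VINFO_MAY_MISALIGN_STMTS below.  */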
2016 /* FORNOW: use the same mask to test all potentially unaligned
2017 references in the loop. The vectorizer currently supports
2018 a single vector size, see the reference to
2019 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2020 vectorization factor is computed. */
2021 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2022 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2023 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2024 VEC_safe_push (tree, heap,
2025 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2026 DR_STMT (dr));
2030 /* Versioning requires at least one misaligned data reference. */
2031 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2032 do_versioning = false;
2033 else if (!do_versioning)
2034 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2037 if (do_versioning)
2039 VEC(tree,heap) *may_misalign_stmts
2040 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2041 tree stmt;
2043 /* It can now be assumed that the data references in the statements
2044 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2045 of the loop being vectorized. */
2046 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2048 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2049 dr = STMT_VINFO_DATA_REF (stmt_info);
2050 SET_DR_MISALIGNMENT (dr, 0);
2051 if (vect_print_dump_info (REPORT_ALIGNMENT))
2052 fprintf (vect_dump, "Alignment of access forced using versioning.");
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "Versioning for alignment will be applied.");
2058 /* Peeling and versioning can't be done together at this time. */
2059 gcc_assert (! (do_peeling && do_versioning));
2061 stat = vect_verify_datarefs_alignment (loop_vinfo);
2062 gcc_assert (stat);
2063 return stat;
2066 /* This point is reached if neither peeling nor versioning is being done. */
2067 gcc_assert (! (do_peeling || do_versioning));
2069 stat = vect_verify_datarefs_alignment (loop_vinfo);
2070 return stat;
2074 /* Function vect_analyze_data_refs_alignment
2076 Analyze the alignment of the data-references in the loop.
2077 Return FALSE if a data reference is found that cannot be vectorized. */
2079 static bool
2080 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2082 if (vect_print_dump_info (REPORT_DETAILS))
2083 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2085 if (!vect_compute_data_refs_alignment (loop_vinfo))
2087 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2088 fprintf (vect_dump,
2089 "not vectorized: can't calculate alignment for data ref.");
2090 return false;
2093 return true;
2097 /* Analyze groups of strided accesses: check that DR belongs to a group of
2098 strided accesses of legal size, step, etc. Detect gaps, single element
2099 interleaving, and other special cases. Set strided access info.
2100 Collect groups of strided stores for further use in SLP analysis. */
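/* For example, the two stores in
     for (i = 0; i < N; i++)
       {
         a[2*i]   = x;
         a[2*i+1] = y;
       }
   form an interleaving group of size 2: the step (2 elements) equals
   the group size, so there are no gaps.  */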
2102 static bool
2103 vect_analyze_group_access (struct data_reference *dr)
2105 tree step = DR_STEP (dr);
2106 tree scalar_type = TREE_TYPE (DR_REF (dr));
2107 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2108 tree stmt = DR_STMT (dr);
2109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2111 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2112 HOST_WIDE_INT stride;
2113 bool slp_impossible = false;
2115 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2116 interleaving group (including gaps). */
2117 stride = dr_step / type_size;
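  /* E.g., a 4-byte element accessed with a step of 32 bytes gives
     STRIDE = 8: the group spans 8 element slots per iteration,
     including any gaps.  */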
2119 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2120 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2122 /* Check if this DR is part of an interleaving group, and is a single
2123 element of the group that is accessed in the loop. */
2125 /* Gaps are supported only for loads. STEP must be a multiple of the type
2126 size. The size of the group must be a power of 2. */
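  /* E.g., x = a[2*i] (4-byte elements, step 8) is accepted as single
     element interleaving with STRIDE = 2, whereas x = a[3*i] (step 12,
     STRIDE = 3) is rejected because 3 is not a power of 2.  */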
2127 if (DR_IS_READ (dr)
2128 && (dr_step % type_size) == 0
2129 && stride > 0
2130 && exact_log2 (stride) != -1)
2132 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2133 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2134 if (vect_print_dump_info (REPORT_DR_DETAILS))
2136 fprintf (vect_dump, "Detected single element interleaving %d ",
2137 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2138 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2139 fprintf (vect_dump, " step ");
2140 print_generic_expr (vect_dump, step, TDF_SLIM);
2142 return true;
2144 if (vect_print_dump_info (REPORT_DETAILS))
2145 fprintf (vect_dump, "not consecutive access");
2146 return false;
2149 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2151 /* First stmt in the interleaving chain. Check the chain. */
2152 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2153 struct data_reference *data_ref = dr;
2154 unsigned int count = 1;
2155 tree next_step;
2156 tree prev_init = DR_INIT (data_ref);
2157 tree prev = stmt;
2158 HOST_WIDE_INT diff, count_in_bytes;
2160 while (next)
2162 /* Skip same data-refs. In case two or more stmts share a data-ref
2163 (supported only for loads), we vectorize only the first stmt, and
2164 the rest get their vectorized loads from the first one. */
2165 if (!tree_int_cst_compare (DR_INIT (data_ref),
2166 DR_INIT (STMT_VINFO_DATA_REF (
2167 vinfo_for_stmt (next)))))
2169 if (!DR_IS_READ (data_ref))
2171 if (vect_print_dump_info (REPORT_DETAILS))
2172 fprintf (vect_dump, "Two store stmts share the same dr.");
2173 return false;
2176 /* Check that there are no load-store dependencies for these loads,
2177 to prevent a load-store-load sequence to the same location. */
2178 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2179 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2181 if (vect_print_dump_info (REPORT_DETAILS))
2182 fprintf (vect_dump,
2183 "READ_WRITE dependence in interleaving.");
2184 return false;
2188 /* For loads, reuse the data-ref of the first load. */
2188 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2190 prev = next;
2191 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2192 continue;
2194 prev = next;
2196 /* Check that all the accesses have the same STEP. */
2197 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2198 if (tree_int_cst_compare (step, next_step))
2200 if (vect_print_dump_info (REPORT_DETAILS))
2201 fprintf (vect_dump, "not consecutive access in interleaving");
2202 return false;
2205 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2206 /* Check that the distance between two accesses is equal to the type
2207 size. Otherwise, we have gaps. */
2208 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2209 - TREE_INT_CST_LOW (prev_init)) / type_size;
2210 if (diff != 1)
2212 /* FORNOW: SLP of accesses with gaps is not supported. */
2213 slp_impossible = true;
2214 if (!DR_IS_READ (data_ref))
2216 if (vect_print_dump_info (REPORT_DETAILS))
2217 fprintf (vect_dump, "interleaved store with gaps");
2218 return false;
2222 /* Store the gap from the previous member of the group. If there is no
2223 gap in the access, DR_GROUP_GAP is always 1. */
2224 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
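	  /* E.g., for 4-byte elements with DR_INITs 0 and 8 within the
	     group, DIFF = (8 - 0) / 4 = 2: one element slot is skipped
	     between the two members.  */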
2226 prev_init = DR_INIT (data_ref);
2227 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2228 /* Count the number of data-refs in the chain. */
2229 count++;
2232 /* COUNT is the number of accesses found; we multiply it by the size of
2233 the type to get COUNT_IN_BYTES. */
2234 count_in_bytes = type_size * count;
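      /* E.g., a group of three 4-byte loads {a[4*i], a[4*i+1], a[4*i+2]}
	 has COUNT_IN_BYTES = 12 and DR_STEP = 16: fine for loads (the
	 fourth slot is a gap), but a store group with that step would be
	 rejected below.  */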
2236 /* Check that the size of the interleaving is not greater than STEP. */
2237 if (dr_step < count_in_bytes)
2239 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "interleaving size is greater than step for ");
2242 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2244 return false;
2247 /* Check that the size of the interleaving is equal to STEP for stores,
2248 i.e., that there are no gaps. */
2249 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2251 if (vect_print_dump_info (REPORT_DETAILS))
2252 fprintf (vect_dump, "interleaved store with gaps");
2253 return false;
2256 /* Check that STEP is a multiple of type size. */
2257 if ((dr_step % type_size) != 0)
2259 if (vect_print_dump_info (REPORT_DETAILS))
2261 fprintf (vect_dump, "step is not a multiple of type size: step ");
2262 print_generic_expr (vect_dump, step, TDF_SLIM);
2263 fprintf (vect_dump, " size ");
2264 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2265 TDF_SLIM);
2267 return false;
2270 /* FORNOW: we handle only interleaving that is a power of 2.
2271 We don't fail here if it may still be possible to vectorize the
2272 group using SLP. If not, the size of the group will be checked in
2273 vect_analyze_operations, and the vectorization will fail. */
2274 if (exact_log2 (stride) == -1)
2276 if (vect_print_dump_info (REPORT_DETAILS))
2277 fprintf (vect_dump, "interleaving is not a power of 2");
2279 if (slp_impossible)
2280 return false;
2282 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2283 if (vect_print_dump_info (REPORT_DETAILS))
2284 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2286 /* SLP: create an SLP data structure for every interleaving group of
2287 stores for further analysis in vect_analyze_slp. */
2288 if (!DR_IS_READ (dr) && !slp_impossible)
2289 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2292 return true;
2296 /* Analyze the access pattern of the data-reference DR.
2297 In the case of non-consecutive accesses, call vect_analyze_group_access() to
2298 analyze groups of strided accesses. */
2300 static bool
2301 vect_analyze_data_ref_access (struct data_reference *dr)
2303 tree step = DR_STEP (dr);
2304 tree scalar_type = TREE_TYPE (DR_REF (dr));
2305 tree stmt = DR_STMT (dr);
2306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2307 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2309 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2311 if (!step)
2313 if (vect_print_dump_info (REPORT_DETAILS))
2314 fprintf (vect_dump, "bad data-ref access");
2315 return false;
2318 /* Don't allow invariant accesses. */
2319 if (dr_step == 0)
2320 return false;
2322 if (nested_in_vect_loop_p (loop, stmt))
2324 /* Interleaved accesses are not yet supported within outer-loop
2325 vectorization for references in the inner-loop. */
2326 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2328 /* For the rest of the analysis we use the outer-loop step. */
2329 step = STMT_VINFO_DR_STEP (stmt_info);
2330 dr_step = TREE_INT_CST_LOW (step);
2332 if (dr_step == 0)
2334 if (vect_print_dump_info (REPORT_ALIGNMENT))
2335 fprintf (vect_dump, "zero step in outer loop.");
2336 if (DR_IS_READ (dr))
2337 return true;
2338 else
2339 return false;
2343 /* Consecutive? */
2344 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2346 /* Mark that it is not interleaving. */
2347 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2348 return true;
2351 if (nested_in_vect_loop_p (loop, stmt))
2353 if (vect_print_dump_info (REPORT_ALIGNMENT))
2354 fprintf (vect_dump, "strided access in outer loop.");
2355 return false;
2358 /* Not a consecutive access - check whether it is part of an interleaving group. */
2359 return vect_analyze_group_access (dr);
2363 /* Function vect_analyze_data_ref_accesses.
2365 Analyze the access pattern of all the data references in the loop.
2367 FORNOW: the only access pattern that is considered vectorizable is a
2368 simple step 1 (consecutive) access.
2370 FORNOW: handle only arrays and pointer accesses. */
2372 static bool
2373 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2375 unsigned int i;
2376 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2377 struct data_reference *dr;
2379 if (vect_print_dump_info (REPORT_DETAILS))
2380 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2382 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2383 if (!vect_analyze_data_ref_access (dr))
2385 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2386 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2387 return false;
2390 return true;
2393 /* Function vect_prune_runtime_alias_test_list.
2395 Prune a list of ddrs to be tested at run-time by versioning for alias.
2396 Return FALSE if the resulting list of ddrs is longer than allowed by
2397 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2399 static bool
2400 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2402 VEC (ddr_p, heap) * ddrs =
2403 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2404 unsigned i, j;
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2409 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2411 bool found;
2412 ddr_p ddr_i;
2414 ddr_i = VEC_index (ddr_p, ddrs, i);
2415 found = false;
2417 for (j = 0; j < i; j++)
2419 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2421 if (vect_vfa_range_equal (ddr_i, ddr_j))
2423 if (vect_print_dump_info (REPORT_DR_DETAILS))
2425 fprintf (vect_dump, "found equal ranges ");
2426 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2427 fprintf (vect_dump, ", ");
2428 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2429 fprintf (vect_dump, " and ");
2430 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2431 fprintf (vect_dump, ", ");
2432 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2434 found = true;
2435 break;
2439 if (found)
2441 VEC_ordered_remove (ddr_p, ddrs, i);
2442 continue;
2444 i++;
2447 if (VEC_length (ddr_p, ddrs) >
2448 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2450 if (vect_print_dump_info (REPORT_DR_DETAILS))
2452 fprintf (vect_dump,
2453 "disable versioning for alias - max number of generated "
2454 "checks exceeded.");
2457 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2459 return false;
2462 return true;
2465 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2467 void
2468 vect_free_slp_tree (slp_tree node)
2470 if (!node)
2471 return;
2473 if (SLP_TREE_LEFT (node))
2474 vect_free_slp_tree (SLP_TREE_LEFT (node));
2476 if (SLP_TREE_RIGHT (node))
2477 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2479 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2481 if (SLP_TREE_VEC_STMTS (node))
2482 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2484 free (node);
2488 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2489 of a legal type and that they match the defs of the first stmt of the SLP
2490 group (stored in FIRST_STMT_...). */
2492 static bool
2493 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2494 tree rhs, VEC (tree, heap) **def_stmts0,
2495 VEC (tree, heap) **def_stmts1,
2496 enum vect_def_type *first_stmt_dt0,
2497 enum vect_def_type *first_stmt_dt1,
2498 tree *first_stmt_def0_type,
2499 tree *first_stmt_def1_type,
2500 tree *first_stmt_const_oprnd,
2501 int ncopies_for_cost)
2503 tree oprnd;
2504 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2505 unsigned int i, number_of_oprnds = op_type;
2506 tree def, def_stmt;
2507 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2508 stmt_vec_info stmt_info =
2509 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2511 /* Store. */
2512 if (!op_type)
2513 number_of_oprnds = 1;
2514 else
2515 gcc_assert (op_type == unary_op || op_type == binary_op);
2517 for (i = 0; i < number_of_oprnds; i++)
2519 if (op_type)
2520 oprnd = TREE_OPERAND (rhs, i);
2521 else
2522 oprnd = rhs;
2524 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2525 || (!def_stmt && dt[i] != vect_constant_def))
2527 if (vect_print_dump_info (REPORT_SLP))
2529 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2530 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2533 return false;
2536 if (!*first_stmt_dt0)
2538 /* op0 of the first stmt of the group - store its info. */
2539 *first_stmt_dt0 = dt[i];
2540 if (def)
2541 *first_stmt_def0_type = TREE_TYPE (def);
2542 else
2543 *first_stmt_const_oprnd = oprnd;
2545 /* Analyze costs (for the first stmt of the group only). */
2546 if (op_type)
2548 /* Not a memory operation (we don't call this function for loads). */
2548 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2549 else
2550 /* Store. */
2551 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2554 else
2556 if (!*first_stmt_dt1 && i == 1)
2558 /* op1 of the first stmt of the group - store its info. */
2559 *first_stmt_dt1 = dt[i];
2560 if (def)
2561 *first_stmt_def1_type = TREE_TYPE (def);
2562 else
2564 /* We assume that the stmt contains only one constant
2565 operand. We fail otherwise, to be on the safe side. */
2566 if (*first_stmt_const_oprnd)
2568 if (vect_print_dump_info (REPORT_SLP))
2569 fprintf (vect_dump, "Build SLP failed: two constant "
2570 "oprnds in stmt");
2571 return false;
2573 *first_stmt_const_oprnd = oprnd;
2576 else
2578 /* Not the first stmt of the group; check that the def-stmt/s match
2579 the def-stmt/s of the first stmt. */
2580 if ((i == 0
2581 && (*first_stmt_dt0 != dt[i]
2582 || (*first_stmt_def0_type && def
2583 && *first_stmt_def0_type != TREE_TYPE (def))))
2584 || (i == 1
2585 && (*first_stmt_dt1 != dt[i]
2586 || (*first_stmt_def1_type && def
2587 && *first_stmt_def1_type != TREE_TYPE (def))))
2588 || (!def
2589 && TREE_TYPE (*first_stmt_const_oprnd)
2590 != TREE_TYPE (oprnd)))
2592 if (vect_print_dump_info (REPORT_SLP))
2593 fprintf (vect_dump, "Build SLP failed: different types ");
2595 return false;
2600 /* Check the types of the definitions. */
2601 switch (dt[i])
2603 case vect_constant_def:
2604 case vect_invariant_def:
2605 break;
2607 case vect_loop_def:
2608 if (i == 0)
2609 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2610 else
2611 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2612 break;
2614 default:
2615 /* FORNOW: Not supported. */
2616 if (vect_print_dump_info (REPORT_SLP))
2618 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2619 print_generic_expr (vect_dump, def, TDF_SLIM);
2622 return false;
2626 return true;
2630 /* Recursively build an SLP tree starting from NODE.
2631 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2632 permutation or are of unsupported types of operation. Otherwise, return
2633 TRUE.
2634 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2635 in the case of multiple types for now. */
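/* For example, starting from the store group
     a[2*i]   = b[2*i]   + c[2*i];
     a[2*i+1] = b[2*i+1] + c[2*i+1];
   the root node holds the two stores, its child holds the two
   additions, and that node's left and right children hold the load
   groups from 'b' and 'c', where the recursion stops.  */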
2637 static bool
2638 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2639 unsigned int group_size, bool *slp_impossible,
2640 int *inside_cost, int *outside_cost,
2641 int ncopies_for_cost)
2643 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2644 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2645 unsigned int i;
2646 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2647 tree stmt = VEC_index (tree, stmts, 0);
2648 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2649 enum tree_code first_stmt_code = 0;
2650 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2651 tree lhs, rhs, prev_stmt = NULL_TREE;
2652 bool stop_recursion = false, need_same_oprnds = false;
2653 tree vectype, scalar_type, first_op1 = NULL_TREE;
2654 unsigned int vectorization_factor = 0, ncopies;
2655 optab optab;
2656 int icode;
2657 enum machine_mode optab_op2_mode;
2658 enum machine_mode vec_mode;
2659 tree first_stmt_const_oprnd = NULL_TREE;
2660 struct data_reference *first_dr;
2662 /* For every stmt in NODE find its def stmt/s. */
2663 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2665 if (vect_print_dump_info (REPORT_SLP))
2667 fprintf (vect_dump, "Build SLP for ");
2668 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2671 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2673 if (vect_print_dump_info (REPORT_SLP))
2675 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2676 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2679 return false;
2682 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2683 vectype = get_vectype_for_scalar_type (scalar_type);
2684 if (!vectype)
2686 if (vect_print_dump_info (REPORT_SLP))
2688 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2689 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2691 return false;
2694 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2695 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2696 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2697 if (ncopies > 1)
2699 /* FORNOW. */
2700 if (vect_print_dump_info (REPORT_SLP))
2701 fprintf (vect_dump, "SLP failed - multiple types ");
2703 *slp_impossible = true;
2704 return false;
2707 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2708 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2710 /* Check the operation. */
2711 if (i == 0)
2713 first_stmt_code = TREE_CODE (rhs);
2715 /* Shift arguments should be equal in all the packed stmts for a
2716 vector shift with scalar shift operand. */
2717 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2719 vec_mode = TYPE_MODE (vectype);
2720 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2721 if (!optab)
2723 if (vect_print_dump_info (REPORT_SLP))
2724 fprintf (vect_dump, "Build SLP failed: no optab.");
2725 return false;
2727 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2728 if (icode == CODE_FOR_nothing)
2730 if (vect_print_dump_info (REPORT_SLP))
2731 fprintf (vect_dump,
2732 "Build SLP failed: op not supported by target.");
2733 return false;
2735 optab_op2_mode = insn_data[icode].operand[2].mode;
2736 if (!VECTOR_MODE_P (optab_op2_mode))
2738 need_same_oprnds = true;
2739 first_op1 = TREE_OPERAND (rhs, 1);
2743 else
2745 if (first_stmt_code != TREE_CODE (rhs))
2747 if (vect_print_dump_info (REPORT_SLP))
2749 fprintf (vect_dump,
2750 "Build SLP failed: different operation in stmt ");
2751 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2754 return false;
2757 if (need_same_oprnds
2758 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2760 if (vect_print_dump_info (REPORT_SLP))
2762 fprintf (vect_dump,
2763 "Build SLP failed: different shift arguments in ");
2764 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2767 return false;
2771 /* Strided store or load. */
2772 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2774 if (REFERENCE_CLASS_P (lhs))
2776 /* Store. */
2777 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2778 &def_stmts0, &def_stmts1,
2779 &first_stmt_dt0,
2780 &first_stmt_dt1,
2781 &first_stmt_def0_type,
2782 &first_stmt_def1_type,
2783 &first_stmt_const_oprnd,
2784 ncopies_for_cost))
2785 return false;
2787 else
2789 /* Load. */
2790 if (i == 0)
2792 /* First stmt of the SLP group should be the first load of
2793 the interleaving chain if data permutation is not
2794 allowed. */
2795 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2797 /* FORNOW: data permutations are not supported. */
2798 if (vect_print_dump_info (REPORT_SLP))
2800 fprintf (vect_dump, "Build SLP failed: strided "
2801 "loads need permutation ");
2802 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2805 return false;
2808 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2809 if (vect_supportable_dr_alignment (first_dr)
2810 == dr_unaligned_unsupported)
2812 if (vect_print_dump_info (REPORT_SLP))
2814 fprintf (vect_dump, "Build SLP failed: unsupported "
2815 "unaligned load ");
2816 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2819 return false;
2822 /* Analyze costs (for the first stmt in the group). */
2823 vect_model_load_cost (vinfo_for_stmt (stmt),
2824 ncopies_for_cost, *node);
2826 else
2828 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2830 /* FORNOW: data permutations are not supported. */
2831 if (vect_print_dump_info (REPORT_SLP))
2833 fprintf (vect_dump, "Build SLP failed: strided "
2834 "loads need permutation ");
2835 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2837 return false;
2841 prev_stmt = stmt;
2843 /* We stop growing the tree when we reach a group of loads. */
2844 stop_recursion = true;
2845 continue;
2847 } /* Strided access. */
2848 else
2850 if (REFERENCE_CLASS_P (rhs))
2852 /* Not a strided load. */
2853 if (vect_print_dump_info (REPORT_SLP))
2855 fprintf (vect_dump, "Build SLP failed: not strided load ");
2856 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2859 /* FORNOW: non-strided loads are not supported. */
2860 return false;
2863 /* Not a memory operation. */
2864 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2866 if (vect_print_dump_info (REPORT_SLP))
2868 fprintf (vect_dump, "Build SLP failed: operation unsupported ");
2870 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2873 return false;
2876 /* Find the def-stmts. */
2877 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2878 &def_stmts1, &first_stmt_dt0,
2879 &first_stmt_dt1,
2880 &first_stmt_def0_type,
2881 &first_stmt_def1_type,
2882 &first_stmt_const_oprnd,
2883 ncopies_for_cost))
2884 return false;
2888 /* Add the costs of the node to the overall instance costs. */
2889 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2890 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2892 /* Strided loads were reached - stop the recursion. */
2893 if (stop_recursion)
2894 return true;
2896 /* Create SLP_TREE nodes for the definition node/s. */
2897 if (first_stmt_dt0 == vect_loop_def)
2899 slp_tree left_node = XNEW (struct _slp_tree);
2900 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2901 SLP_TREE_VEC_STMTS (left_node) = NULL;
2902 SLP_TREE_LEFT (left_node) = NULL;
2903 SLP_TREE_RIGHT (left_node) = NULL;
2904 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2905 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2906 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2907 slp_impossible, inside_cost, outside_cost,
2908 ncopies_for_cost))
2909 return false;
2911 SLP_TREE_LEFT (*node) = left_node;
2914 if (first_stmt_dt1 == vect_loop_def)
2916 slp_tree right_node = XNEW (struct _slp_tree);
2917 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2918 SLP_TREE_VEC_STMTS (right_node) = NULL;
2919 SLP_TREE_LEFT (right_node) = NULL;
2920 SLP_TREE_RIGHT (right_node) = NULL;
2921 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2922 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2923 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2924 slp_impossible, inside_cost, outside_cost,
2925 ncopies_for_cost))
2926 return false;
2928 SLP_TREE_RIGHT (*node) = right_node;
2931 return true;
2935 static void
2936 vect_print_slp_tree (slp_tree node)
2938 int i;
2939 tree stmt;
2941 if (!node)
2942 return;
2944 fprintf (vect_dump, "node ");
2945 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2947 fprintf (vect_dump, "\n\tstmt %d ", i);
2948 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2950 fprintf (vect_dump, "\n");
2952 vect_print_slp_tree (SLP_TREE_LEFT (node));
2953 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2957 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2958 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2959 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2960 stmts in NODE are to be marked. */
2962 static void
2963 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2965 int i;
2966 tree stmt;
2968 if (!node)
2969 return;
2971 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2972 if (j < 0 || i == j)
2973 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2975 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2976 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2980 /* Analyze an SLP instance starting from a group of strided stores. Call
2981 vect_build_slp_tree to build a tree of packed stmts if possible.
2982 Return FALSE if it's impossible to SLP any stmt in the loop. */
2984 static bool
2985 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2987 slp_instance new_instance;
2988 slp_tree node = XNEW (struct _slp_tree);
2989 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2990 unsigned int unrolling_factor = 1, nunits;
2991 tree vectype, scalar_type, next;
2992 unsigned int vectorization_factor = 0, ncopies;
2993 bool slp_impossible = false;
2994 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2996 /* FORNOW: multiple types are not supported. */
2997 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2998 vectype = get_vectype_for_scalar_type (scalar_type);
2999 if (!vectype)
3001 if (vect_print_dump_info (REPORT_SLP))
3003 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3004 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3006 return false;
3009 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3010 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3011 ncopies = vectorization_factor / nunits;
3012 if (ncopies > 1)
3014 if (vect_print_dump_info (REPORT_SLP))
3015 fprintf (vect_dump, "SLP failed - multiple types ");
3017 return false;
3020 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3021 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3022 next = stmt;
3023 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3024 while (next)
3026 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3027 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3030 SLP_TREE_VEC_STMTS (node) = NULL;
3031 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3032 SLP_TREE_LEFT (node) = NULL;
3033 SLP_TREE_RIGHT (node) = NULL;
3034 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3035 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3037 /* Calculate the unrolling factor. */
3038 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3040 /* Calculate the number of vector stmts to create based on the unrolling
3041 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3042 GROUP_SIZE / NUNITS otherwise). */
3043 ncopies_for_cost = unrolling_factor * group_size / nunits;
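   /* For example, with NUNITS = 4 and GROUP_SIZE = 2:
	unrolling_factor = least_common_multiple (4, 2) / 2 = 2;
	ncopies_for_cost = 2 * 2 / 4 = 1;
      i.e., one vector stmt covers two scalar iterations of the group.  */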
3045 /* Build the tree for the SLP instance. */
3046 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3047 &inside_cost, &outside_cost, ncopies_for_cost))
3049 /* Create a new SLP instance. */
3050 new_instance = XNEW (struct _slp_instance);
3051 SLP_INSTANCE_TREE (new_instance) = node;
3052 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3053 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3054 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3055 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3056 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3057 new_instance);
3058 if (vect_print_dump_info (REPORT_SLP))
3059 vect_print_slp_tree (node);
3061 return true;
3064 /* Failed to SLP. */
3065 /* Free the allocated memory. */
3066 vect_free_slp_tree (node);
3068 if (slp_impossible)
3069 return false;
3071 /* SLP failed for this instance, but it is still possible to SLP other stmts
3072 in the loop. */
3073 return true;
3077 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
3078 trees of packed scalar stmts if SLP is possible. */
3080 static bool
3081 vect_analyze_slp (loop_vec_info loop_vinfo)
3083 unsigned int i;
3084 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3085 tree store;
3087 if (vect_print_dump_info (REPORT_SLP))
3088 fprintf (vect_dump, "=== vect_analyze_slp ===");
3090 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3091 if (!vect_analyze_slp_instance (loop_vinfo, store))
3093 /* SLP failed. No instance can be SLPed in the loop. */
3094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3095 fprintf (vect_dump, "SLP failed.");
3097 return false;
3100 return true;
3104 /* For each possible SLP instance decide whether to SLP it and calculate the overall
3105 unrolling factor needed to SLP the loop. */
3107 static void
3108 vect_make_slp_decision (loop_vec_info loop_vinfo)
3110 unsigned int i, unrolling_factor = 1;
3111 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3112 slp_instance instance;
3113 int decided_to_slp = 0;
3115 if (vect_print_dump_info (REPORT_SLP))
3116 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3118 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3120 /* FORNOW: SLP if you can. */
3121 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3122 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3124 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3125 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3126 loop-based vectorization. Such stmts will be marked as HYBRID. */
3127 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3128 decided_to_slp++;
3131 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3133 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3134 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3135 decided_to_slp, unrolling_factor);
3139 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3140 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
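/* For example, if a stmt in an SLP node computes a value that is also
   used by a stmt outside any SLP instance (one vectorized with
   loop-based vectorization), the SLP stmt must be marked HYBRID: it
   needs both an SLP vector copy and a loop-based vector copy.  */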
3142 static void
3143 vect_detect_hybrid_slp_stmts (slp_tree node)
3145 int i;
3146 tree stmt;
3147 imm_use_iterator imm_iter;
3148 tree use_stmt;
3150 if (!node)
3151 return;
3153 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3154 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3155 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3156 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3157 if (vinfo_for_stmt (use_stmt)
3158 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3159 vect_mark_slp_stmts (node, hybrid, i);
3161 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3162 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3166 /* Find stmts that must be both vectorized and SLPed. */
3168 static void
3169 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3171 unsigned int i;
3172 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3173 slp_instance instance;
3175 if (vect_print_dump_info (REPORT_SLP))
3176 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3178 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3179 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3183 /* Function vect_analyze_data_refs.
3185 Find all the data references in the loop.
3187 The general structure of the analysis of data refs in the vectorizer is as
3188 follows:
3189 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3190 find and analyze all data-refs in the loop and their dependences.
3191 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3192 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3193 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3197 static bool
3198 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3200 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3201 unsigned int i;
3202 VEC (data_reference_p, heap) *datarefs;
3203 struct data_reference *dr;
3204 tree scalar_type;
3206 if (vect_print_dump_info (REPORT_DETAILS))
3207 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
3209 compute_data_dependences_for_loop (loop, true,
3210 &LOOP_VINFO_DATAREFS (loop_vinfo),
3211 &LOOP_VINFO_DDRS (loop_vinfo));
3213 /* Go through the data-refs and check that the analysis succeeded. Update the
3214 pointer from the stmt_vec_info struct to the DR and vectype. */
3215 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3217 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3219 tree stmt;
3220 stmt_vec_info stmt_info;
3221 basic_block bb;
3222 tree base, offset, init;
3224 if (!dr || !DR_REF (dr))
3226 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3227 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3228 return false;
3231 stmt = DR_STMT (dr);
3232 stmt_info = vinfo_for_stmt (stmt);
3234 /* Check that analysis of the data-ref succeeded. */
3235 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3236 || !DR_STEP (dr))
3238 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3240 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3241 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3243 return false;
3246 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3248 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3249 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3250 "constant");
3251 return false;
3254 if (!DR_SYMBOL_TAG (dr))
3256 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3258 fprintf (vect_dump, "not vectorized: no memory tag for ");
3259 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3261 return false;
3264 base = unshare_expr (DR_BASE_ADDRESS (dr));
3265 offset = unshare_expr (DR_OFFSET (dr));
3266 init = unshare_expr (DR_INIT (dr));
3268 /* Update DR field in stmt_vec_info struct. */
3269 bb = bb_for_stmt (stmt);
3271 /* If the dataref is in an inner-loop of the loop that is considered
3272 for vectorization, we also want to analyze the access relative to
3273 the outer-loop (DR contains information only relative to the
3274 inner-most enclosing loop). We do that by building a reference to the
3275 first location accessed by the inner-loop, and analyze it relative to
3276 the outer-loop. */
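	  /* For example, given
	       for (i = 0; i < N; i++)
	         for (j = 0; j < M; j++)
	           ... = a[i][j];
	     the DR describes the access relative to the inner j-loop
	     (step 4 for 4-byte elements); relative to the outer loop we
	     analyze the first location accessed, &a[i][0], whose step is
	     the row size, 4*M bytes.  */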
3277 if (nested_in_vect_loop_p (loop, stmt))
3279 tree outer_step, outer_base, outer_init;
3280 HOST_WIDE_INT pbitsize, pbitpos;
3281 tree poffset;
3282 enum machine_mode pmode;
3283 int punsignedp, pvolatilep;
3284 affine_iv base_iv, offset_iv;
3285 tree dinit;
3287 /* Build a reference to the first location accessed by the
3288 inner-loop: *(BASE+INIT). (The first location is actually
3289 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3290 tree inner_base = build_fold_indirect_ref
3291 (fold_build2 (POINTER_PLUS_EXPR,
3292 TREE_TYPE (base), base,
3293 fold_convert (sizetype, init)));
3295 if (vect_print_dump_info (REPORT_DETAILS))
3297 fprintf (dump_file, "analyze in outer-loop: ");
3298 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3301 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3302 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3303 gcc_assert (outer_base != NULL_TREE);
3305 if (pbitpos % BITS_PER_UNIT != 0)
3307 if (vect_print_dump_info (REPORT_DETAILS))
3308 fprintf (dump_file, "failed: bit offset alignment.\n");
3309 return false;
3312 outer_base = build_fold_addr_expr (outer_base);
3313 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3315 if (vect_print_dump_info (REPORT_DETAILS))
3316 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3317 return false;
3320 if (offset)
3322 if (poffset)
3323 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3324 else
3325 poffset = offset;
3328 if (!poffset)
3330 offset_iv.base = ssize_int (0);
3331 offset_iv.step = ssize_int (0);
3333 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3335 if (vect_print_dump_info (REPORT_DETAILS))
3336 fprintf (dump_file, "evolution of offset is not affine.\n");
3337 return false;
3340 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3341 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3342 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3343 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3344 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3346 outer_step = size_binop (PLUS_EXPR,
3347 fold_convert (ssizetype, base_iv.step),
3348 fold_convert (ssizetype, offset_iv.step));
3350 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3351 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3352 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3353 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3354 STMT_VINFO_DR_OFFSET (stmt_info) =
3355 fold_convert (ssizetype, offset_iv.base);
3356 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3357 size_int (highest_pow2_factor (offset_iv.base));
3359 if (dump_file && (dump_flags & TDF_DETAILS))
3361 fprintf (dump_file, "\touter base_address: ");
3362 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3363 fprintf (dump_file, "\n\touter offset from base address: ");
3364 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3365 fprintf (dump_file, "\n\touter constant offset from base address: ");
3366 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3367 fprintf (dump_file, "\n\touter step: ");
3368 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3369 fprintf (dump_file, "\n\touter aligned to: ");
3370 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3374 if (STMT_VINFO_DATA_REF (stmt_info))
3376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3378 fprintf (vect_dump,
3379 "not vectorized: more than one data ref in stmt: ");
3380 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3382 return false;
3384 STMT_VINFO_DATA_REF (stmt_info) = dr;
3386 /* Set vectype for STMT. */
3387 scalar_type = TREE_TYPE (DR_REF (dr));
3388 STMT_VINFO_VECTYPE (stmt_info) =
3389 get_vectype_for_scalar_type (scalar_type);
3390 if (!STMT_VINFO_VECTYPE (stmt_info))
3392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3394 fprintf (vect_dump,
3395 "not vectorized: no vectype for stmt: ");
3396 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3397 fprintf (vect_dump, " scalar_type: ");
3398 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3400 return false;
3404 return true;
3408 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3410 /* Function vect_mark_relevant.
3412 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3414 static void
3415 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3416 enum vect_relevant relevant, bool live_p)
3418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3419 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3420 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3422 if (vect_print_dump_info (REPORT_DETAILS))
3423 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3425 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3427 tree pattern_stmt;
3429 /* This is the last stmt in a sequence that was detected as a
3430 pattern that can potentially be vectorized. Don't mark the stmt
3431 as relevant/live because it's not going to be vectorized.
3432 Instead mark the pattern-stmt that replaces it. */
3434 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3436 if (vect_print_dump_info (REPORT_DETAILS))
3437 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3438 stmt_info = vinfo_for_stmt (pattern_stmt);
3439 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3440 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3441 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3442 stmt = pattern_stmt;
3445 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3446 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3447 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3449 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3450 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3452 if (vect_print_dump_info (REPORT_DETAILS))
3453 fprintf (vect_dump, "already marked relevant/live.");
3454 return;
3457 VEC_safe_push (tree, heap, *worklist, stmt);
3461 /* Function vect_stmt_relevant_p.
3463 Return true if STMT, in the loop represented by LOOP_VINFO, is
3464 "relevant for vectorization".
3466 A stmt is considered "relevant for vectorization" if:
3467 - it has uses outside the loop,
3468 - it has vdefs (it alters memory), or
3469 - it is a control stmt in the loop (other than the exit condition).
3471 CHECKME: what other side effects would the vectorizer allow? */
3473 static bool
3474 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3475 enum vect_relevant *relevant, bool *live_p)
3477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3478 ssa_op_iter op_iter;
3479 imm_use_iterator imm_iter;
3480 use_operand_p use_p;
3481 def_operand_p def_p;
3483 *relevant = vect_unused_in_loop;
3484 *live_p = false;
3486 /* cond stmt other than loop exit cond. */
3487 if (is_ctrl_stmt (stmt)
3488 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3489 *relevant = vect_used_in_loop;
3491 /* changing memory. */
3492 if (TREE_CODE (stmt) != PHI_NODE)
3493 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3495 if (vect_print_dump_info (REPORT_DETAILS))
3496 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3497 *relevant = vect_used_in_loop;
3500 /* uses outside the loop. */
3501 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3503 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3505 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3506 if (!flow_bb_inside_loop_p (loop, bb))
3508 if (vect_print_dump_info (REPORT_DETAILS))
3509 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3511 /* We expect all such uses to be in the loop exit phis
3512 (because of loop-closed SSA form). */
3513 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3514 gcc_assert (bb == single_exit (loop)->dest);
3516 *live_p = true;
3521 return (*live_p || *relevant);
3526 Function process_use.
3528 Inputs:
3529 - a USE in STMT in a loop represented by LOOP_VINFO
3530 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3531 that defined USE. This is done by calling vect_mark_relevant and passing it
3532 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3534 Outputs:
3535 Generally, LIVE_P and RELEVANT are used to define the liveness and
3536 relevance info of the DEF_STMT of this USE:
3537 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3538 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3539 Exceptions:
3540 - case 1: If USE is used only for address computations (e.g. array indexing),
3541 which does not need to be directly vectorized, then the liveness/relevance
3542 of the respective DEF_STMT is left unchanged.
3543 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3544 skip DEF_STMT because it has already been processed.
3545 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3546 be modified accordingly.
3548 Return true if everything is as expected. Return false otherwise. */
3550 static bool
3551 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3552 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3554 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3556 stmt_vec_info dstmt_vinfo;
3557 basic_block bb, def_bb;
3558 tree def, def_stmt;
3559 enum vect_def_type dt;
3561 /* case 1: we are only interested in uses that need to be vectorized. Uses
3562 that are used for address computation are not considered relevant. */
3563 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3564 return true;
3566 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3569 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3570 return false;
3573 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3574 return true;
3576 def_bb = bb_for_stmt (def_stmt);
3577 if (!flow_bb_inside_loop_p (loop, def_bb))
3579 if (vect_print_dump_info (REPORT_DETAILS))
3580 fprintf (vect_dump, "def_stmt is out of loop.");
3581 return true;
3584 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3585 DEF_STMT must have already been processed, because this should be the
3586 only way that STMT, which is a reduction-phi, was put in the worklist,
3587 as there should be no other uses for DEF_STMT in the loop. So we just
3588 check that everything is as expected, and we are done. */
3589 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3590 bb = bb_for_stmt (stmt);
3591 if (TREE_CODE (stmt) == PHI_NODE
3592 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3593 && TREE_CODE (def_stmt) != PHI_NODE
3594 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3595 && bb->loop_father == def_bb->loop_father)
3597 if (vect_print_dump_info (REPORT_DETAILS))
3598 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3599 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3600 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3601 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3602 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3603 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3604 return true;
3607 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3608 outer-loop-header-bb:
3609 d = def_stmt
3610 inner-loop:
3611 stmt # use (d)
3612 outer-loop-tail-bb:
3613 ... */
3614 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3616 if (vect_print_dump_info (REPORT_DETAILS))
3617 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3618 switch (relevant)
3620 case vect_unused_in_loop:
3621 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3622 vect_used_by_reduction : vect_unused_in_loop;
3623 break;
3624 case vect_used_in_outer_by_reduction:
3625 relevant = vect_used_by_reduction;
3626 break;
3627 case vect_used_in_outer:
3628 relevant = vect_used_in_loop;
3629 break;
3630 case vect_used_by_reduction:
3631 case vect_used_in_loop:
3632 break;
3634 default:
3635 gcc_unreachable ();
3639 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3640 outer-loop-header-bb:
3642 inner-loop:
3643 d = def_stmt
3644 outer-loop-tail-bb:
3645 stmt # use (d) */
3646 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3648 if (vect_print_dump_info (REPORT_DETAILS))
3649 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3650 switch (relevant)
3652 case vect_unused_in_loop:
3653 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3654 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3655 break;
3657 case vect_used_in_outer_by_reduction:
3658 case vect_used_in_outer:
3659 break;
3661 case vect_used_by_reduction:
3662 relevant = vect_used_in_outer_by_reduction;
3663 break;
3665 case vect_used_in_loop:
3666 relevant = vect_used_in_outer;
3667 break;
3669 default:
3670 gcc_unreachable ();
3674 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3675 return true;
3679 /* Function vect_mark_stmts_to_be_vectorized.
3681 Not all stmts in the loop need to be vectorized. For example:
3683 for i...
3684 for j...
3685 1. T0 = i + j
3686 2. T1 = a[T0]
3688 3. j = j + 1
3690 Stmts 1 and 3 do not need to be vectorized, because loop control and
3691 addressing of vectorized data-refs are handled differently.
3693 This pass detects such stmts. */
3695 static bool
3696 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3698 VEC(tree,heap) *worklist;
3699 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3700 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3701 unsigned int nbbs = loop->num_nodes;
3702 block_stmt_iterator si;
3703 tree stmt;
3704 stmt_ann_t ann;
3705 unsigned int i;
3706 stmt_vec_info stmt_vinfo;
3707 basic_block bb;
3708 tree phi;
3709 bool live_p;
3710 enum vect_relevant relevant;
3712 if (vect_print_dump_info (REPORT_DETAILS))
3713 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3715 worklist = VEC_alloc (tree, heap, 64);
3717 /* 1. Init worklist. */
3718 for (i = 0; i < nbbs; i++)
3720 bb = bbs[i];
3721 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3723 if (vect_print_dump_info (REPORT_DETAILS))
3725 fprintf (vect_dump, "init: phi relevant? ");
3726 print_generic_expr (vect_dump, phi, TDF_SLIM);
3729 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3730 vect_mark_relevant (&worklist, phi, relevant, live_p);
3732 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3734 stmt = bsi_stmt (si);
3735 if (vect_print_dump_info (REPORT_DETAILS))
3737 fprintf (vect_dump, "init: stmt relevant? ");
3738 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3741 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3742 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3746 /* 2. Process_worklist */
3747 while (VEC_length (tree, worklist) > 0)
3749 use_operand_p use_p;
3750 ssa_op_iter iter;
3752 stmt = VEC_pop (tree, worklist);
3753 if (vect_print_dump_info (REPORT_DETAILS))
3755 fprintf (vect_dump, "worklist: examine stmt: ");
3756 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3759 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3760 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3761 liveness and relevance properties of STMT. */
3762 ann = stmt_ann (stmt);
3763 stmt_vinfo = vinfo_for_stmt (stmt);
3764 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3765 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3767 /* Generally, the liveness and relevance properties of STMT are
3768 propagated as is to the DEF_STMTs of its USEs:
3769 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3770 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3772 One exception is when STMT has been identified as defining a reduction
3773 variable; in this case we set the liveness/relevance as follows:
3774 live_p = false
3775 relevant = vect_used_by_reduction
3776 This is because we distinguish between two kinds of relevant stmts -
3777 those that are used by a reduction computation, and those that are
3778 (also) used by a regular computation. This allows us later on to
3779 identify stmts that are used solely by a reduction, and therefore the
3780 order of the results that they produce does not have to be kept.
3782 Reduction phis are expected to be used by a reduction stmt, or by
3783 a stmt in an outer loop; other reduction stmts are expected to be
3784 in the loop, and possibly used by a stmt in an outer loop.
3785 Here are the expected values of "relevant" for reduction phis/stmts:
3787 relevance:                          phi     stmt
3788 vect_unused_in_loop                         ok
3789 vect_used_in_outer_by_reduction     ok      ok
3790 vect_used_in_outer                  ok      ok
3791 vect_used_by_reduction              ok
3792 vect_used_in_loop                                 */
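/* For example (illustrative), in the reduction

     s = 0;
     for (i = 0; i < n; i++)
       s += a[i];

   the load of a[i] is used only by the reduction stmt "s += a[i]", so it
   is marked vect_used_by_reduction, and the order in which its results
   are produced need not be preserved.  */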
3794 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3796 enum vect_relevant tmp_relevant = relevant;
3797 switch (tmp_relevant)
3799 case vect_unused_in_loop:
3800 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3801 relevant = vect_used_by_reduction;
3802 break;
3804 case vect_used_in_outer_by_reduction:
3805 case vect_used_in_outer:
3806 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3807 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3808 break;
3810 case vect_used_by_reduction:
3811 if (TREE_CODE (stmt) == PHI_NODE)
3812 break;
3813 /* fall through */
3814 case vect_used_in_loop:
3815 default:
3816 if (vect_print_dump_info (REPORT_DETAILS))
3817 fprintf (vect_dump, "unsupported use of reduction.");
3818 VEC_free (tree, heap, worklist);
3819 return false;
3821 live_p = false;
3824 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3826 tree op = USE_FROM_PTR (use_p);
3827 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3829 VEC_free (tree, heap, worklist);
3830 return false;
3833 } /* while worklist */
3835 VEC_free (tree, heap, worklist);
3836 return true;
3840 /* Function vect_can_advance_ivs_p
3842 In case the number of iterations of LOOP is unknown at compile
3843 time, an epilog loop will be generated, and the loop induction variables
3844 (IVs) will be "advanced" to the value they are supposed to take just before
3845 the epilog loop. Here we check that the access function of the loop IVs
3846 and the expression that represents the loop bound are simple enough.
3847 These restrictions will be relaxed in the future. */
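/* For example (illustrative), the IV i in

     for (i = 0; i < n; i++) ...

   has access function {0, +, 1}_1; its evolution part (the step 1) is
   not itself a chrec, so the IV can be advanced.  An IV whose step
   varies across iterations (a polynomial of degree >= 2) is rejected
   below.  */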
3849 static bool
3850 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3852 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3853 basic_block bb = loop->header;
3854 tree phi;
3856 /* Analyze phi functions of the loop header. */
3858 if (vect_print_dump_info (REPORT_DETAILS))
3859 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3861 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3863 tree access_fn = NULL;
3864 tree evolution_part;
3866 if (vect_print_dump_info (REPORT_DETAILS))
3868 fprintf (vect_dump, "Analyze phi: ");
3869 print_generic_expr (vect_dump, phi, TDF_SLIM);
3872 /* Skip virtual phi's. The data dependences that are associated with
3873 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3875 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3877 if (vect_print_dump_info (REPORT_DETAILS))
3878 fprintf (vect_dump, "virtual phi. skip.");
3879 continue;
3882 /* Skip reduction phis. */
3884 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3886 if (vect_print_dump_info (REPORT_DETAILS))
3887 fprintf (vect_dump, "reduc phi. skip.");
3888 continue;
3891 /* Analyze the evolution function. */
3893 access_fn = instantiate_parameters
3894 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3896 if (!access_fn)
3898 if (vect_print_dump_info (REPORT_DETAILS))
3899 fprintf (vect_dump, "No Access function.");
3900 return false;
3903 if (vect_print_dump_info (REPORT_DETAILS))
3905 fprintf (vect_dump, "Access function of PHI: ");
3906 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3909 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3911 if (evolution_part == NULL_TREE)
3913 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "No evolution.");
3915 return false;
3918 /* FORNOW: We do not transform initial conditions of IVs
3919 whose evolution functions are polynomials of degree >= 2. */
3921 if (tree_is_chrec (evolution_part))
3922 return false;
3925 return true;
3929 /* Function vect_get_loop_niters.
3931 Determine the number of iterations the loop executes.
3932 If an expression that represents the number of iterations
3933 can be constructed, place it in NUMBER_OF_ITERATIONS.
3934 Return the loop exit condition. */
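/* For example (illustrative), for "for (i = 0; i < n; i++)" this would
   place an expression equivalent to "n" in NUMBER_OF_ITERATIONS and
   return the COND_EXPR controlling the loop exit (the test of i
   against n).  */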
3936 static tree
3937 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3939 tree niters;
3941 if (vect_print_dump_info (REPORT_DETAILS))
3942 fprintf (vect_dump, "=== get_loop_niters ===");
3944 niters = number_of_exit_cond_executions (loop);
3946 if (niters != NULL_TREE
3947 && niters != chrec_dont_know)
3949 *number_of_iterations = niters;
3951 if (vect_print_dump_info (REPORT_DETAILS))
3953 fprintf (vect_dump, "==> get_loop_niters:" );
3954 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3958 return get_loop_exit_condition (loop);
3962 /* Function vect_analyze_loop_1.
3964 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3965 for it. The different analyses will record information in the
3966 loop_vec_info struct. This is a subset of the analyses applied in
3967 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3968 that is now considered for (outer-loop) vectorization. */
3970 static loop_vec_info
3971 vect_analyze_loop_1 (struct loop *loop)
3973 loop_vec_info loop_vinfo;
3975 if (vect_print_dump_info (REPORT_DETAILS))
3976 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3978 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3980 loop_vinfo = vect_analyze_loop_form (loop);
3981 if (!loop_vinfo)
3983 if (vect_print_dump_info (REPORT_DETAILS))
3984 fprintf (vect_dump, "bad inner-loop form.");
3985 return NULL;
3988 return loop_vinfo;
3992 /* Function vect_analyze_loop_form.
3994 Verify that certain CFG restrictions hold, including:
3995 - the loop has a pre-header
3996 - the loop has a single entry and exit
3997 - the loop exit condition is simple enough, and the number of iterations
3998 can be analyzed (a countable loop). */
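/* For instance (illustrative), "for (i = 0; i < n; i++) a[i] = b[i];"
   has a pre-header, a single entry and exit, and a computable iteration
   count (n), so it satisfies these restrictions.  */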
4000 loop_vec_info
4001 vect_analyze_loop_form (struct loop *loop)
4003 loop_vec_info loop_vinfo;
4004 tree loop_cond;
4005 tree number_of_iterations = NULL;
4006 loop_vec_info inner_loop_vinfo = NULL;
4008 if (vect_print_dump_info (REPORT_DETAILS))
4009 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4011 /* Different restrictions apply when we are considering an inner-most loop,
4012 vs. an outer (nested) loop.
4013 (FORNOW. May want to relax some of these restrictions in the future). */
4015 if (!loop->inner)
4017 /* Inner-most loop. We currently require that the number of BBs is
4018 exactly 2 (the header and latch). Vectorizable inner-most loops
4019 look like this:
4021 (pre-header)
4023 header <--------+
4024 | | |
4025 | +--> latch --+
4027 (exit-bb) */
4029 if (loop->num_nodes != 2)
4031 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4032 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4033 return NULL;
4036 if (empty_block_p (loop->header))
4038 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4039 fprintf (vect_dump, "not vectorized: empty loop.");
4040 return NULL;
4043 else
4045 struct loop *innerloop = loop->inner;
4046 edge backedge, entryedge;
4048 /* Nested loop. We currently require that the loop is doubly-nested,
4049 contains a single inner loop, and the number of BBs is exactly 5.
4050 Vectorizable outer-loops look like this:
4052 (pre-header)
4054 header <---+
4056 inner-loop |
4058 tail ------+
4060 (exit-bb)
4062 The inner-loop has the properties expected of inner-most loops
4063 as described above. */
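/* At the source level such a nest might look like (illustrative):

     for (i = 0; i < n; i++)        <-- outer loop under consideration
       for (j = 0; j < m; j++)      <-- its single inner loop
         a[i][j] = b[i][j] + c;  */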
4065 if ((loop->inner)->inner || (loop->inner)->next)
4067 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4068 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4069 return NULL;
4072 /* Analyze the inner-loop. */
4073 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4074 if (!inner_loop_vinfo)
4076 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4077 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4078 return NULL;
4081 if (!expr_invariant_in_loop_p (loop,
4082 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4084 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4085 fprintf (vect_dump,
4086 "not vectorized: inner-loop count not invariant.");
4087 destroy_loop_vec_info (inner_loop_vinfo, true);
4088 return NULL;
4091 if (loop->num_nodes != 5)
4093 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4094 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4095 destroy_loop_vec_info (inner_loop_vinfo, true);
4096 return NULL;
4099 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4100 backedge = EDGE_PRED (innerloop->header, 1);
4101 entryedge = EDGE_PRED (innerloop->header, 0);
4102 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4104 backedge = EDGE_PRED (innerloop->header, 0);
4105 entryedge = EDGE_PRED (innerloop->header, 1);
4108 if (entryedge->src != loop->header
4109 || !single_exit (innerloop)
4110 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4113 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4114 destroy_loop_vec_info (inner_loop_vinfo, true);
4115 return NULL;
4118 if (vect_print_dump_info (REPORT_DETAILS))
4119 fprintf (vect_dump, "Considering outer-loop vectorization.");
4122 if (!single_exit (loop)
4123 || EDGE_COUNT (loop->header->preds) != 2)
4125 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4127 if (!single_exit (loop))
4128 fprintf (vect_dump, "not vectorized: multiple exits.");
4129 else if (EDGE_COUNT (loop->header->preds) != 2)
4130 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4132 if (inner_loop_vinfo)
4133 destroy_loop_vec_info (inner_loop_vinfo, true);
4134 return NULL;
4137 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4138 that the loop is represented as a do-while (with a proper if-guard
4139 before the loop if needed), where the loop header contains all the
4140 executable statements, and the latch is empty. */
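/* For example (illustrative), "for (i = 0; i < n; i++) a[i] = 0;" is
   expected in the form:

     if (n > 0)
       {
         i = 0;
         do { a[i] = 0; i++; } while (i < n);
       }

   with all executable stmts in the header and an empty latch.  */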
4141 if (!empty_block_p (loop->latch)
4142 || phi_nodes (loop->latch))
4144 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4145 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4146 if (inner_loop_vinfo)
4147 destroy_loop_vec_info (inner_loop_vinfo, true);
4148 return NULL;
4151 /* Make sure there exists a single-predecessor exit bb: */
4152 if (!single_pred_p (single_exit (loop)->dest))
4154 edge e = single_exit (loop);
4155 if (!(e->flags & EDGE_ABNORMAL))
4157 split_loop_exit_edge (e);
4158 if (vect_print_dump_info (REPORT_DETAILS))
4159 fprintf (vect_dump, "split exit edge.");
4161 else
4163 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4164 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4165 if (inner_loop_vinfo)
4166 destroy_loop_vec_info (inner_loop_vinfo, true);
4167 return NULL;
4171 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4172 if (!loop_cond)
4174 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4175 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4176 if (inner_loop_vinfo)
4177 destroy_loop_vec_info (inner_loop_vinfo, true);
4178 return NULL;
4181 if (!number_of_iterations)
4183 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4184 fprintf (vect_dump,
4185 "not vectorized: number of iterations cannot be computed.");
4186 if (inner_loop_vinfo)
4187 destroy_loop_vec_info (inner_loop_vinfo, true);
4188 return NULL;
4191 if (chrec_contains_undetermined (number_of_iterations))
4193 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4194 fprintf (vect_dump, "Infinite number of iterations.");
4195 if (inner_loop_vinfo)
4196 destroy_loop_vec_info (inner_loop_vinfo, true);
4197 return NULL;
4200 if (!NITERS_KNOWN_P (number_of_iterations))
4202 if (vect_print_dump_info (REPORT_DETAILS))
4204 fprintf (vect_dump, "Symbolic number of iterations is ");
4205 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4208 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4210 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4211 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4212 if (inner_loop_vinfo)
4213 destroy_loop_vec_info (inner_loop_vinfo, false);
4214 return NULL;
4217 loop_vinfo = new_loop_vec_info (loop);
4218 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4220 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4222 /* CHECKME: May want to keep it around in the future. */
4223 if (inner_loop_vinfo)
4224 destroy_loop_vec_info (inner_loop_vinfo, false);
4226 gcc_assert (!loop->aux);
4227 loop->aux = loop_vinfo;
4228 return loop_vinfo;
4232 /* Function vect_analyze_loop.
4234 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4235 for it. The different analyses will record information in the
4236 loop_vec_info struct. */
4237 loop_vec_info
4238 vect_analyze_loop (struct loop *loop)
4240 bool ok;
4241 loop_vec_info loop_vinfo;
4243 if (vect_print_dump_info (REPORT_DETAILS))
4244 fprintf (vect_dump, "===== analyze_loop_nest =====");
4246 if (loop_outer (loop)
4247 && loop_vec_info_for_loop (loop_outer (loop))
4248 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4250 if (vect_print_dump_info (REPORT_DETAILS))
4251 fprintf (vect_dump, "outer-loop already vectorized.");
4252 return NULL;
4255 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4257 loop_vinfo = vect_analyze_loop_form (loop);
4258 if (!loop_vinfo)
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "bad loop form.");
4262 return NULL;
4265 /* Find all data references in the loop (which correspond to vdefs/vuses)
4266 and analyze their evolution in the loop.
4268 FORNOW: Handle only simple array references whose alignment
4269 can be forced, and aligned pointer-references.
4271 ok = vect_analyze_data_refs (loop_vinfo);
4272 if (!ok)
4274 if (vect_print_dump_info (REPORT_DETAILS))
4275 fprintf (vect_dump, "bad data references.");
4276 destroy_loop_vec_info (loop_vinfo, true);
4277 return NULL;
4280 /* Classify all cross-iteration scalar data-flow cycles.
4281 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4283 vect_analyze_scalar_cycles (loop_vinfo);
4285 vect_pattern_recog (loop_vinfo);
4287 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4289 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4290 if (!ok)
4292 if (vect_print_dump_info (REPORT_DETAILS))
4293 fprintf (vect_dump, "unexpected pattern.");
4294 destroy_loop_vec_info (loop_vinfo, true);
4295 return NULL;
4298 /* Analyze the alignment of the data-refs in the loop.
4299 Fail if a data reference is found that cannot be vectorized. */
4301 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4302 if (!ok)
4304 if (vect_print_dump_info (REPORT_DETAILS))
4305 fprintf (vect_dump, "bad data alignment.");
4306 destroy_loop_vec_info (loop_vinfo, true);
4307 return NULL;
4310 ok = vect_determine_vectorization_factor (loop_vinfo);
4311 if (!ok)
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "can't determine vectorization factor.");
4315 destroy_loop_vec_info (loop_vinfo, true);
4316 return NULL;
4319 /* Analyze data dependences between the data-refs in the loop.
4320 FORNOW: fail at the first data dependence that we encounter. */
4322 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4323 if (!ok)
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "bad data dependence.");
4327 destroy_loop_vec_info (loop_vinfo, true);
4328 return NULL;
4331 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4332 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4334 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4335 if (!ok)
4337 if (vect_print_dump_info (REPORT_DETAILS))
4338 fprintf (vect_dump, "bad data access.");
4339 destroy_loop_vec_info (loop_vinfo, true);
4340 return NULL;
4343 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4344 It is important to call pruning after vect_analyze_data_ref_accesses,
4345 since we use grouping information gathered by interleaving analysis. */
4346 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4347 if (!ok)
4349 if (vect_print_dump_info (REPORT_DETAILS))
4350 fprintf (vect_dump, "too long list of versioning for alias "
4351 "run-time tests.");
4352 destroy_loop_vec_info (loop_vinfo, true);
4353 return NULL;
4356 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4357 ok = vect_analyze_slp (loop_vinfo);
4358 if (ok)
4360 /* Decide which possible SLP instances to SLP. */
4361 vect_make_slp_decision (loop_vinfo);
4363 /* Find stmts that need to be both vectorized and SLPed. */
4364 vect_detect_hybrid_slp (loop_vinfo);
4367 /* This pass will decide on using loop versioning and/or loop peeling in
4368 order to enhance the alignment of data references in the loop. */
4370 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4371 if (!ok)
4373 if (vect_print_dump_info (REPORT_DETAILS))
4374 fprintf (vect_dump, "bad data alignment.");
4375 destroy_loop_vec_info (loop_vinfo, true);
4376 return NULL;
4379 /* Scan all the operations in the loop and make sure they are
4380 vectorizable. */
4382 ok = vect_analyze_operations (loop_vinfo);
4383 if (!ok)
4385 if (vect_print_dump_info (REPORT_DETAILS))
4386 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4387 destroy_loop_vec_info (loop_vinfo, true);
4388 return NULL;
4391 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4393 return loop_vinfo;