/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"
/* Main analysis functions.  */
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }  */

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              tree operation;

              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the smallest
                 datatype in this function, because that determines the VF.
                 If the smallest datatype in the loop is present only as the
                 rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
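
              /* For example, for 'int_x = (int) short_y' the rhs type
                 (2 bytes, assuming a 2-byte short) is smaller than the lhs
                 type (4 bytes, assuming a 4-byte int), so scalar_type is
                 set to the short type below, yielding VF = VS/2 rather
                 than VS/4.  */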
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              operation = GIMPLE_STMT_OPERAND (stmt, 1);
              if (TREE_CODE (operation) == NOP_EXPR
                  || TREE_CODE (operation) == CONVERT_EXPR
                  || TREE_CODE (operation) == WIDEN_MULT_EXPR
                  || TREE_CODE (operation) == FLOAT_EXPR)
                {
                  tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type))
                      < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
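
/* For example, under the linearity assumption above: if an SLP instance was
   built with unrolling factor 2 and VF is finally set to 4, twice as many
   vector stmts are created per node, so the instance's inside-of-loop cost
   is multiplied by 4/2 = 2.  */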
static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. - that are used out of the
             loop) need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */
  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross-iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worth while to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the user
     specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
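
  /* For example, assuming VF = 4 and --param min-vect-loop-bound=2:
     min_scalar_loop_bound is 2*4 - 1 = 7, so if the cost model computed
     min_profitable_iters = 10, the more conservative threshold TH = 10
     is used.  */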
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
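
/* For example, given 'a[j_3] = x_5': the use of x_5 is not for array
   indexing, while the use of j_3 is.  Given 'y_4 = a[j_3]', all uses are
   for array indexing only (y_4 is a def, not a use).  */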
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs.)

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dumy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
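
/* The chain is kept sorted by increasing DR_INIT; e.g., inserting a ref
   with init 8 into a chain with inits (0, 4, 12) yields (0, 4, 8, 12).  */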
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT(FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
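
/* For example (case 4): given one chain with inits (0, 8) and another with
   inits (4, 12), the nodes of the second chain are inserted into the first
   (whose FIRST_DR has the smaller init), producing a single chain with
   inits (0, 4, 8, 12).  */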
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                      old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
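
/* For example, the refs a[3*i] and a[3*i+2] (same base and offset, equal
   steps, inits that differ by a multiple of the element size which is
   smaller than the step) form one interleaving group.  */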
static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if they are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  tree stmt_i = DR_STMT (dr_i);
  tree stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias can check the
   dependence at run-time instead, return FALSE.  */
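
/* For example, in
     for (i=0; i<N; i++)
       a[i+1] = a[i] + 1;
   the dependence distance between the write and the read is 1, so the loop
   cannot be vectorized for any VF > 1, whereas a distance >= VF would be
   harmless.  */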
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: "
                   "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a),
                         drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b),
                         dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that at least one of the data-refs
             is a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
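
/* For example, for an access whose base is forced to the alignment of a
   16-byte vectype and whose DR_INIT is 20, the recorded misalignment is
   20 % 16 = 4 bytes.  */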
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % UNITS_PER_SIMD_WORD == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
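
/* For example, if DR and DR_PEEL both access 4-byte elements and DR's known
   misalignment is 12 bytes, peeling NPEEL iterations updates DR's
   misalignment to (12 + NPEEL * 4) % UNITS_PER_SIMD_WORD.  */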
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      misal += npeel * dr_size;
      misal %= UNITS_PER_SIMD_WORD;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
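
      /* For example, with VF = 8, a misalignment of 2 elements and an
         interleaving group of size 3: the prolog loop would run 8 - 2 = 6
         iterations, and 6 % 3 == 0, so peeling is possible.  */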
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = UNITS_PER_SIMD_WORD / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
1782 static bool
1783 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1785 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1786 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1787 enum dr_alignment_support supportable_dr_alignment;
1788 struct data_reference *dr0 = NULL;
1789 struct data_reference *dr;
1790 unsigned int i;
1791 bool do_peeling = false;
1792 bool do_versioning = false;
1793 bool stat;
1794 tree stmt;
1795 stmt_vec_info stmt_info;
1796 int vect_versioning_for_alias_required;
1798 if (vect_print_dump_info (REPORT_DETAILS))
1799 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1801 /* While cost model enhancements are expected in the future, the high level
1802 view of the code at this time is as follows:
1804 A) If there is a misaligned write then see if peeling to align this write
1805 can make all data references satisfy vect_supportable_dr_alignment.
1806 If so, update data structures as needed and return true. Note that
1807 at this time vect_supportable_dr_alignment is known to return false
1808 for a misaligned write.
1810 B) If peeling wasn't possible and there is a data reference with an
1811 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1812 then see if loop versioning checks can be used to make all data
1813 references satisfy vect_supportable_dr_alignment. If so, update
1814 data structures as needed and return true.
1816 C) If neither peeling nor versioning were successful then return false if
1817 any data reference does not satisfy vect_supportable_dr_alignment.
1819 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1821 Note, Possibility 3 above (which is peeling and versioning together) is not
1822 being done at this time. */
1824 /* (1) Peeling to force alignment. */
1826 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1827 Considerations:
1828 + How many accesses will become aligned due to the peeling
1829 - How many accesses will become unaligned due to the peeling,
1830 and the cost of misaligned accesses.
1831 - The cost of peeling (the extra runtime checks, the increase
1832 in code size).
1834 The scheme we use FORNOW: peel to force the alignment of the first
1835 misaligned store in the loop.
1836 Rationale: misaligned stores are not yet supported.
1838 TODO: Use a cost model. */
1840 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1842 stmt = DR_STMT (dr);
1843 stmt_info = vinfo_for_stmt (stmt);
1845 /* For interleaving, only the alignment of the first access
1846 matters. */
1847 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1848 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1849 continue;
1851 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1853 do_peeling = vector_alignment_reachable_p (dr);
1854 if (do_peeling)
1855 dr0 = dr;
1856 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1857 fprintf (vect_dump, "vector alignment may not be reachable");
1858 break;
1862 vect_versioning_for_alias_required =
1863 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1865 /* Temporarily, if versioning for alias is required, we disable peeling
1866 until we support doing both together. Often peeling for alignment
1867 will require peeling for loop-bound, which in turn requires that we
1868 know how to adjust the loop ivs after the loop. */
1869 if (vect_versioning_for_alias_required
1870 || !vect_can_advance_ivs_p (loop_vinfo)
1871 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1872 do_peeling = false;
1874 if (do_peeling)
1876 int mis;
1877 int npeel = 0;
1878 tree stmt = DR_STMT (dr0);
1879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1880 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1881 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1883 if (known_alignment_for_access_p (dr0))
1885 /* Since it's known at compile time, compute the number of iterations
1886 in the peeled loop (the peeling factor) for use in updating
1887 DR_MISALIGNMENT values. The peeling factor is the vectorization
1888 factor minus the misalignment as an element count. */
1889 mis = DR_MISALIGNMENT (dr0);
1890 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1891 npeel = nelements - mis;
1893 /* For interleaved data access every iteration accesses all the
1894 members of the group, therefore we divide the number of iterations
1895 by the group size. */
1896 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1898 npeel /= DR_GROUP_SIZE (stmt_info);
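/* Worked example (illustrative, not from the original source): for a V4SI
   store (nelements == 4, 4-byte elements) with DR_MISALIGNMENT == 8 bytes,
   mis becomes 8 / 4 == 2 elements and npeel == 4 - 2 == 2: two peeled
   scalar iterations advance the address by 8 bytes to the next 16-byte
   boundary.  If the store belongs to an interleaving group of size 2, each
   iteration covers two group members, so npeel is halved to 1.  */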
1900 if (vect_print_dump_info (REPORT_DETAILS))
1901 fprintf (vect_dump, "Try peeling by %d", npeel);
1904 /* Ensure that all data refs can be vectorized after the peel. */
1905 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1907 int save_misalignment;
1909 if (dr == dr0)
1910 continue;
1912 stmt = DR_STMT (dr);
1913 stmt_info = vinfo_for_stmt (stmt);
1914 /* For interleaving, only the alignment of the first access
1915 matters. */
1916 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1917 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1918 continue;
1920 save_misalignment = DR_MISALIGNMENT (dr);
1921 vect_update_misalignment_for_peel (dr, dr0, npeel);
1922 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1923 SET_DR_MISALIGNMENT (dr, save_misalignment);
1925 if (!supportable_dr_alignment)
1927 do_peeling = false;
1928 break;
1932 if (do_peeling)
1934 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1935 If the misalignment of DR_i is identical to that of dr0 then set
1936 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1937 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1938 by the peeling factor times the element size of DR_i (MOD the
1939 vectorization factor times the size). Otherwise, the
1940 misalignment of DR_i must be set to unknown. */
1941 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1942 if (dr != dr0)
1943 vect_update_misalignment_for_peel (dr, dr0, npeel);
1945 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1946 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1947 SET_DR_MISALIGNMENT (dr0, 0);
1948 if (vect_print_dump_info (REPORT_ALIGNMENT))
1949 fprintf (vect_dump, "Alignment of access forced using peeling.");
1951 if (vect_print_dump_info (REPORT_DETAILS))
1952 fprintf (vect_dump, "Peeling for alignment will be applied.");
1954 stat = vect_verify_datarefs_alignment (loop_vinfo);
1955 gcc_assert (stat);
1956 return stat;
1961 /* (2) Versioning to force alignment. */
1963 /* Try versioning if:
1964 1) flag_tree_vect_loop_version is TRUE
1965 2) optimize_size is FALSE
1966 3) there is at least one unsupported misaligned data ref with an unknown
1967 misalignment, and
1968 4) all misaligned data refs with a known misalignment are supported, and
1969 5) the number of runtime alignment checks is within reason. */
1971 do_versioning =
1972 flag_tree_vect_loop_version
1973 && (!optimize_size)
1974 && (!loop->inner); /* FORNOW */
1976 if (do_versioning)
1978 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1980 stmt = DR_STMT (dr);
1981 stmt_info = vinfo_for_stmt (stmt);
1983 /* For interleaving, only the alignment of the first access
1984 matters. */
1985 if (aligned_access_p (dr)
1986 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1987 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1988 continue;
1990 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1992 if (!supportable_dr_alignment)
1994 tree stmt;
1995 int mask;
1996 tree vectype;
1998 if (known_alignment_for_access_p (dr)
1999 || VEC_length (tree,
2000 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2001 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2003 do_versioning = false;
2004 break;
2007 stmt = DR_STMT (dr);
2008 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2009 gcc_assert (vectype);
2011 /* The rightmost bits of an aligned address must be zeros.
2012 Construct the mask needed for this test. For example,
2013 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2014 mask must be 15 = 0xf. */
2015 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2017 /* FORNOW: use the same mask to test all potentially unaligned
2018 references in the loop. The vectorizer currently supports
2019 a single vector size, see the reference to
2020 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2021 vectorization factor is computed. */
2022 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2023 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2024 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2025 VEC_safe_push (tree, heap,
2026 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2027 DR_STMT (dr));
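/* Sketch of the runtime test this records (illustrative; the guard itself
   is emitted later, during the transformation phase): for V4SI, MASK is
   0xf and the versioning condition is in effect

     if ((((uintptr_t) p1 | (uintptr_t) p2 | ...) & 0xf) == 0)
       ... loop version with all recorded accesses assumed aligned ...
     else
       ... unvectorized fallback version ...

   with one address contributed per statement pushed onto
   LOOP_VINFO_MAY_MISALIGN_STMTS.  */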
2031 /* Versioning requires at least one misaligned data reference. */
2032 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2033 do_versioning = false;
2034 else if (!do_versioning)
2035 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2038 if (do_versioning)
2040 VEC(tree,heap) *may_misalign_stmts
2041 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2042 tree stmt;
2044 /* It can now be assumed that the data references in the statements
2045 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2046 of the loop being vectorized. */
2047 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2050 dr = STMT_VINFO_DATA_REF (stmt_info);
2051 SET_DR_MISALIGNMENT (dr, 0);
2052 if (vect_print_dump_info (REPORT_ALIGNMENT))
2053 fprintf (vect_dump, "Alignment of access forced using versioning.");
2056 if (vect_print_dump_info (REPORT_DETAILS))
2057 fprintf (vect_dump, "Versioning for alignment will be applied.");
2059 /* Peeling and versioning can't be done together at this time. */
2060 gcc_assert (! (do_peeling && do_versioning));
2062 stat = vect_verify_datarefs_alignment (loop_vinfo);
2063 gcc_assert (stat);
2064 return stat;
2067 /* This point is reached if neither peeling nor versioning is being done. */
2068 gcc_assert (! (do_peeling || do_versioning));
2070 stat = vect_verify_datarefs_alignment (loop_vinfo);
2071 return stat;
2075 /* Function vect_analyze_data_refs_alignment
2077 Analyze the alignment of the data-references in the loop.
2078 Return FALSE if a data reference is found that cannot be vectorized. */
2080 static bool
2081 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2086 if (!vect_compute_data_refs_alignment (loop_vinfo))
2088 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2089 fprintf (vect_dump,
2090 "not vectorized: can't calculate alignment for data ref.");
2091 return false;
2094 return true;
2098 /* Analyze groups of strided accesses: check that DR belongs to a group of
2099 strided accesses of legal size, step, etc. Detect gaps, single element
2100 interleaving, and other special cases. Set strided access info.
2101 Collect groups of strided stores for further use in SLP analysis. */
2103 static bool
2104 vect_analyze_group_access (struct data_reference *dr)
2106 tree step = DR_STEP (dr);
2107 tree scalar_type = TREE_TYPE (DR_REF (dr));
2108 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2109 tree stmt = DR_STMT (dr);
2110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2111 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2112 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2113 HOST_WIDE_INT stride;
2114 bool slp_impossible = false;
2116 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2117 interleaving group (including gaps). */
2118 stride = dr_step / type_size;
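/* Illustration (assumed source): for 'int' accesses a[4*i] and a[4*i + 2],
   the step of each DR is 16 bytes and type_size is 4, so STRIDE == 4: a
   group of four element slots per iteration, two of which are gaps.  */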
2120 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2121 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2123 /* Check whether this DR is part of an interleaving group of which it is
2124 the only element accessed in the loop. */
2126 /* Gaps are supported only for loads. STEP must be a multiple of the type
2127 size. The size of the group must be a power of 2. */
2128 if (DR_IS_READ (dr)
2129 && (dr_step % type_size) == 0
2130 && stride > 0
2131 && exact_log2 (stride) != -1)
2133 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2134 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2135 if (vect_print_dump_info (REPORT_DR_DETAILS))
2137 fprintf (vect_dump, "Detected single element interleaving %d ",
2138 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2139 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2140 fprintf (vect_dump, " step ");
2141 print_generic_expr (vect_dump, step, TDF_SLIM);
2143 return true;
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "not consecutive access");
2147 return false;
2150 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2152 /* First stmt in the interleaving chain. Check the chain. */
2153 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2154 struct data_reference *data_ref = dr;
2155 unsigned int count = 1;
2156 tree next_step;
2157 tree prev_init = DR_INIT (data_ref);
2158 tree prev = stmt;
2159 HOST_WIDE_INT diff, count_in_bytes;
2161 while (next)
2163 /* Skip identical data-refs. If two or more stmts share a data-ref
2164 (supported only for loads), we vectorize only the first stmt, and
2165 the rest reuse the vectorized load of the first one. */
2166 if (!tree_int_cst_compare (DR_INIT (data_ref),
2167 DR_INIT (STMT_VINFO_DATA_REF (
2168 vinfo_for_stmt (next)))))
2170 if (!DR_IS_READ (data_ref))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump, "Two store stmts share the same dr.");
2174 return false;
2177 /* Check that there are no load-store dependences for these loads,
2178 to prevent a load-store-load sequence to the same location. */
2179 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2180 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump,
2184 "READ_WRITE dependence in interleaving.");
2185 return false;
2188 /* Record that this load reuses the data-ref load of the previous stmt. */
2189 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2191 prev = next;
2192 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2193 continue;
2195 prev = next;
2197 /* Check that all the accesses have the same STEP. */
2198 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2199 if (tree_int_cst_compare (step, next_step))
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "not consecutive access in interleaving");
2203 return false;
2206 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2207 /* Check that the distance between two accesses is equal to the type
2208 size. Otherwise, we have gaps. */
2209 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2210 - TREE_INT_CST_LOW (prev_init)) / type_size;
2211 if (diff != 1)
2213 /* FORNOW: SLP of accesses with gaps is not supported. */
2214 slp_impossible = true;
2215 if (!DR_IS_READ (data_ref))
2217 if (vect_print_dump_info (REPORT_DETAILS))
2218 fprintf (vect_dump, "interleaved store with gaps");
2219 return false;
2223 /* Store the gap from the previous member of the group. If there is no
2224 gap in the access, DR_GROUP_GAP is always 1. */
2225 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2227 prev_init = DR_INIT (data_ref);
2228 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2229 /* Count the number of data-refs in the chain. */
2230 count++;
2233 /* COUNT is the number of accesses found; multiply it by the size of
2234 the type to get COUNT_IN_BYTES. */
2235 count_in_bytes = type_size * count;
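/* Illustration (not in the original source): four 'int' accesses in a
   group give COUNT_IN_BYTES == 16.  A step of 12 bytes would make
   consecutive iterations overlap the group, which the check just below
   rejects; for a store group, a step of 20 bytes would leave a gap and is
   rejected further below.  */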
2237 /* Check that the size of the interleaving is not greater than STEP. */
2238 if (dr_step < count_in_bytes)
2240 if (vect_print_dump_info (REPORT_DETAILS))
2242 fprintf (vect_dump, "interleaving size is greater than step for ");
2243 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2245 return false;
2248 /* Check that the size of the interleaving is equal to STEP for stores,
2249 i.e., that there are no gaps. */
2250 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2252 if (vect_print_dump_info (REPORT_DETAILS))
2253 fprintf (vect_dump, "interleaved store with gaps");
2254 return false;
2257 /* Check that STEP is a multiple of type size. */
2258 if ((dr_step % type_size) != 0)
2260 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "step is not a multiple of type size: step ");
2263 print_generic_expr (vect_dump, step, TDF_SLIM);
2264 fprintf (vect_dump, " size ");
2265 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2266 TDF_SLIM);
2268 return false;
2271 /* FORNOW: we handle only interleaving that is a power of 2.
2272 We don't fail here if it may still be possible to vectorize the
2273 group using SLP. If not, the size of the group will be checked in
2274 vect_analyze_operations, and the vectorization will fail. */
2275 if (exact_log2 (stride) == -1)
2277 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "interleaving is not a power of 2");
2280 if (slp_impossible)
2281 return false;
2283 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2284 if (vect_print_dump_info (REPORT_DETAILS))
2285 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2287 /* SLP: create an SLP data structure for every interleaving group of
2288 stores for further analysis in vect_analyze_slp. */
2289 if (!DR_IS_READ (dr) && !slp_impossible)
2290 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2293 return true;
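/* Illustrative example (assumed source) of a group this function accepts:

     for (i = 0; i < N; i++)
       {
         a[2*i]     = x;   step == 2 * sizeof (int), STRIDE == 2
         a[2*i + 1] = y;   chained via DR_GROUP_NEXT_DR, diff == 1
       }

   The two stores form an interleaving group of size 2 with no gaps, so
   the group is accepted and, since SLP remains possible, the first store
   is pushed onto LOOP_VINFO_STRIDED_STORES for vect_analyze_slp.  */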
2297 /* Analyze the access pattern of the data-reference DR.
2298 In case of non-consecutive accesses call vect_analyze_group_access() to
2299 analyze groups of strided accesses. */
2301 static bool
2302 vect_analyze_data_ref_access (struct data_reference *dr)
2304 tree step = DR_STEP (dr);
2305 tree scalar_type = TREE_TYPE (DR_REF (dr));
2306 tree stmt = DR_STMT (dr);
2307 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2310 HOST_WIDE_INT dr_step = step ? TREE_INT_CST_LOW (step) : 0; /* STEP may be NULL; checked just below. */
2312 if (!step)
2314 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "bad data-ref access");
2316 return false;
2319 /* Don't allow invariant accesses. */
2320 if (dr_step == 0)
2321 return false;
2323 if (nested_in_vect_loop_p (loop, stmt))
2325 /* Interleaved accesses are not yet supported within outer-loop
2326 vectorization for references in the inner-loop. */
2327 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2329 /* For the rest of the analysis we use the outer-loop step. */
2330 step = STMT_VINFO_DR_STEP (stmt_info);
2331 dr_step = TREE_INT_CST_LOW (step);
2333 if (dr_step == 0)
2335 if (vect_print_dump_info (REPORT_ALIGNMENT))
2336 fprintf (vect_dump, "zero step in outer loop.");
2337 if (DR_IS_READ (dr))
2338 return true;
2339 else
2340 return false;
2344 /* Consecutive? */
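/* Illustration: for 'int a[N]; ... = a[i]' the step is 4 bytes, equal to
   TYPE_SIZE_UNIT of 'int', so the access is consecutive and needs no
   group analysis.  */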
2345 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2347 /* Mark that it is not interleaving. */
2348 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2349 return true;
2352 if (nested_in_vect_loop_p (loop, stmt))
2354 if (vect_print_dump_info (REPORT_ALIGNMENT))
2355 fprintf (vect_dump, "strided access in outer loop.");
2356 return false;
2359 /* Not a consecutive access; check whether it is part of an interleaving group. */
2360 return vect_analyze_group_access (dr);
2364 /* Function vect_analyze_data_ref_accesses.
2366 Analyze the access pattern of all the data references in the loop.
2368 FORNOW: the only access pattern that is considered vectorizable is a
2369 simple step 1 (consecutive) access.
2371 FORNOW: handle only arrays and pointer accesses. */
2373 static bool
2374 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2376 unsigned int i;
2377 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2378 struct data_reference *dr;
2380 if (vect_print_dump_info (REPORT_DETAILS))
2381 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2383 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2384 if (!vect_analyze_data_ref_access (dr))
2386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2387 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2388 return false;
2391 return true;
2394 /* Function vect_prune_runtime_alias_test_list.
2396 Prune a list of ddrs to be tested at run-time by versioning for alias.
2397 Return FALSE if the resulting list of ddrs is longer than allowed by
2398 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2400 static bool
2401 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2403 VEC (ddr_p, heap) * ddrs =
2404 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2405 unsigned i, j;
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2410 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2412 bool found;
2413 ddr_p ddr_i;
2415 ddr_i = VEC_index (ddr_p, ddrs, i);
2416 found = false;
2418 for (j = 0; j < i; j++)
2420 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2422 if (vect_vfa_range_equal (ddr_i, ddr_j))
2424 if (vect_print_dump_info (REPORT_DR_DETAILS))
2426 fprintf (vect_dump, "found equal ranges ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, ", ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2430 fprintf (vect_dump, " and ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2432 fprintf (vect_dump, ", ");
2433 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2435 found = true;
2436 break;
2440 if (found)
2442 VEC_ordered_remove (ddr_p, ddrs, i);
2443 continue;
2445 i++;
2448 if (VEC_length (ddr_p, ddrs) >
2449 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2451 if (vect_print_dump_info (REPORT_DR_DETAILS))
2453 fprintf (vect_dump,
2454 "disable versioning for alias - max number of generated "
2455 "checks exceeded.");
2458 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2460 return false;
2463 return true;
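/* Illustration (not in the original source): if the ddrs for 'p[i] vs.
   q[i]' in two different statements cover the same address ranges, only
   one representative is kept above, so a single runtime overlap test is
   emitted when versioning for alias.  If more distinct range pairs remain
   than PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS allows, versioning for
   alias is abandoned altogether.  */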
2466 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2468 void
2469 vect_free_slp_tree (slp_tree node)
2471 if (!node)
2472 return;
2474 if (SLP_TREE_LEFT (node))
2475 vect_free_slp_tree (SLP_TREE_LEFT (node));
2477 if (SLP_TREE_RIGHT (node))
2478 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2480 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2482 if (SLP_TREE_VEC_STMTS (node))
2483 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2485 free (node);
2489 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2490 of a legal type and that they match the defs of the first stmt of the SLP
2491 group (stored in FIRST_STMT_...). */
2493 static bool
2494 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2495 tree rhs, VEC (tree, heap) **def_stmts0,
2496 VEC (tree, heap) **def_stmts1,
2497 enum vect_def_type *first_stmt_dt0,
2498 enum vect_def_type *first_stmt_dt1,
2499 tree *first_stmt_def0_type,
2500 tree *first_stmt_def1_type,
2501 tree *first_stmt_const_oprnd,
2502 int ncopies_for_cost)
2504 tree oprnd;
2505 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2506 unsigned int i, number_of_oprnds = op_type;
2507 tree def, def_stmt;
2508 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2509 stmt_vec_info stmt_info =
2510 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2512 /* Store. */
2513 if (!op_type)
2514 number_of_oprnds = 1;
2515 else
2516 gcc_assert (op_type == unary_op || op_type == binary_op);
2518 for (i = 0; i < number_of_oprnds; i++)
2520 if (op_type)
2521 oprnd = TREE_OPERAND (rhs, i);
2522 else
2523 oprnd = rhs;
2525 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2526 || (!def_stmt && dt[i] != vect_constant_def))
2528 if (vect_print_dump_info (REPORT_SLP))
2530 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2531 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2534 return false;
2537 if (!*first_stmt_dt0)
2539 /* op0 of the first stmt of the group - store its info. */
2540 *first_stmt_dt0 = dt[i];
2541 if (def)
2542 *first_stmt_def0_type = TREE_TYPE (def);
2543 else
2544 *first_stmt_const_oprnd = oprnd;
2546 /* Analyze costs (for the first stmt of the group only). */
2547 if (op_type)
2548 /* Not a memory operation (we don't call this function for loads). */
2549 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2550 else
2551 /* Store. */
2552 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2555 else
2557 if (!*first_stmt_dt1 && i == 1)
2559 /* op1 of the first stmt of the group - store its info. */
2560 *first_stmt_dt1 = dt[i];
2561 if (def)
2562 *first_stmt_def1_type = TREE_TYPE (def);
2563 else
2565 /* We assume that the stmt contains only one constant
2566 operand. We fail otherwise, to be on the safe side. */
2567 if (*first_stmt_const_oprnd)
2569 if (vect_print_dump_info (REPORT_SLP))
2570 fprintf (vect_dump, "Build SLP failed: two constant "
2571 "oprnds in stmt");
2572 return false;
2574 *first_stmt_const_oprnd = oprnd;
2577 else
2579 /* Not the first stmt of the group; check that its def-stmt/s match
2580 the def-stmt/s of the first stmt. */
2581 if ((i == 0
2582 && (*first_stmt_dt0 != dt[i]
2583 || (*first_stmt_def0_type && def
2584 && *first_stmt_def0_type != TREE_TYPE (def))))
2585 || (i == 1
2586 && (*first_stmt_dt1 != dt[i]
2587 || (*first_stmt_def1_type && def
2588 && *first_stmt_def1_type != TREE_TYPE (def))))
2589 || (!def
2590 && TREE_TYPE (*first_stmt_const_oprnd)
2591 != TREE_TYPE (oprnd)))
2593 if (vect_print_dump_info (REPORT_SLP))
2594 fprintf (vect_dump, "Build SLP failed: different types ");
2596 return false;
2601 /* Check the types of the definitions. */
2602 switch (dt[i])
2604 case vect_constant_def:
2605 case vect_invariant_def:
2606 break;
2608 case vect_loop_def:
2609 if (i == 0)
2610 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2611 else
2612 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2613 break;
2615 default:
2616 /* FORNOW: Not supported. */
2617 if (vect_print_dump_info (REPORT_SLP))
2619 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2620 print_generic_expr (vect_dump, def, TDF_SLIM);
2623 return false;
2627 return true;
2631 /* Recursively build an SLP tree starting from NODE.
2632 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2633 permutation or are of unsupported types of operation. Otherwise, return
2634 TRUE.
2635 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop; for now
2636 this happens, for example, in the case of multiple types. */
2638 static bool
2639 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2640 unsigned int group_size, bool *slp_impossible,
2641 int *inside_cost, int *outside_cost,
2642 int ncopies_for_cost)
2644 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2645 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2646 unsigned int i;
2647 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2648 tree stmt = VEC_index (tree, stmts, 0);
2649 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2650 enum tree_code first_stmt_code = 0;
2651 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2652 tree lhs, rhs, prev_stmt = NULL_TREE;
2653 bool stop_recursion = false, need_same_oprnds = false;
2654 tree vectype, scalar_type, first_op1 = NULL_TREE;
2655 unsigned int vectorization_factor = 0, ncopies;
2656 optab optab;
2657 int icode;
2658 enum machine_mode optab_op2_mode;
2659 enum machine_mode vec_mode;
2660 tree first_stmt_const_oprnd = NULL_TREE;
2661 struct data_reference *first_dr;
2663 /* For every stmt in NODE find its def stmt/s. */
2664 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2666 if (vect_print_dump_info (REPORT_SLP))
2668 fprintf (vect_dump, "Build SLP for ");
2669 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2672 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2674 if (vect_print_dump_info (REPORT_SLP))
2676 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2677 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2680 return false;
2683 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2684 vectype = get_vectype_for_scalar_type (scalar_type);
2685 if (!vectype)
2687 if (vect_print_dump_info (REPORT_SLP))
2689 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2690 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2692 return false;
2695 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2696 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2697 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2698 if (ncopies > 1)
2700 /* FORNOW. */
2701 if (vect_print_dump_info (REPORT_SLP))
2702 fprintf (vect_dump, "SLP failed - multiple types ");
2704 *slp_impossible = true;
2705 return false;
2708 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2709 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2711 /* Check the operation. */
2712 if (i == 0)
2714 first_stmt_code = TREE_CODE (rhs);
2716 /* Shift arguments should be equal in all the packed stmts for a
2717 vector shift with a scalar shift operand. */
2718 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2720 vec_mode = TYPE_MODE (vectype);
2721 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2722 if (!optab)
2724 if (vect_print_dump_info (REPORT_SLP))
2725 fprintf (vect_dump, "Build SLP failed: no optab.");
2726 return false;
2728 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2729 if (icode == CODE_FOR_nothing)
2731 if (vect_print_dump_info (REPORT_SLP))
2732 fprintf (vect_dump,
2733 "Build SLP failed: op not supported by target.");
2734 return false;
2736 optab_op2_mode = insn_data[icode].operand[2].mode;
2737 if (!VECTOR_MODE_P (optab_op2_mode))
2739 need_same_oprnds = true;
2740 first_op1 = TREE_OPERAND (rhs, 1);
2744 else
2746 if (first_stmt_code != TREE_CODE (rhs))
2748 if (vect_print_dump_info (REPORT_SLP))
2750 fprintf (vect_dump,
2751 "Build SLP failed: different operation in stmt ");
2752 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2755 return false;
2758 if (need_same_oprnds
2759 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2761 if (vect_print_dump_info (REPORT_SLP))
2763 fprintf (vect_dump,
2764 "Build SLP failed: different shift arguments in ");
2765 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2768 return false;
2772 /* Strided store or load. */
2773 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2775 if (REFERENCE_CLASS_P (lhs))
2777 /* Store. */
2778 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2779 &def_stmts0, &def_stmts1,
2780 &first_stmt_dt0,
2781 &first_stmt_dt1,
2782 &first_stmt_def0_type,
2783 &first_stmt_def1_type,
2784 &first_stmt_const_oprnd,
2785 ncopies_for_cost))
2786 return false;
2788 else
2790 /* Load. */
2791 if (i == 0)
2793 /* The first stmt of the SLP group should be the first load of
2794 the interleaving chain if data permutation is not
2795 allowed. */
2796 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2798 /* FORNOW: data permutations are not supported. */
2799 if (vect_print_dump_info (REPORT_SLP))
2801 fprintf (vect_dump, "Build SLP failed: strided "
2802 " loads need permutation ");
2803 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2806 return false;
2809 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2810 if (vect_supportable_dr_alignment (first_dr)
2811 == dr_unaligned_unsupported)
2813 if (vect_print_dump_info (REPORT_SLP))
2815 fprintf (vect_dump, "Build SLP failed: unsupported "
2816 " unaligned load ");
2817 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2820 return false;
2823 /* Analyze costs (for the first stmt in the group). */
2824 vect_model_load_cost (vinfo_for_stmt (stmt),
2825 ncopies_for_cost, *node);
2827 else
2829 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2831 /* FORNOW: data permutations are not supported. */
2832 if (vect_print_dump_info (REPORT_SLP))
2834 fprintf (vect_dump, "Build SLP failed: strided "
2835 " loads need permutation ");
2836 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2838 return false;
2842 prev_stmt = stmt;
2844 /* We stop growing the tree when we reach a group of loads. */
2845 stop_recursion = true;
2846 continue;
2848 } /* Strided access. */
2849 else
2851 if (REFERENCE_CLASS_P (rhs))
2853 /* Not a strided load. */
2854 if (vect_print_dump_info (REPORT_SLP))
2856 fprintf (vect_dump, "Build SLP failed: not strided load ");
2857 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2860 /* FORNOW: non-strided loads are not supported. */
2861 return false;
2864 /* Not a memory operation. */
2865 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2867 if (vect_print_dump_info (REPORT_SLP))
2869 fprintf (vect_dump, "Build SLP failed: operation");
2870 fprintf (vect_dump, " unsupported ");
2871 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2874 return false;
2877 /* Find the def-stmts. */
2878 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2879 &def_stmts1, &first_stmt_dt0,
2880 &first_stmt_dt1,
2881 &first_stmt_def0_type,
2882 &first_stmt_def1_type,
2883 &first_stmt_const_oprnd,
2884 ncopies_for_cost))
2885 return false;
2889 /* Add the costs of the node to the overall instance costs. */
2890 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2891 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2893 /* Strided loads were reached - stop the recursion. */
2894 if (stop_recursion)
2895 return true;
2897 /* Create SLP_TREE nodes for the definition node/s. */
2898 if (first_stmt_dt0 == vect_loop_def)
2900 slp_tree left_node = XNEW (struct _slp_tree);
2901 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2902 SLP_TREE_VEC_STMTS (left_node) = NULL;
2903 SLP_TREE_LEFT (left_node) = NULL;
2904 SLP_TREE_RIGHT (left_node) = NULL;
2905 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2906 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2907 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2908 slp_impossible, inside_cost, outside_cost,
2909 ncopies_for_cost))
2910 return false;
2912 SLP_TREE_LEFT (*node) = left_node;
2915 if (first_stmt_dt1 == vect_loop_def)
2917 slp_tree right_node = XNEW (struct _slp_tree);
2918 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2919 SLP_TREE_VEC_STMTS (right_node) = NULL;
2920 SLP_TREE_LEFT (right_node) = NULL;
2921 SLP_TREE_RIGHT (right_node) = NULL;
2922 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2923 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2924 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2925 slp_impossible, inside_cost, outside_cost,
2926 ncopies_for_cost))
2927 return false;
2929 SLP_TREE_RIGHT (*node) = right_node;
2932 return true;
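/* Illustrative example (assumed source, shown in GIMPLE-like form) of the
   recursion above:

     for (i = 0; i < N; i++)
       {
         t0 = b[2*i]     + x;    a[2*i]     = t0;
         t1 = b[2*i + 1] + x;    a[2*i + 1] = t1;
       }

   The root node holds the two stores; their operands t0 and t1 are loop
   defs, so a child node is built for the two PLUS_EXPR statements.  From
   there another child is built for the two strided loads from 'b', where
   the recursion stops; 'x' is invariant and needs no node.  Had one
   statement used MINUS_EXPR instead, the "different operation in stmt"
   check would have failed.  */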
2936 static void
2937 vect_print_slp_tree (slp_tree node)
2939 int i;
2940 tree stmt;
2942 if (!node)
2943 return;
2945 fprintf (vect_dump, "node ");
2946 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2948 fprintf (vect_dump, "\n\tstmt %d ", i);
2949 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2951 fprintf (vect_dump, "\n");
2953 vect_print_slp_tree (SLP_TREE_LEFT (node));
2954 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2958 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2959 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2960 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2961 stmts in NODE are to be marked. */
2963 static void
2964 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2966 int i;
2967 tree stmt;
2969 if (!node)
2970 return;
2972 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2973 if (j < 0 || i == j)
2974 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2976 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2977 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2981 /* Analyze an SLP instance starting from a group of strided stores. Call
2982 vect_build_slp_tree to build a tree of packed stmts if possible.
2983 Return FALSE if it's impossible to SLP any stmt in the loop. */
2985 static bool
2986 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2988 slp_instance new_instance;
2989 slp_tree node = XNEW (struct _slp_tree);
2990 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2991 unsigned int unrolling_factor = 1, nunits;
2992 tree vectype, scalar_type, next;
2993 unsigned int vectorization_factor = 0, ncopies;
2994 bool slp_impossible = false;
2995 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2997 /* FORNOW: multiple types are not supported. */
2998 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2999 vectype = get_vectype_for_scalar_type (scalar_type);
3000 if (!vectype)
3002 if (vect_print_dump_info (REPORT_SLP))
3004 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3005 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3007 return false;
3010 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3011 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3012 ncopies = vectorization_factor / nunits;
3013 if (ncopies > 1)
3015 if (vect_print_dump_info (REPORT_SLP))
3016 fprintf (vect_dump, "SLP failed - multiple types ");
3018 return false;
3021 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3022 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3023 next = stmt;
3024 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3025 while (next)
3027 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3028 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3031 SLP_TREE_VEC_STMTS (node) = NULL;
3032 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3033 SLP_TREE_LEFT (node) = NULL;
3034 SLP_TREE_RIGHT (node) = NULL;
3035 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3036 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3038 /* Calculate the unrolling factor. */
3039 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3041 /* Calculate the number of vector stmts to create based on the unrolling
3042 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3043 GROUP_SIZE / NUNITS otherwise). */
3044 ncopies_for_cost = unrolling_factor * group_size / nunits;
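/* Worked example (illustrative): storing 'int' with V4SI (NUNITS == 4)
   and GROUP_SIZE == 2 gives UNROLLING_FACTOR == lcm (4, 2) / 2 == 2,
   i.e. two group instances fill one vector, and NCOPIES_FOR_COST
   == 2 * 2 / 4 == 1 vector statement.  */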
3046 /* Build the tree for the SLP instance. */
3047 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3048 &inside_cost, &outside_cost, ncopies_for_cost))
3050 /* Create a new SLP instance. */
3051 new_instance = XNEW (struct _slp_instance);
3052 SLP_INSTANCE_TREE (new_instance) = node;
3053 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3054 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3055 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3056 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3057 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3058 new_instance);
3059 if (vect_print_dump_info (REPORT_SLP))
3060 vect_print_slp_tree (node);
3062 return true;
3065 /* Failed to SLP. */
3066 /* Free the allocated memory. */
3067 vect_free_slp_tree (node);
3069 if (slp_impossible)
3070 return false;
3072 /* SLP failed for this instance, but it is still possible to SLP other stmts
3073 in the loop. */
3074 return true;
3078 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
3079 trees of packed scalar stmts if SLP is possible. */
3081 static bool
3082 vect_analyze_slp (loop_vec_info loop_vinfo)
3084 unsigned int i;
3085 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3086 tree store;
3088 if (vect_print_dump_info (REPORT_SLP))
3089 fprintf (vect_dump, "=== vect_analyze_slp ===");
3091 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3092 if (!vect_analyze_slp_instance (loop_vinfo, store))
3094 /* SLP failed. No instance can be SLPed in the loop. */
3095 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3096 fprintf (vect_dump, "SLP failed.");
3098 return false;
3101 return true;
3105 /* For each possible SLP instance decide whether to SLP it and calculate the
3106 overall unrolling factor needed to SLP the loop. */
3108 static void
3109 vect_make_slp_decision (loop_vec_info loop_vinfo)
3111 unsigned int i, unrolling_factor = 1;
3112 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3113 slp_instance instance;
3114 int decided_to_slp = 0;
3116 if (vect_print_dump_info (REPORT_SLP))
3117 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3119 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3121 /* FORNOW: SLP if you can. */
3122 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3123 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3125 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3126 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3127 loop-based vectorization. Such stmts will be marked as HYBRID. */
3128 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3129 decided_to_slp++;
3132 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3134 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3135 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3136 decided_to_slp, unrolling_factor);
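/* Illustration: with two SLP instances whose unrolling factors are 2 and
   4, LOOP_VINFO_SLP_UNROLLING_FACTOR becomes 4, the maximum over all the
   instances chosen above.  */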
3140 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3141 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3143 static void
3144 vect_detect_hybrid_slp_stmts (slp_tree node)
3146 int i;
3147 tree stmt;
3148 imm_use_iterator imm_iter;
3149 tree use_stmt;
3151 if (!node)
3152 return;
3154 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3155 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3156 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3157 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3158 if (vinfo_for_stmt (use_stmt)
3159 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3160 vect_mark_slp_stmts (node, hybrid, i);
3162 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3163 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
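/* Illustration (assumed scenario): if a PURE_SLP statement computes a
   value that is also consumed by a statement outside any SLP instance
   (e.g. by a reduction), the FOR_EACH_IMM_USE_STMT walk above finds that
   use and the statement is demoted to HYBRID, so it is vectorized both as
   part of the SLP instance and by loop-based vectorization.  */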
3167 /* Find stmts that must be both vectorized and SLPed. */
3169 static void
3170 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3172 unsigned int i;
3173 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3174 slp_instance instance;
3176 if (vect_print_dump_info (REPORT_SLP))
3177 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3179 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3180 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3184 /* Function vect_analyze_data_refs.
3186 Find all the data references in the loop.
3188 The general structure of the analysis of data refs in the vectorizer is as
3189 follows:
3190 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3191 find and analyze all data-refs in the loop and their dependences.
3192 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3193 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3194 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3198 static bool
3199 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3201 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3202 unsigned int i;
3203 VEC (data_reference_p, heap) *datarefs;
3204 struct data_reference *dr;
3205 tree scalar_type;
3207 if (vect_print_dump_info (REPORT_DETAILS))
3208 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3210 compute_data_dependences_for_loop (loop, true,
3211 &LOOP_VINFO_DATAREFS (loop_vinfo),
3212 &LOOP_VINFO_DDRS (loop_vinfo));
3214 /* Go through the data-refs and check that the analysis succeeded. Update
3215 the pointer from the stmt_vec_info struct to the DR, and set the vectype. */
3216 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3218 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3220 tree stmt;
3221 stmt_vec_info stmt_info;
3222 basic_block bb;
3223 tree base, offset, init;
3225 if (!dr || !DR_REF (dr))
3227 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3228 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3229 return false;
3232 stmt = DR_STMT (dr);
3233 stmt_info = vinfo_for_stmt (stmt);
3235 /* Check that analysis of the data-ref succeeded. */
3236 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3237 || !DR_STEP (dr))
3239 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3241 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3242 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3244 return false;
3247 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3249 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3250 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3251 "constant");
3252 return false;
3255 if (!DR_SYMBOL_TAG (dr))
3257 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3259 fprintf (vect_dump, "not vectorized: no memory tag for ");
3260 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3262 return false;
3265 base = unshare_expr (DR_BASE_ADDRESS (dr));
3266 offset = unshare_expr (DR_OFFSET (dr));
3267 init = unshare_expr (DR_INIT (dr));
3269 /* Update DR field in stmt_vec_info struct. */
3270 bb = bb_for_stmt (stmt);
3272 /* If the dataref is in an inner-loop of the loop that is considered
3273 for vectorization, we also want to analyze the access relative to
3274 the outer-loop (DR contains information only relative to the
3275 inner-most enclosing loop). We do that by building a reference to the
3276 first location accessed by the inner-loop, and analyze it relative to
3277 the outer-loop. */
3278 if (nested_in_vect_loop_p (loop, stmt))
3280 tree outer_step, outer_base, outer_init;
3281 HOST_WIDE_INT pbitsize, pbitpos;
3282 tree poffset;
3283 enum machine_mode pmode;
3284 int punsignedp, pvolatilep;
3285 affine_iv base_iv, offset_iv;
3286 tree dinit;
3288 /* Build a reference to the first location accessed by the
3289 inner-loop: *(BASE+INIT). (The first location is actually
3290 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3291 tree inner_base = build_fold_indirect_ref
3292 (fold_build2 (POINTER_PLUS_EXPR,
3293 TREE_TYPE (base), base,
3294 fold_convert (sizetype, init)));
3296 if (vect_print_dump_info (REPORT_DETAILS))
3298 fprintf (vect_dump, "analyze in outer-loop: ");
3299 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3302 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3303 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3304 gcc_assert (outer_base != NULL_TREE);
3306 if (pbitpos % BITS_PER_UNIT != 0)
3308 if (vect_print_dump_info (REPORT_DETAILS))
3309 fprintf (vect_dump, "failed: bit offset alignment.\n");
3310 return false;
3313 outer_base = build_fold_addr_expr (outer_base);
3314 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3316 if (vect_print_dump_info (REPORT_DETAILS))
3317 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3318 return false;
3321 if (offset)
3323 if (poffset)
3324 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3325 else
3326 poffset = offset;
3329 if (!poffset)
3331 offset_iv.base = ssize_int (0);
3332 offset_iv.step = ssize_int (0);
3334 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3336 if (vect_print_dump_info (REPORT_DETAILS))
3337 fprintf (vect_dump, "evolution of offset is not affine.\n");
3338 return false;
3341 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3342 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3343 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3344 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3345 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3347 outer_step = size_binop (PLUS_EXPR,
3348 fold_convert (ssizetype, base_iv.step),
3349 fold_convert (ssizetype, offset_iv.step));
3351 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3352 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3353 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3354 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3355 STMT_VINFO_DR_OFFSET (stmt_info) =
3356 fold_convert (ssizetype, offset_iv.base);
3357 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3358 size_int (highest_pow2_factor (offset_iv.base));
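/* Illustration (assumed source): for 'int a[N][M]' accessed as a[i][j] in
   an inner j-loop within an outer i-loop, the inner-loop DR has step 4,
   while relative to the outer loop the first location accessed, a[i][0],
   advances by M * 4 bytes per outer iteration; that row stride is what
   ends up in STMT_VINFO_DR_STEP here.  */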
3360 if (dump_file && (dump_flags & TDF_DETAILS))
3362 fprintf (dump_file, "\touter base_address: ");
3363 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3364 fprintf (dump_file, "\n\touter offset from base address: ");
3365 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3366 fprintf (dump_file, "\n\touter constant offset from base address: ");
3367 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3368 fprintf (dump_file, "\n\touter step: ");
3369 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3370 fprintf (dump_file, "\n\touter aligned to: ");
3371 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3375 if (STMT_VINFO_DATA_REF (stmt_info))
3377 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3379 fprintf (vect_dump,
3380 "not vectorized: more than one data ref in stmt: ");
3381 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3383 return false;
3385 STMT_VINFO_DATA_REF (stmt_info) = dr;
3387 /* Set vectype for STMT. */
3388 scalar_type = TREE_TYPE (DR_REF (dr));
3389 STMT_VINFO_VECTYPE (stmt_info) =
3390 get_vectype_for_scalar_type (scalar_type);
3391 if (!STMT_VINFO_VECTYPE (stmt_info))
3393 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3395 fprintf (vect_dump,
3396 "not vectorized: no vectype for stmt: ");
3397 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3398 fprintf (vect_dump, " scalar_type: ");
3399 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3401 return false;
3405 return true;
3409 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3411 /* Function vect_mark_relevant.
3413 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3415 static void
3416 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3417 enum vect_relevant relevant, bool live_p)
3419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3420 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3421 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3426 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3428 tree pattern_stmt;
3430 /* This is the last stmt in a sequence that was detected as a
3431 pattern that can potentially be vectorized. Don't mark the stmt
3432 as relevant/live because it's not going to be vectorized.
3433 Instead mark the pattern-stmt that replaces it. */
3435 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3439 stmt_info = vinfo_for_stmt (pattern_stmt);
3440 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3441 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3442 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3443 stmt = pattern_stmt;
3446 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3447 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3448 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3450 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3451 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3453 if (vect_print_dump_info (REPORT_DETAILS))
3454 fprintf (vect_dump, "already marked relevant/live.");
3455 return;
3458 VEC_safe_push (tree, heap, *worklist, stmt);
3462 /* Function vect_stmt_relevant_p.
3464 Return true if STMT in loop that is represented by LOOP_VINFO is
3465 "relevant for vectorization".
3467 A stmt is considered "relevant for vectorization" if:
3468 - it has uses outside the loop.
3469 - it has vdefs (it alters memory).
3470 - it is a control stmt in the loop (except for the exit condition).
3472 CHECKME: what other side effects would the vectorizer allow? */
3474 static bool
3475 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3476 enum vect_relevant *relevant, bool *live_p)
3478 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3479 ssa_op_iter op_iter;
3480 imm_use_iterator imm_iter;
3481 use_operand_p use_p;
3482 def_operand_p def_p;
3484 *relevant = vect_unused_in_loop;
3485 *live_p = false;
3487 /* cond stmt other than loop exit cond. */
3488 if (is_ctrl_stmt (stmt)
3489 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3490 *relevant = vect_used_in_loop;
3492 /* changing memory. */
3493 if (TREE_CODE (stmt) != PHI_NODE)
3494 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3496 if (vect_print_dump_info (REPORT_DETAILS))
3497 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3498 *relevant = vect_used_in_loop;
3501 /* uses outside the loop. */
3502 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3504 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3506 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3507 if (!flow_bb_inside_loop_p (loop, bb))
3509 if (vect_print_dump_info (REPORT_DETAILS))
3510 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3512 /* We expect all such uses to be in the loop exit phis
3513 (because of loop-closed SSA form). */
3514 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3515 gcc_assert (bb == single_exit (loop)->dest);
3517 *live_p = true;
3522 return (*live_p || *relevant);
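/* Illustration: a store 'a[i] = x' is relevant because it has virtual
   defs (it alters memory), and a summation 'sum = sum + a[i]' whose
   result is read after the loop is live because of the use in the
   loop-exit phi.  */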
3527 Function process_use.
3529 Inputs:
3530 - a USE in STMT in a loop represented by LOOP_VINFO
3531 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3532 that defined USE. This is done by calling vect_mark_relevant and passing it
3533 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3535 Outputs:
3536 Generally, LIVE_P and RELEVANT are used to define the liveness and
3537 relevance info of the DEF_STMT of this USE:
3538 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3539 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3540 Exceptions:
3541 - case 1: If USE is used only for address computations (e.g. array indexing),
3542 which does not need to be directly vectorized, then the liveness/relevance
3543 of the respective DEF_STMT is left unchanged.
3544 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3545 skip DEF_STMT because it has already been processed.
3546 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3547 be modified accordingly.
3549 Return true if everything is as expected. Return false otherwise. */
static bool
process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(tree,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || IS_EMPTY_STMT (def_stmt))
    return true;

  def_bb = bb_for_stmt (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = bb_for_stmt (stmt);
  if (TREE_CODE (stmt) == PHI_NODE
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && TREE_CODE (def_stmt) != PHI_NODE
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                        vect_used_by_reduction : vect_unused_in_loop;
          break;
        case vect_used_in_outer_by_reduction:
          relevant = vect_used_by_reduction;
          break;
        case vect_used_in_outer:
          relevant = vect_used_in_loop;
          break;
        case vect_used_by_reduction:
        case vect_used_in_loop:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb:
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                        vect_used_in_outer_by_reduction : vect_unused_in_loop;
          break;

        case vect_used_in_outer_by_reduction:
        case vect_used_in_outer:
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_loop:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
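
/* For illustration only (a hedged sketch of the algorithm below, not taken
   from these sources):

     worklist = stmts/phis that are relevant or live by themselves
                (e.g. stores, and defs that are used after the loop);
     while worklist is not empty:
       stmt = pop (worklist);
       for each USE of stmt:
         mark the def-stmt of USE relevant/live accordingly (process_use),
         adding it to the worklist if its marking changed;

   so only stmts reachable from such "seed" stmts end up being vectorized.  */
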
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool live_p;
  enum vect_relevant relevant;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (tree, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (tree, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as-is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be kept.

         Reduction phis are expected to be used by a reduction stmt, or by
         a stmt in an outer loop; other reduction stmts are expected to be
         used in the loop, and possibly by a stmt in an outer loop.
         Here are the expected values of "relevant" for reduction phis/stmts:

         relevance:                              phi     stmt
         vect_unused_in_loop                             ok
         vect_used_in_outer_by_reduction         ok      ok
         vect_used_in_outer                      ok      ok
         vect_used_by_reduction                  ok
         vect_used_in_loop                                    */
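
      /* For illustration only (a hedged sketch, not taken from these
         sources): in a sum reduction

           s = 0;
           for (i = 0; i < N; i++)
             s += a[i];

         the header phi of s is the reduction phi, and "s += a[i]" is the
         reduction stmt; the stmts it uses (e.g. the load of a[i]) are
         marked vect_used_by_reduction, so later phases know that the order
         of their per-iteration results need not be preserved.  */
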
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          enum vect_relevant tmp_relevant = relevant;
          switch (tmp_relevant)
            {
            case vect_unused_in_loop:
              gcc_assert (TREE_CODE (stmt) != PHI_NODE);
              relevant = vect_used_by_reduction;
              break;

            case vect_used_in_outer_by_reduction:
            case vect_used_in_outer:
              gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
                          && TREE_CODE (stmt) != DOT_PROD_EXPR);
              break;

            case vect_used_by_reduction:
              if (TREE_CODE (stmt) == PHI_NODE)
                break;
              /* fall through */
            case vect_used_in_loop:
            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");
              VEC_free (tree, heap, worklist);
              return false;
            }
          live_p = false;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (tree, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
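
/* For illustration only (a hedged sketch, not taken from these sources):
   with vectorization factor VF = 4 and an unknown trip count n, the code
   is laid out roughly as

     for (i = 0; i < (n & ~3); i += 4)    <-- vectorized loop
       ...
     for ( ; i < n; i++)                  <-- scalar epilog loop
       ...

   so on entry to the epilog each IV must be advanced to its value after
   n & ~3 scalar iterations, which is straightforward only when the IV
   evolutions checked below are simple (a loop-invariant step).  */
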
static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
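
/* For illustration only (a hedged sketch, not taken from these sources):
   for

     for (i = 0; i < n; i++)
       a[i] = b[i];

   the exit condition "i < n" executes n times, so NUMBER_OF_ITERATIONS is
   set to (an expression equivalent to) n, and the COND_EXPR that tests it
   is returned.  */
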
static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}

/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}

/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
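
/* For illustration only (a hedged sketch, not taken from these sources):

     for (i = 0; i < n; i++)    <-- countable: iterates exactly n times
       a[i] = 0;

     while (p)                  <-- not countable: there is no closed-form
       p = p->next;                 expression for the trip count

   only loops of the first kind pass the checks below.  */
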
loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW.  May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge backedge, entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      backedge = EDGE_PRED (innerloop->header, 1);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        {
          backedge = EDGE_PRED (innerloop->header, 0);
          entryedge = EDGE_PRED (innerloop->header, 1);
        }

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
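
  /* For illustration only (a hedged sketch, not taken from these sources):
     a source loop "while (i < n) { a[i] = 0; i++; }" is expected here in
     the form

       if (i < n)
         do { a[i] = 0; i++; } while (i < n);

     i.e. the exit test is the last thing executed in each iteration.  */
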
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */
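
  /* For illustration only (a hedged sketch, not taken from these sources):
     a short cross-iteration dependence such as

       for (i = 0; i < N; i++)
         a[i + 1] = a[i] + b[i];    <-- a[i] reads the previous iteration's
                                        store, at distance 1

     would be rejected here for any vectorization factor > 1, whereas
     independent data-refs pass.  */
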
  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
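
  /* For illustration only (a hedged sketch, not taken from these sources):
     a consecutive access like a[i] (a step of one element per iteration)
     is the form handled here, whereas an indirect access like a[b[i]] has
     no analyzable access pattern and is rejected.  */
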
  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "too long list of versioning for alias "
                            "run-time tests.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}