re PR middle-end/33617 (ICE for nonconstant callee-copied constructor arguments)
[official-gcc.git] / gcc / tree-vect-analyze.c
blob92ef5bec4ce3acdb7a85ac4245b46ad078d8a438
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4byte elements,
73 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
79 in the loop.
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
82 original loop:
83 for (i=0; i<N; i++){
84 a[i] = b[i] + c[i];
87 vectorized loop:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
93 static bool
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
101 tree scalar_type;
102 tree phi;
103 tree vectype;
104 unsigned int nunits;
105 stmt_vec_info stmt_info;
106 int i;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
138 if (!vectype)
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump,
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
146 return false;
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
185 continue;
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
195 return false;
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
206 return false;
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case when a vectype had been already set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
218 else
220 tree operation;
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
226 result (lhs).
227 For stmts whose result-type is different than the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
239 check the rhs. */
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR)
247 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
249 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
250 scalar_type = rhs_type;
253 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "get vectype for scalar type: ");
256 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
259 vectype = get_vectype_for_scalar_type (scalar_type);
260 if (!vectype)
262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
264 fprintf (vect_dump,
265 "not vectorized: unsupported data-type ");
266 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
268 return false;
270 STMT_VINFO_VECTYPE (stmt_info) = vectype;
273 if (vect_print_dump_info (REPORT_DETAILS))
275 fprintf (vect_dump, "vectype: ");
276 print_generic_expr (vect_dump, vectype, TDF_SLIM);
279 nunits = TYPE_VECTOR_SUBPARTS (vectype);
280 if (vect_print_dump_info (REPORT_DETAILS))
281 fprintf (vect_dump, "nunits = %d", nunits);
283 if (!vectorization_factor
284 || (nunits > vectorization_factor))
285 vectorization_factor = nunits;
290 /* TODO: Analyze cost. Decide if worth while to vectorize. */
291 if (vect_print_dump_info (REPORT_DETAILS))
292 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
293 if (vectorization_factor <= 1)
295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
296 fprintf (vect_dump, "not vectorized: unsupported data-type");
297 return false;
299 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
301 return true;
305 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
306 the number of created vector stmts depends on the unrolling factor). However,
307 the actual number of vector stmts for every SLP node depends on VF which is
308 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
309 In this function we assume that the inside costs calculated in
310 vect_model_xxx_cost are linear in ncopies. */
312 static void
313 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
315 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
316 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
317 slp_instance instance;
319 if (vect_print_dump_info (REPORT_SLP))
320 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
322 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
323 /* We assume that costs are linear in ncopies. */
324 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
325 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
329 /* Function vect_analyze_operations.
331 Scan the loop stmts and make sure they are all vectorizable. */
333 static bool
334 vect_analyze_operations (loop_vec_info loop_vinfo)
336 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
337 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
338 int nbbs = loop->num_nodes;
339 block_stmt_iterator si;
340 unsigned int vectorization_factor = 0;
341 int i;
342 bool ok;
343 tree phi;
344 stmt_vec_info stmt_info;
345 bool need_to_vectorize = false;
346 int min_profitable_iters;
347 int min_scalar_loop_bound;
348 unsigned int th;
349 bool only_slp_in_loop = true;
351 if (vect_print_dump_info (REPORT_DETAILS))
352 fprintf (vect_dump, "=== vect_analyze_operations ===");
354 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
355 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
357 for (i = 0; i < nbbs; i++)
359 basic_block bb = bbs[i];
361 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
363 ok = true;
365 stmt_info = vinfo_for_stmt (phi);
366 if (vect_print_dump_info (REPORT_DETAILS))
368 fprintf (vect_dump, "examining phi: ");
369 print_generic_expr (vect_dump, phi, TDF_SLIM);
372 if (! is_loop_header_bb_p (bb))
374 /* inner-loop loop-closed exit phi in outer-loop vectorization
375 (i.e. a phi in the tail of the outer-loop).
376 FORNOW: we currently don't support the case that these phis
377 are not used in the outerloop, cause this case requires
378 to actually do something here. */
379 if (!STMT_VINFO_RELEVANT_P (stmt_info)
380 || STMT_VINFO_LIVE_P (stmt_info))
382 if (vect_print_dump_info (REPORT_DETAILS))
383 fprintf (vect_dump,
384 "Unsupported loop-closed phi in outer-loop.");
385 return false;
387 continue;
390 gcc_assert (stmt_info);
392 if (STMT_VINFO_LIVE_P (stmt_info))
394 /* FORNOW: not yet supported. */
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
396 fprintf (vect_dump, "not vectorized: value used after loop.");
397 return false;
400 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
401 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
403 /* A scalar-dependence cycle that we don't support. */
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
405 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
406 return false;
409 if (STMT_VINFO_RELEVANT_P (stmt_info))
411 need_to_vectorize = true;
412 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
413 ok = vectorizable_induction (phi, NULL, NULL);
416 if (!ok)
418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
420 fprintf (vect_dump,
421 "not vectorized: relevant phi not supported: ");
422 print_generic_expr (vect_dump, phi, TDF_SLIM);
424 return false;
428 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
430 tree stmt = bsi_stmt (si);
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
432 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
434 if (vect_print_dump_info (REPORT_DETAILS))
436 fprintf (vect_dump, "==> examining statement: ");
437 print_generic_expr (vect_dump, stmt, TDF_SLIM);
440 gcc_assert (stmt_info);
442 /* skip stmts which do not need to be vectorized.
443 this is expected to include:
444 - the COND_EXPR which is the loop exit condition
445 - any LABEL_EXPRs in the loop
446 - computations that are used only for array indexing or loop
447 control */
449 if (!STMT_VINFO_RELEVANT_P (stmt_info)
450 && !STMT_VINFO_LIVE_P (stmt_info))
452 if (vect_print_dump_info (REPORT_DETAILS))
453 fprintf (vect_dump, "irrelevant.");
454 continue;
457 switch (STMT_VINFO_DEF_TYPE (stmt_info))
459 case vect_loop_def:
460 break;
462 case vect_reduction_def:
463 gcc_assert (relevance == vect_used_in_outer
464 || relevance == vect_used_in_outer_by_reduction
465 || relevance == vect_unused_in_loop);
466 break;
468 case vect_induction_def:
469 case vect_constant_def:
470 case vect_invariant_def:
471 case vect_unknown_def_type:
472 default:
473 gcc_unreachable ();
476 if (STMT_VINFO_RELEVANT_P (stmt_info))
478 gcc_assert (GIMPLE_STMT_P (stmt)
479 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
480 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
481 need_to_vectorize = true;
484 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
485 || vectorizable_type_demotion (stmt, NULL, NULL)
486 || vectorizable_conversion (stmt, NULL, NULL, NULL)
487 || vectorizable_operation (stmt, NULL, NULL, NULL)
488 || vectorizable_assignment (stmt, NULL, NULL, NULL)
489 || vectorizable_load (stmt, NULL, NULL, NULL)
490 || vectorizable_call (stmt, NULL, NULL)
491 || vectorizable_store (stmt, NULL, NULL, NULL)
492 || vectorizable_condition (stmt, NULL, NULL)
493 || vectorizable_reduction (stmt, NULL, NULL));
495 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
496 need extra handling, except for vectorizable reductions. */
497 if (STMT_VINFO_LIVE_P (stmt_info)
498 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
499 ok |= vectorizable_live_operation (stmt, NULL, NULL);
501 if (!ok)
503 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
505 fprintf (vect_dump, "not vectorized: stmt not supported: ");
506 print_generic_expr (vect_dump, stmt, TDF_SLIM);
508 return false;
511 if (!PURE_SLP_STMT (stmt_info))
513 /* STMT needs loop-based vectorization. */
514 only_slp_in_loop = false;
516 /* Groups of strided accesses whose size is not a power of 2 are
517 not vectorizable yet using loop-vectorization. Therefore, if
518 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
519 both SLPed and loop-based vectorzed), the loop cannot be
520 vectorized. */
521 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
522 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
523 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
525 if (vect_print_dump_info (REPORT_DETAILS))
527 fprintf (vect_dump, "not vectorized: the size of group "
528 "of strided accesses is not a power of 2");
529 print_generic_expr (vect_dump, stmt, TDF_SLIM);
531 return false;
534 } /* stmts in bb */
535 } /* bbs */
537 /* All operations in the loop are either irrelevant (deal with loop
538 control, or dead), or only used outside the loop and can be moved
539 out of the loop (e.g. invariants, inductions). The loop can be
540 optimized away by scalar optimizations. We're better off not
541 touching this loop. */
542 if (!need_to_vectorize)
544 if (vect_print_dump_info (REPORT_DETAILS))
545 fprintf (vect_dump,
546 "All the computation can be taken out of the loop.");
547 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
548 fprintf (vect_dump,
549 "not vectorized: redundant loop. no profit to vectorize.");
550 return false;
553 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
554 vectorization factor of the loop is the unrolling factor required by the
555 SLP instances. If that unrolling factor is 1, we say, that we perform
556 pure SLP on loop - cross iteration parallelism is not exploited. */
557 if (only_slp_in_loop)
558 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
559 else
560 vectorization_factor = least_common_multiple (vectorization_factor,
561 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
563 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && vect_print_dump_info (REPORT_DETAILS))
567 fprintf (vect_dump,
568 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
569 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
571 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
572 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
575 fprintf (vect_dump, "not vectorized: iteration count too small.");
576 if (vect_print_dump_info (REPORT_DETAILS))
577 fprintf (vect_dump,"not vectorized: iteration count smaller than "
578 "vectorization factor.");
579 return false;
582 /* Analyze cost. Decide if worth while to vectorize. */
584 /* Once VF is set, SLP costs should be updated since the number of created
585 vector stmts depends on VF. */
586 vect_update_slp_costs_according_to_vf (loop_vinfo);
588 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
589 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
590 if (min_profitable_iters < 0)
592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
593 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
594 if (vect_print_dump_info (REPORT_DETAILS))
595 fprintf (vect_dump, "not vectorized: vector version will never be "
596 "profitable.");
597 return false;
600 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
601 * vectorization_factor) - 1);
603 /* Use the cost model only if it is more conservative than user specified
604 threshold. */
606 th = (unsigned) min_scalar_loop_bound;
607 if (min_profitable_iters
608 && (!min_scalar_loop_bound
609 || min_profitable_iters > min_scalar_loop_bound))
610 th = (unsigned) min_profitable_iters;
612 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
613 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
615 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
616 fprintf (vect_dump, "not vectorized: vectorization not "
617 "profitable.");
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "not vectorized: iteration count smaller than "
620 "user specified loop bound parameter or minimum "
621 "profitable iterations (whichever is more conservative).");
622 return false;
625 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
626 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
627 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "epilog loop required.");
631 if (!vect_can_advance_ivs_p (loop_vinfo))
633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
634 fprintf (vect_dump,
635 "not vectorized: can't create epilog loop 1.");
636 return false;
638 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
640 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
641 fprintf (vect_dump,
642 "not vectorized: can't create epilog loop 2.");
643 return false;
647 return true;
651 /* Function exist_non_indexing_operands_for_use_p
653 USE is one of the uses attached to STMT. Check if USE is
654 used in STMT for anything other than indexing an array. */
656 static bool
657 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
659 tree operand;
660 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
662 /* USE corresponds to some operand in STMT. If there is no data
663 reference in STMT, then any operand that corresponds to USE
664 is not indexing an array. */
665 if (!STMT_VINFO_DATA_REF (stmt_info))
666 return true;
668 /* STMT has a data_ref. FORNOW this means that its of one of
669 the following forms:
670 -1- ARRAY_REF = var
671 -2- var = ARRAY_REF
672 (This should have been verified in analyze_data_refs).
674 'var' in the second case corresponds to a def, not a use,
675 so USE cannot correspond to any operands that are not used
676 for array indexing.
678 Therefore, all we need to check is if STMT falls into the
679 first case, and whether var corresponds to USE. */
681 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
682 return false;
684 operand = GIMPLE_STMT_OPERAND (stmt, 1);
686 if (TREE_CODE (operand) != SSA_NAME)
687 return false;
689 if (operand == use)
690 return true;
692 return false;
696 /* Function vect_analyze_scalar_cycles_1.
698 Examine the cross iteration def-use cycles of scalar variables
699 in LOOP. LOOP_VINFO represents the loop that is noe being
700 considered for vectorization (can be LOOP, or an outer-loop
701 enclosing LOOP). */
703 static void
704 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
706 tree phi;
707 basic_block bb = loop->header;
708 tree dumy;
709 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
711 if (vect_print_dump_info (REPORT_DETAILS))
712 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
714 /* First - identify all inductions. */
715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
717 tree access_fn = NULL;
718 tree def = PHI_RESULT (phi);
719 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
721 if (vect_print_dump_info (REPORT_DETAILS))
723 fprintf (vect_dump, "Analyze phi: ");
724 print_generic_expr (vect_dump, phi, TDF_SLIM);
727 /* Skip virtual phi's. The data dependences that are associated with
728 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
729 if (!is_gimple_reg (SSA_NAME_VAR (def)))
730 continue;
732 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
734 /* Analyze the evolution function. */
735 access_fn = analyze_scalar_evolution (loop, def);
736 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
738 fprintf (vect_dump, "Access function of PHI: ");
739 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
742 if (!access_fn
743 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
745 VEC_safe_push (tree, heap, worklist, phi);
746 continue;
749 if (vect_print_dump_info (REPORT_DETAILS))
750 fprintf (vect_dump, "Detected induction.");
751 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
755 /* Second - identify all reductions. */
756 while (VEC_length (tree, worklist) > 0)
758 tree phi = VEC_pop (tree, worklist);
759 tree def = PHI_RESULT (phi);
760 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
761 tree reduc_stmt;
763 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "Analyze phi: ");
766 print_generic_expr (vect_dump, phi, TDF_SLIM);
769 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
770 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
772 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
773 if (reduc_stmt)
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "Detected reduction.");
777 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
778 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
779 vect_reduction_def;
781 else
782 if (vect_print_dump_info (REPORT_DETAILS))
783 fprintf (vect_dump, "Unknown def-use cycle pattern.");
786 VEC_free (tree, heap, worklist);
787 return;
791 /* Function vect_analyze_scalar_cycles.
793 Examine the cross iteration def-use cycles of scalar variables, by
794 analyzing the loop-header PHIs of scalar variables; Classify each
795 cycle as one of the following: invariant, induction, reduction, unknown.
796 We do that for the loop represented by LOOP_VINFO, and also to its
797 inner-loop, if exists.
798 Examples for scalar cycles:
800 Example1: reduction:
802 loop1:
803 for (i=0; i<N; i++)
804 sum += a[i];
806 Example2: induction:
808 loop2:
809 for (i=0; i<N; i++)
810 a[i] = i; */
812 static void
813 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
819 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
820 Reductions in such inner-loop therefore have different properties than
821 the reductions in the nest that gets vectorized:
822 1. When vectorized, they are executed in the same order as in the original
823 scalar loop, so we can't change the order of computation when
824 vectorizing them.
825 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
826 current checks are too strict. */
828 if (loop->inner)
829 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
833 /* Function vect_insert_into_interleaving_chain.
835 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
837 static void
838 vect_insert_into_interleaving_chain (struct data_reference *dra,
839 struct data_reference *drb)
841 tree prev, next, next_init;
842 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
843 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
845 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
846 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
847 while (next)
849 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
850 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
852 /* Insert here. */
853 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
854 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
855 return;
857 prev = next;
858 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
861 /* We got to the end of the list. Insert here. */
862 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
863 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
867 /* Function vect_update_interleaving_chain.
869 For two data-refs DRA and DRB that are a part of a chain interleaved data
870 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
872 There are four possible cases:
873 1. New stmts - both DRA and DRB are not a part of any chain:
874 FIRST_DR = DRB
875 NEXT_DR (DRB) = DRA
876 2. DRB is a part of a chain and DRA is not:
877 no need to update FIRST_DR
878 no need to insert DRB
879 insert DRA according to init
880 3. DRA is a part of a chain and DRB is not:
881 if (init of FIRST_DR > init of DRB)
882 FIRST_DR = DRB
883 NEXT(FIRST_DR) = previous FIRST_DR
884 else
885 insert DRB according to its init
886 4. both DRA and DRB are in some interleaving chains:
887 choose the chain with the smallest init of FIRST_DR
888 insert the nodes of the second chain into the first one. */
890 static void
891 vect_update_interleaving_chain (struct data_reference *drb,
892 struct data_reference *dra)
894 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
895 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
896 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
897 tree node, prev, next, node_init, first_stmt;
899 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
900 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
902 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
903 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
904 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
905 return;
908 /* 2. DRB is a part of a chain and DRA is not. */
909 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
911 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
912 /* Insert DRA into the chain of DRB. */
913 vect_insert_into_interleaving_chain (dra, drb);
914 return;
917 /* 3. DRA is a part of a chain and DRB is not. */
918 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
920 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
921 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
922 old_first_stmt)));
923 tree tmp;
925 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
927 /* DRB's init is smaller than the init of the stmt previously marked
928 as the first stmt of the interleaving chain of DRA. Therefore, we
929 update FIRST_STMT and put DRB in the head of the list. */
930 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
931 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
933 /* Update all the stmts in the list to point to the new FIRST_STMT. */
934 tmp = old_first_stmt;
935 while (tmp)
937 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
938 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
941 else
943 /* Insert DRB in the list of DRA. */
944 vect_insert_into_interleaving_chain (drb, dra);
945 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
947 return;
950 /* 4. both DRA and DRB are in some interleaving chains. */
951 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
952 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
953 if (first_a == first_b)
954 return;
955 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
956 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
958 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
960 /* Insert the nodes of DRA chain into the DRB chain.
961 After inserting a node, continue from this node of the DRB chain (don't
962 start from the beginning. */
963 node = DR_GROUP_FIRST_DR (stmtinfo_a);
964 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
965 first_stmt = first_b;
967 else
969 /* Insert the nodes of DRB chain into the DRA chain.
970 After inserting a node, continue from this node of the DRA chain (don't
971 start from the beginning. */
972 node = DR_GROUP_FIRST_DR (stmtinfo_b);
973 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
974 first_stmt = first_a;
977 while (node)
979 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
980 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
981 while (next)
983 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
984 if (tree_int_cst_compare (next_init, node_init) > 0)
986 /* Insert here. */
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
988 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
989 prev = node;
990 break;
992 prev = next;
993 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
995 if (!next)
997 /* We got to the end of the list. Insert here. */
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
999 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
1000 prev = node;
1002 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1003 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1008 /* Function vect_equal_offsets.
1010 Check if OFFSET1 and OFFSET2 are identical expressions. */
1012 static bool
1013 vect_equal_offsets (tree offset1, tree offset2)
1015 bool res0, res1;
1017 STRIP_NOPS (offset1);
1018 STRIP_NOPS (offset2);
1020 if (offset1 == offset2)
1021 return true;
1023 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1024 || !BINARY_CLASS_P (offset1)
1025 || !BINARY_CLASS_P (offset2))
1026 return false;
1028 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1029 TREE_OPERAND (offset2, 0));
1030 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1031 TREE_OPERAND (offset2, 1));
1033 return (res0 && res1);
1037 /* Function vect_check_interleaving.
1039 Check if DRA and DRB are a part of interleaving. In case they are, insert
1040 DRA and DRB in an interleaving chain. */
1042 static void
1043 vect_check_interleaving (struct data_reference *dra,
1044 struct data_reference *drb)
1046 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1048 /* Check that the data-refs have same first location (except init) and they
1049 are both either store or load (not load and store). */
1050 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1051 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1052 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1053 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1054 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1055 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1056 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1057 || DR_IS_READ (dra) != DR_IS_READ (drb))
1058 return;
1060 /* Check:
1061 1. data-refs are of the same type
1062 2. their steps are equal
1063 3. the step is greater than the difference between data-refs' inits */
1064 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1065 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1067 if (type_size_a != type_size_b
1068 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1069 return;
1071 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1072 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1073 step = TREE_INT_CST_LOW (DR_STEP (dra));
1075 if (init_a > init_b)
1077 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1078 and DRB is accessed before DRA. */
1079 diff_mod_size = (init_a - init_b) % type_size_a;
1081 if ((init_a - init_b) > step)
1082 return;
1084 if (diff_mod_size == 0)
1086 vect_update_interleaving_chain (drb, dra);
1087 if (vect_print_dump_info (REPORT_DR_DETAILS))
1089 fprintf (vect_dump, "Detected interleaving ");
1090 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1091 fprintf (vect_dump, " and ");
1092 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1094 return;
1097 else
1099 /* If init_b == init_a + the size of the type * k, we have an
1100 interleaving, and DRA is accessed before DRB. */
1101 diff_mod_size = (init_b - init_a) % type_size_a;
1103 if ((init_b - init_a) > step)
1104 return;
1106 if (diff_mod_size == 0)
1108 vect_update_interleaving_chain (dra, drb);
1109 if (vect_print_dump_info (REPORT_DR_DETAILS))
1111 fprintf (vect_dump, "Detected interleaving ");
1112 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1113 fprintf (vect_dump, " and ");
1114 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1116 return;
1121 /* Check if data references pointed by DR_I and DR_J are same or
1122 belong to same interleaving group. Return FALSE if drs are
1123 different, otherwise return TRUE. */
1125 static bool
1126 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1128 tree stmt_i = DR_STMT (dr_i);
1129 tree stmt_j = DR_STMT (dr_j);
1131 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1132 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1133 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1134 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1135 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1136 return true;
1137 else
1138 return false;
1141 /* If address ranges represented by DDR_I and DDR_J are equal,
1142 return TRUE, otherwise return FALSE. */
1144 static bool
1145 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1147 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1148 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1149 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1150 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1151 return true;
1152 else
1153 return false;
1156 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1157 tested at run-time. Return TRUE if DDR was successfully inserted.
1158 Return false if versioning is not supported. */
1160 static bool
1161 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1163 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1165 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1166 return false;
1168 if (vect_print_dump_info (REPORT_DR_DETAILS))
1170 fprintf (vect_dump, "mark for run-time aliasing test between ");
1171 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1172 fprintf (vect_dump, " and ");
1173 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1176 if (optimize_size)
1178 if (vect_print_dump_info (REPORT_DR_DETAILS))
1179 fprintf (vect_dump, "versioning not supported when optimizing for size.");
1180 return false;
1183 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1184 if (loop->inner)
1186 if (vect_print_dump_info (REPORT_DR_DETAILS))
1187 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1188 return false;
1191 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1192 return true;
1195 /* Function vect_analyze_data_ref_dependence.
1197 Return TRUE if there (might) exist a dependence between a memory-reference
1198 DRA and a memory-reference DRB. When versioning for alias may check a
1199 dependence at run-time, return FALSE. */
1201 static bool
1202 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1203 loop_vec_info loop_vinfo)
1205 unsigned int i;
1206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1208 struct data_reference *dra = DDR_A (ddr);
1209 struct data_reference *drb = DDR_B (ddr);
1210 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1211 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1212 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1213 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1214 lambda_vector dist_v;
1215 unsigned int loop_depth;
1217 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1219 /* Independent data accesses. */
1220 vect_check_interleaving (dra, drb);
1221 return false;
1224 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1225 return false;
1227 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1229 if (vect_print_dump_info (REPORT_DR_DETAILS))
1231 fprintf (vect_dump,
1232 "versioning for alias required: can't determine dependence between ");
1233 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1234 fprintf (vect_dump, " and ");
1235 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1237 /* Add to list of ddrs that need to be tested at run-time. */
1238 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1241 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1243 if (vect_print_dump_info (REPORT_DR_DETAILS))
1245 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1246 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1247 fprintf (vect_dump, " and ");
1248 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1250 /* Add to list of ddrs that need to be tested at run-time. */
1251 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1254 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1255 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1257 int dist = dist_v[loop_depth];
1259 if (vect_print_dump_info (REPORT_DR_DETAILS))
1260 fprintf (vect_dump, "dependence distance = %d.", dist);
1262 /* Same loop iteration. */
1263 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1265 /* Two references with distance zero have the same alignment. */
1266 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1267 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1268 if (vect_print_dump_info (REPORT_ALIGNMENT))
1269 fprintf (vect_dump, "accesses have the same alignment.");
1270 if (vect_print_dump_info (REPORT_DR_DETAILS))
1272 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1273 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1274 fprintf (vect_dump, " and ");
1275 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1278 /* For interleaving, mark that there is a read-write dependency if
1279 necessary. We check before that one of the data-refs is store. */
1280 if (DR_IS_READ (dra))
1281 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1282 else
1284 if (DR_IS_READ (drb))
1285 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1288 continue;
1291 if (abs (dist) >= vectorization_factor
1292 || (dist > 0 && DDR_REVERSED_P (ddr)))
1294 /* Dependence distance does not create dependence, as far as
1295 vectorization is concerned, in this case. If DDR_REVERSED_P the
1296 order of the data-refs in DDR was reversed (to make distance
1297 vector positive), and the actual distance is negative. */
1298 if (vect_print_dump_info (REPORT_DR_DETAILS))
1299 fprintf (vect_dump, "dependence distance >= VF or negative.");
1300 continue;
1303 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1305 fprintf (vect_dump,
1306 "not vectorized, possible dependence "
1307 "between data-refs ");
1308 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1309 fprintf (vect_dump, " and ");
1310 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1313 return true;
1316 return false;
1319 /* Function vect_analyze_data_ref_dependences.
1321 Examine all the data references in the loop, and make sure there do not
1322 exist any data dependences between them. */
1324 static bool
1325 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1327 unsigned int i;
1328 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1329 struct data_dependence_relation *ddr;
1331 if (vect_print_dump_info (REPORT_DETAILS))
1332 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1334 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1335 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1336 return false;
1338 return true;
1342 /* Function vect_compute_data_ref_alignment
1344 Compute the misalignment of the data reference DR.
1346 Output:
1347 1. If during the misalignment computation it is found that the data reference
1348 cannot be vectorized then false is returned.
1349 2. DR_MISALIGNMENT (DR) is defined.
1351 FOR NOW: No analysis is actually performed. Misalignment is calculated
1352 only for trivial cases. TODO. */
1354 static bool
1355 vect_compute_data_ref_alignment (struct data_reference *dr)
1357 tree stmt = DR_STMT (dr);
1358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1360 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1361 tree ref = DR_REF (dr);
1362 tree vectype;
1363 tree base, base_addr;
1364 bool base_aligned;
1365 tree misalign;
1366 tree aligned_to, alignment;
1368 if (vect_print_dump_info (REPORT_DETAILS))
1369 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1371 /* Initialize misalignment to unknown. */
1372 SET_DR_MISALIGNMENT (dr, -1);
1374 misalign = DR_INIT (dr);
1375 aligned_to = DR_ALIGNED_TO (dr);
1376 base_addr = DR_BASE_ADDRESS (dr);
1378 /* In case the dataref is in an inner-loop of the loop that is being
1379 vectorized (LOOP), we use the base and misalignment information
1380 relative to the outer-loop (LOOP). This is ok only if the misalignment
1381 stays the same throughout the execution of the inner-loop, which is why
1382 we have to check that the stride of the dataref in the inner-loop evenly
1383 divides by the vector size. */
1384 if (nested_in_vect_loop_p (loop, stmt))
1386 tree step = DR_STEP (dr);
1387 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1389 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1391 if (vect_print_dump_info (REPORT_ALIGNMENT))
1392 fprintf (vect_dump, "inner step divides the vector-size.");
1393 misalign = STMT_VINFO_DR_INIT (stmt_info);
1394 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1395 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1397 else
1399 if (vect_print_dump_info (REPORT_ALIGNMENT))
1400 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1401 misalign = NULL_TREE;
1405 base = build_fold_indirect_ref (base_addr);
1406 vectype = STMT_VINFO_VECTYPE (stmt_info);
1407 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1409 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1410 || !misalign)
1412 if (vect_print_dump_info (REPORT_ALIGNMENT))
1414 fprintf (vect_dump, "Unknown alignment for access: ");
1415 print_generic_expr (vect_dump, base, TDF_SLIM);
1417 return true;
1420 if ((DECL_P (base)
1421 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1422 alignment) >= 0)
1423 || (TREE_CODE (base_addr) == SSA_NAME
1424 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1425 TREE_TYPE (base_addr)))),
1426 alignment) >= 0))
1427 base_aligned = true;
1428 else
1429 base_aligned = false;
1431 if (!base_aligned)
1433 /* Do not change the alignment of global variables if
1434 flag_section_anchors is enabled. */
1435 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1436 || (TREE_STATIC (base) && flag_section_anchors))
1438 if (vect_print_dump_info (REPORT_DETAILS))
1440 fprintf (vect_dump, "can't force alignment of ref: ");
1441 print_generic_expr (vect_dump, ref, TDF_SLIM);
1443 return true;
1446 /* Force the alignment of the decl.
1447 NOTE: This is the only change to the code we make during
1448 the analysis phase, before deciding to vectorize the loop. */
1449 if (vect_print_dump_info (REPORT_DETAILS))
1450 fprintf (vect_dump, "force alignment");
1451 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1452 DECL_USER_ALIGN (base) = 1;
1455 /* At this point we assume that the base is aligned. */
1456 gcc_assert (base_aligned
1457 || (TREE_CODE (base) == VAR_DECL
1458 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1460 /* Modulo alignment. */
1461 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1463 if (!host_integerp (misalign, 1))
1465 /* Negative or overflowed misalignment value. */
1466 if (vect_print_dump_info (REPORT_DETAILS))
1467 fprintf (vect_dump, "unexpected misalign value");
1468 return false;
1471 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1473 if (vect_print_dump_info (REPORT_DETAILS))
1475 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1476 print_generic_expr (vect_dump, ref, TDF_SLIM);
1479 return true;
1483 /* Function vect_compute_data_refs_alignment
1485 Compute the misalignment of data references in the loop.
1486 Return FALSE if a data reference is found that cannot be vectorized. */
1488 static bool
1489 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1491 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1492 struct data_reference *dr;
1493 unsigned int i;
1495 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1496 if (!vect_compute_data_ref_alignment (dr))
1497 return false;
1499 return true;
1503 /* Function vect_update_misalignment_for_peel
1505 DR - the data reference whose misalignment is to be adjusted.
1506 DR_PEEL - the data reference whose misalignment is being made
1507 zero in the vector loop by the peel.
1508 NPEEL - the number of iterations in the peel loop if the misalignment
1509 of DR_PEEL is known at compile time. */
1511 static void
1512 vect_update_misalignment_for_peel (struct data_reference *dr,
1513 struct data_reference *dr_peel, int npeel)
1515 unsigned int i;
1516 VEC(dr_p,heap) *same_align_drs;
1517 struct data_reference *current_dr;
1518 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1519 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1520 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1521 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1523 /* For interleaved data accesses the step in the loop must be multiplied by
1524 the size of the interleaving group. */
1525 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1526 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1527 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1528 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1530 /* It can be assumed that the data refs with the same alignment as dr_peel
1531 are aligned in the vector loop. */
1532 same_align_drs
1533 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1534 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1536 if (current_dr != dr)
1537 continue;
1538 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1539 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1540 SET_DR_MISALIGNMENT (dr, 0);
1541 return;
1544 if (known_alignment_for_access_p (dr)
1545 && known_alignment_for_access_p (dr_peel))
1547 int misal = DR_MISALIGNMENT (dr);
1548 misal += npeel * dr_size;
1549 misal %= UNITS_PER_SIMD_WORD;
1550 SET_DR_MISALIGNMENT (dr, misal);
1551 return;
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "Setting misalignment to -1.");
1556 SET_DR_MISALIGNMENT (dr, -1);
1560 /* Function vect_verify_datarefs_alignment
1562 Return TRUE if all data references in the loop can be
1563 handled with respect to alignment. */
1565 static bool
1566 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1568 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1569 struct data_reference *dr;
1570 enum dr_alignment_support supportable_dr_alignment;
1571 unsigned int i;
1573 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1575 tree stmt = DR_STMT (dr);
1576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1578 /* For interleaving, only the alignment of the first access matters. */
1579 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1580 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1581 continue;
1583 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1584 if (!supportable_dr_alignment)
1586 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1588 if (DR_IS_READ (dr))
1589 fprintf (vect_dump,
1590 "not vectorized: unsupported unaligned load.");
1591 else
1592 fprintf (vect_dump,
1593 "not vectorized: unsupported unaligned store.");
1595 return false;
1597 if (supportable_dr_alignment != dr_aligned
1598 && vect_print_dump_info (REPORT_ALIGNMENT))
1599 fprintf (vect_dump, "Vectorizing an unaligned access.");
1601 return true;
1605 /* Function vector_alignment_reachable_p
1607 Return true if vector alignment for DR is reachable by peeling
1608 a few loop iterations. Return false otherwise. */
1610 static bool
1611 vector_alignment_reachable_p (struct data_reference *dr)
1613 tree stmt = DR_STMT (dr);
1614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1617 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1619 /* For interleaved access we peel only if number of iterations in
1620 the prolog loop ({VF - misalignment}), is a multiple of the
1621 number of the interleaved accesses. */
1622 int elem_size, mis_in_elements;
1623 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1625 /* FORNOW: handle only known alignment. */
1626 if (!known_alignment_for_access_p (dr))
1627 return false;
1629 elem_size = UNITS_PER_SIMD_WORD / nelements;
1630 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1632 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1633 return false;
1636 /* If misalignment is known at the compile time then allow peeling
1637 only if natural alignment is reachable through peeling. */
1638 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1640 HOST_WIDE_INT elmsize =
1641 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1642 if (vect_print_dump_info (REPORT_DETAILS))
1644 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1645 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1647 if (DR_MISALIGNMENT (dr) % elmsize)
1649 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1651 return false;
1655 if (!known_alignment_for_access_p (dr))
1657 tree type = (TREE_TYPE (DR_REF (dr)));
1658 tree ba = DR_BASE_OBJECT (dr);
1659 bool is_packed = false;
1661 if (ba)
1662 is_packed = contains_packed_reference (ba);
1664 if (vect_print_dump_info (REPORT_DETAILS))
1665 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1666 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1667 return true;
1668 else
1669 return false;
1672 return true;
1675 /* Function vect_enhance_data_refs_alignment
1677 This pass will use loop versioning and loop peeling in order to enhance
1678 the alignment of data references in the loop.
1680 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1681 original loop is to be vectorized; Any other loops that are created by
1682 the transformations performed in this pass - are not supposed to be
1683 vectorized. This restriction will be relaxed.
1685 This pass will require a cost model to guide it whether to apply peeling
1686 or versioning or a combination of the two. For example, the scheme that
1687 intel uses when given a loop with several memory accesses, is as follows:
1688 choose one memory access ('p') which alignment you want to force by doing
1689 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1690 other accesses are not necessarily aligned, or (2) use loop versioning to
1691 generate one loop in which all accesses are aligned, and another loop in
1692 which only 'p' is necessarily aligned.
1694 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1695 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1696 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1698 Devising a cost model is the most critical aspect of this work. It will
1699 guide us on which access to peel for, whether to use loop versioning, how
1700 many versions to create, etc. The cost model will probably consist of
1701 generic considerations as well as target specific considerations (on
1702 powerpc for example, misaligned stores are more painful than misaligned
1703 loads).
1705 Here are the general steps involved in alignment enhancements:
1707 -- original loop, before alignment analysis:
1708 for (i=0; i<N; i++){
1709 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1710 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1713 -- After vect_compute_data_refs_alignment:
1714 for (i=0; i<N; i++){
1715 x = q[i]; # DR_MISALIGNMENT(q) = 3
1716 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1719 -- Possibility 1: we do loop versioning:
1720 if (p is aligned) {
1721 for (i=0; i<N; i++){ # loop 1A
1722 x = q[i]; # DR_MISALIGNMENT(q) = 3
1723 p[i] = y; # DR_MISALIGNMENT(p) = 0
1726 else {
1727 for (i=0; i<N; i++){ # loop 1B
1728 x = q[i]; # DR_MISALIGNMENT(q) = 3
1729 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1733 -- Possibility 2: we do loop peeling:
1734 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1735 x = q[i];
1736 p[i] = y;
1738 for (i = 3; i < N; i++){ # loop 2A
1739 x = q[i]; # DR_MISALIGNMENT(q) = 0
1740 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1743 -- Possibility 3: combination of loop peeling and versioning:
1744 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1745 x = q[i];
1746 p[i] = y;
1748 if (p is aligned) {
1749 for (i = 3; i<N; i++){ # loop 3A
1750 x = q[i]; # DR_MISALIGNMENT(q) = 0
1751 p[i] = y; # DR_MISALIGNMENT(p) = 0
1754 else {
1755 for (i = 3; i<N; i++){ # loop 3B
1756 x = q[i]; # DR_MISALIGNMENT(q) = 0
1757 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1761 These loops are later passed to loop_transform to be vectorized. The
1762 vectorizer will use the alignment information to guide the transformation
1763 (whether to generate regular loads/stores, or with special handling for
1764 misalignment). */
1766 static bool
1767 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1769 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1771 enum dr_alignment_support supportable_dr_alignment;
1772 struct data_reference *dr0 = NULL;
1773 struct data_reference *dr;
1774 unsigned int i;
1775 bool do_peeling = false;
1776 bool do_versioning = false;
1777 bool stat;
1778 tree stmt;
1779 stmt_vec_info stmt_info;
1780 int vect_versioning_for_alias_required;
1782 if (vect_print_dump_info (REPORT_DETAILS))
1783 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1785 /* While cost model enhancements are expected in the future, the high level
1786 view of the code at this time is as follows:
1788 A) If there is a misaligned write then see if peeling to align this write
1789 can make all data references satisfy vect_supportable_dr_alignment.
1790 If so, update data structures as needed and return true. Note that
1791 at this time vect_supportable_dr_alignment is known to return false
1792 for a misaligned write.
1794 B) If peeling wasn't possible and there is a data reference with an
1795 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1796 then see if loop versioning checks can be used to make all data
1797 references satisfy vect_supportable_dr_alignment. If so, update
1798 data structures as needed and return true.
1800 C) If neither peeling nor versioning were successful then return false if
1801 any data reference does not satisfy vect_supportable_dr_alignment.
1803 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1805 Note, Possibility 3 above (which is peeling and versioning together) is not
1806 being done at this time. */
1808 /* (1) Peeling to force alignment. */
1810 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1811 Considerations:
1812 + How many accesses will become aligned due to the peeling
1813 - How many accesses will become unaligned due to the peeling,
1814 and the cost of misaligned accesses.
1815 - The cost of peeling (the extra runtime checks, the increase
1816 in code size).
1818 The scheme we use FORNOW: peel to force the alignment of the first
1819 misaligned store in the loop.
1820 Rationale: misaligned stores are not yet supported.
1822 TODO: Use a cost model. */
1824 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1826 stmt = DR_STMT (dr);
1827 stmt_info = vinfo_for_stmt (stmt);
1829 /* For interleaving, only the alignment of the first access
1830 matters. */
1831 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1832 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1833 continue;
1835 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1837 do_peeling = vector_alignment_reachable_p (dr);
1838 if (do_peeling)
1839 dr0 = dr;
1840 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1841 fprintf (vect_dump, "vector alignment may not be reachable");
1842 break;
1846 vect_versioning_for_alias_required =
1847 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1849 /* Temporarily, if versioning for alias is required, we disable peeling
1850 until we support peeling and versioning. Often peeling for alignment
1851 will require peeling for loop-bound, which in turn requires that we
1852 know how to adjust the loop ivs after the loop. */
1853 if (vect_versioning_for_alias_required
1854 || !vect_can_advance_ivs_p (loop_vinfo)
1855 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1856 do_peeling = false;
1858 if (do_peeling)
1860 int mis;
1861 int npeel = 0;
1862 tree stmt = DR_STMT (dr0);
1863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1864 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1865 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1867 if (known_alignment_for_access_p (dr0))
1869 /* Since it's known at compile time, compute the number of iterations
1870 in the peeled loop (the peeling factor) for use in updating
1871 DR_MISALIGNMENT values. The peeling factor is the vectorization
1872 factor minus the misalignment as an element count. */
1873 mis = DR_MISALIGNMENT (dr0);
1874 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1875 npeel = nelements - mis;
1877 /* For interleaved data access every iteration accesses all the
1878 members of the group, therefore we divide the number of iterations
1879 by the group size. */
1880 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1881 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1882 npeel /= DR_GROUP_SIZE (stmt_info);
1884 if (vect_print_dump_info (REPORT_DETAILS))
1885 fprintf (vect_dump, "Try peeling by %d", npeel);
1888 /* Ensure that all data refs can be vectorized after the peel. */
1889 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1891 int save_misalignment;
1893 if (dr == dr0)
1894 continue;
1896 stmt = DR_STMT (dr);
1897 stmt_info = vinfo_for_stmt (stmt);
1898 /* For interleaving, only the alignment of the first access
1899 matters. */
1900 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1901 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1902 continue;
1904 save_misalignment = DR_MISALIGNMENT (dr);
1905 vect_update_misalignment_for_peel (dr, dr0, npeel);
1906 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1907 SET_DR_MISALIGNMENT (dr, save_misalignment);
1909 if (!supportable_dr_alignment)
1911 do_peeling = false;
1912 break;
1916 if (do_peeling)
1918 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1919 If the misalignment of DR_i is identical to that of dr0 then set
1920 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1921 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1922 by the peeling factor times the element size of DR_i (MOD the
1923 vectorization factor times the size). Otherwise, the
1924 misalignment of DR_i must be set to unknown. */
1925 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1926 if (dr != dr0)
1927 vect_update_misalignment_for_peel (dr, dr0, npeel);
1929 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1930 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1931 SET_DR_MISALIGNMENT (dr0, 0);
1932 if (vect_print_dump_info (REPORT_ALIGNMENT))
1933 fprintf (vect_dump, "Alignment of access forced using peeling.");
1935 if (vect_print_dump_info (REPORT_DETAILS))
1936 fprintf (vect_dump, "Peeling for alignment will be applied.");
1938 stat = vect_verify_datarefs_alignment (loop_vinfo);
1939 gcc_assert (stat);
1940 return stat;
1945 /* (2) Versioning to force alignment. */
1947 /* Try versioning if:
1948 1) flag_tree_vect_loop_version is TRUE
1949 2) optimize_size is FALSE
1950 3) there is at least one unsupported misaligned data ref with an unknown
1951 misalignment, and
1952 4) all misaligned data refs with a known misalignment are supported, and
1953 5) the number of runtime alignment checks is within reason. */
1955 do_versioning =
1956 flag_tree_vect_loop_version
1957 && (!optimize_size)
1958 && (!loop->inner); /* FORNOW */
1960 if (do_versioning)
1962 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1964 stmt = DR_STMT (dr);
1965 stmt_info = vinfo_for_stmt (stmt);
1967 /* For interleaving, only the alignment of the first access
1968 matters. */
1969 if (aligned_access_p (dr)
1970 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1971 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1972 continue;
1974 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1976 if (!supportable_dr_alignment)
1978 tree stmt;
1979 int mask;
1980 tree vectype;
1982 if (known_alignment_for_access_p (dr)
1983 || VEC_length (tree,
1984 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1985 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1987 do_versioning = false;
1988 break;
1991 stmt = DR_STMT (dr);
1992 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1993 gcc_assert (vectype);
1995 /* The rightmost bits of an aligned address must be zeros.
1996 Construct the mask needed for this test. For example,
1997 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1998 mask must be 15 = 0xf. */
1999 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2001 /* FORNOW: use the same mask to test all potentially unaligned
2002 references in the loop. The vectorizer currently supports
2003 a single vector size, see the reference to
2004 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2005 vectorization factor is computed. */
2006 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2007 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2008 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2009 VEC_safe_push (tree, heap,
2010 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2011 DR_STMT (dr));
2015 /* Versioning requires at least one misaligned data reference. */
2016 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2017 do_versioning = false;
2018 else if (!do_versioning)
2019 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2022 if (do_versioning)
2024 VEC(tree,heap) *may_misalign_stmts
2025 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2026 tree stmt;
2028 /* It can now be assumed that the data references in the statements
2029 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2030 of the loop being vectorized. */
2031 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2034 dr = STMT_VINFO_DATA_REF (stmt_info);
2035 SET_DR_MISALIGNMENT (dr, 0);
2036 if (vect_print_dump_info (REPORT_ALIGNMENT))
2037 fprintf (vect_dump, "Alignment of access forced using versioning.");
2040 if (vect_print_dump_info (REPORT_DETAILS))
2041 fprintf (vect_dump, "Versioning for alignment will be applied.");
2043 /* Peeling and versioning can't be done together at this time. */
2044 gcc_assert (! (do_peeling && do_versioning));
2046 stat = vect_verify_datarefs_alignment (loop_vinfo);
2047 gcc_assert (stat);
2048 return stat;
2051 /* This point is reached if neither peeling nor versioning is being done. */
2052 gcc_assert (! (do_peeling || do_versioning));
2054 stat = vect_verify_datarefs_alignment (loop_vinfo);
2055 return stat;
2059 /* Function vect_analyze_data_refs_alignment
2061 Analyze the alignment of the data-references in the loop.
2062 Return FALSE if a data reference is found that cannot be vectorized. */
2064 static bool
2065 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2067 if (vect_print_dump_info (REPORT_DETAILS))
2068 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2070 if (!vect_compute_data_refs_alignment (loop_vinfo))
2072 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2073 fprintf (vect_dump,
2074 "not vectorized: can't calculate alignment for data ref.");
2075 return false;
2078 return true;
2082 /* Analyze groups of strided accesses: check that DR belongs to a group of
2083 strided accesses of legal size, step, etc. Detect gaps, single element
2084 interleaving, and other special cases. Set strided access info.
2085 Collect groups of strided stores for further use in SLP analysis. */
2087 static bool
2088 vect_analyze_group_access (struct data_reference *dr)
2090 tree step = DR_STEP (dr);
2091 tree scalar_type = TREE_TYPE (DR_REF (dr));
2092 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2093 tree stmt = DR_STMT (dr);
2094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2096 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2097 HOST_WIDE_INT stride;
2098 bool slp_impossible = false;
2100 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2101 interleaving group (including gaps). */
2102 stride = dr_step / type_size;
2104 /* Not consecutive access is possible only if it is a part of interleaving. */
2105 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2107 /* Check if it this DR is a part of interleaving, and is a single
2108 element of the group that is accessed in the loop. */
2110 /* Gaps are supported only for loads. STEP must be a multiple of the type
2111 size. The size of the group must be a power of 2. */
2112 if (DR_IS_READ (dr)
2113 && (dr_step % type_size) == 0
2114 && stride > 0
2115 && exact_log2 (stride) != -1)
2117 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2118 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2119 if (vect_print_dump_info (REPORT_DR_DETAILS))
2121 fprintf (vect_dump, "Detected single element interleaving %d ",
2122 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2124 fprintf (vect_dump, " step ");
2125 print_generic_expr (vect_dump, step, TDF_SLIM);
2127 return true;
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "not consecutive access");
2131 return false;
2134 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2136 /* First stmt in the interleaving chain. Check the chain. */
2137 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2138 struct data_reference *data_ref = dr;
2139 unsigned int count = 1;
2140 tree next_step;
2141 tree prev_init = DR_INIT (data_ref);
2142 tree prev = stmt;
2143 HOST_WIDE_INT diff, count_in_bytes;
2145 while (next)
2147 /* Skip same data-refs. In case that two or more stmts share data-ref
2148 (supported only for loads), we vectorize only the first stmt, and
2149 the rest get their vectorized loads from the first one. */
2150 if (!tree_int_cst_compare (DR_INIT (data_ref),
2151 DR_INIT (STMT_VINFO_DATA_REF (
2152 vinfo_for_stmt (next)))))
2154 if (!DR_IS_READ (data_ref))
2156 if (vect_print_dump_info (REPORT_DETAILS))
2157 fprintf (vect_dump, "Two store stmts share the same dr.");
2158 return false;
2161 /* Check that there is no load-store dependencies for this loads
2162 to prevent a case of load-store-load to the same location. */
2163 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2164 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump,
2168 "READ_WRITE dependence in interleaving.");
2169 return false;
2172 /* For load use the same data-ref load. */
2173 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2175 prev = next;
2176 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2177 continue;
2179 prev = next;
2181 /* Check that all the accesses have the same STEP. */
2182 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2183 if (tree_int_cst_compare (step, next_step))
2185 if (vect_print_dump_info (REPORT_DETAILS))
2186 fprintf (vect_dump, "not consecutive access in interleaving");
2187 return false;
2190 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2191 /* Check that the distance between two accesses is equal to the type
2192 size. Otherwise, we have gaps. */
2193 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2194 - TREE_INT_CST_LOW (prev_init)) / type_size;
2195 if (diff != 1)
2197 /* FORNOW: SLP of accesses with gaps is not supported. */
2198 slp_impossible = true;
2199 if (!DR_IS_READ (data_ref))
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "interleaved store with gaps");
2203 return false;
2207 /* Store the gap from the previous member of the group. If there is no
2208 gap in the access, DR_GROUP_GAP is always 1. */
2209 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2211 prev_init = DR_INIT (data_ref);
2212 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2213 /* Count the number of data-refs in the chain. */
2214 count++;
2217 /* COUNT is the number of accesses found, we multiply it by the size of
2218 the type to get COUNT_IN_BYTES. */
2219 count_in_bytes = type_size * count;
2221 /* Check that the size of the interleaving is not greater than STEP. */
2222 if (dr_step < count_in_bytes)
2224 if (vect_print_dump_info (REPORT_DETAILS))
2226 fprintf (vect_dump, "interleaving size is greater than step for ");
2227 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2229 return false;
2232 /* Check that the size of the interleaving is equal to STEP for stores,
2233 i.e., that there are no gaps. */
2234 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2236 if (vect_print_dump_info (REPORT_DETAILS))
2237 fprintf (vect_dump, "interleaved store with gaps");
2238 return false;
2241 /* Check that STEP is a multiple of type size. */
2242 if ((dr_step % type_size) != 0)
2244 if (vect_print_dump_info (REPORT_DETAILS))
2246 fprintf (vect_dump, "step is not a multiple of type size: step ");
2247 print_generic_expr (vect_dump, step, TDF_SLIM);
2248 fprintf (vect_dump, " size ");
2249 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2250 TDF_SLIM);
2252 return false;
2255 /* FORNOW: we handle only interleaving that is a power of 2.
2256 We don't fail here if it may be still possible to vectorize the
2257 group using SLP. If not, the size of the group will be checked in
2258 vect_analyze_operations, and the vectorization will fail. */
2259 if (exact_log2 (stride) == -1)
2261 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "interleaving is not a power of 2");
2264 if (slp_impossible)
2265 return false;
2267 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2268 if (vect_print_dump_info (REPORT_DETAILS))
2269 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2271 /* SLP: create an SLP data structure for every interleaving group of
2272 stores for further analysis in vect_analyse_slp. */
2273 if (!DR_IS_READ (dr) && !slp_impossible)
2274 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2277 return true;
2281 /* Analyze the access pattern of the data-reference DR.
2282 In case of non-consecutive accesse call vect_analyze_group_access() to
2283 analyze groups of strided accesses. */
2285 static bool
2286 vect_analyze_data_ref_access (struct data_reference *dr)
2288 tree step = DR_STEP (dr);
2289 tree scalar_type = TREE_TYPE (DR_REF (dr));
2290 tree stmt = DR_STMT (dr);
2291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2292 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2294 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2296 if (!step)
2298 if (vect_print_dump_info (REPORT_DETAILS))
2299 fprintf (vect_dump, "bad data-ref access");
2300 return false;
2303 /* Don't allow invariant accesses. */
2304 if (dr_step == 0)
2305 return false;
2307 if (nested_in_vect_loop_p (loop, stmt))
2309 /* For the rest of the analysis we use the outer-loop step. */
2310 step = STMT_VINFO_DR_STEP (stmt_info);
2311 dr_step = TREE_INT_CST_LOW (step);
2313 if (dr_step == 0)
2315 if (vect_print_dump_info (REPORT_ALIGNMENT))
2316 fprintf (vect_dump, "zero step in outer loop.");
2317 if (DR_IS_READ (dr))
2318 return true;
2319 else
2320 return false;
2324 /* Consecutive? */
2325 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2327 /* Mark that it is not interleaving. */
2328 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2329 return true;
2332 if (nested_in_vect_loop_p (loop, stmt))
2334 if (vect_print_dump_info (REPORT_ALIGNMENT))
2335 fprintf (vect_dump, "strided access in outer loop.");
2336 return false;
2339 /* Not consecutive access - check if it's a part of interleaving group. */
2340 return vect_analyze_group_access (dr);
2344 /* Function vect_analyze_data_ref_accesses.
2346 Analyze the access pattern of all the data references in the loop.
2348 FORNOW: the only access pattern that is considered vectorizable is a
2349 simple step 1 (consecutive) access.
2351 FORNOW: handle only arrays and pointer accesses. */
2353 static bool
2354 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2356 unsigned int i;
2357 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2358 struct data_reference *dr;
2360 if (vect_print_dump_info (REPORT_DETAILS))
2361 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2363 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2364 if (!vect_analyze_data_ref_access (dr))
2366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2367 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2368 return false;
2371 return true;
2374 /* Function vect_prune_runtime_alias_test_list.
2376 Prune a list of ddrs to be tested at run-time by versioning for alias.
2377 Return FALSE if resulting list of ddrs is longer then allowed by
2378 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2380 static bool
2381 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2383 VEC (ddr_p, heap) * ddrs =
2384 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2385 unsigned i, j;
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2390 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2392 bool found;
2393 ddr_p ddr_i;
2395 ddr_i = VEC_index (ddr_p, ddrs, i);
2396 found = false;
2398 for (j = 0; j < i; j++)
2400 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2402 if (vect_vfa_range_equal (ddr_i, ddr_j))
2404 if (vect_print_dump_info (REPORT_DR_DETAILS))
2406 fprintf (vect_dump, "found equal ranges ");
2407 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2408 fprintf (vect_dump, ", ");
2409 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2410 fprintf (vect_dump, " and ");
2411 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2412 fprintf (vect_dump, ", ");
2413 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2415 found = true;
2416 break;
2420 if (found)
2422 VEC_ordered_remove (ddr_p, ddrs, i);
2423 continue;
2425 i++;
2428 if (VEC_length (ddr_p, ddrs) >
2429 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2431 if (vect_print_dump_info (REPORT_DR_DETAILS))
2433 fprintf (vect_dump,
2434 "disable versioning for alias - max number of generated "
2435 "checks exceeded.");
2438 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2440 return false;
2443 return true;
2446 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2448 void
2449 vect_free_slp_tree (slp_tree node)
2451 if (!node)
2452 return;
2454 if (SLP_TREE_LEFT (node))
2455 vect_free_slp_tree (SLP_TREE_LEFT (node));
2457 if (SLP_TREE_RIGHT (node))
2458 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2460 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2462 if (SLP_TREE_VEC_STMTS (node))
2463 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2465 free (node);
2469 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2470 of a legal type and that they match the defs of the first stmt of the SLP
2471 group (stored in FIRST_STMT_...). */
2473 static bool
2474 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2475 tree rhs, VEC (tree, heap) **def_stmts0,
2476 VEC (tree, heap) **def_stmts1,
2477 enum vect_def_type *first_stmt_dt0,
2478 enum vect_def_type *first_stmt_dt1,
2479 tree *first_stmt_def0_type,
2480 tree *first_stmt_def1_type,
2481 tree *first_stmt_const_oprnd,
2482 int ncopies_for_cost)
2484 tree oprnd;
2485 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2486 unsigned int i, number_of_oprnds = op_type;
2487 tree def, def_stmt;
2488 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2489 stmt_vec_info stmt_info =
2490 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2492 /* Store. */
2493 if (!op_type)
2494 number_of_oprnds = 1;
2495 else
2496 gcc_assert (op_type == unary_op || op_type == binary_op);
2498 for (i = 0; i < number_of_oprnds; i++)
2500 if (op_type)
2501 oprnd = TREE_OPERAND (rhs, i);
2502 else
2503 oprnd = rhs;
2505 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2506 || (!def_stmt && dt[i] != vect_constant_def))
2508 if (vect_print_dump_info (REPORT_SLP))
2510 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2511 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2514 return false;
2517 if (!*first_stmt_dt0)
2519 /* op0 of the first stmt of the group - store its info. */
2520 *first_stmt_dt0 = dt[i];
2521 if (def)
2522 *first_stmt_def0_type = TREE_TYPE (def);
2523 else
2524 *first_stmt_const_oprnd = oprnd;
2526 /* Analyze costs (for the first stmt of the group only). */
2527 if (op_type)
2528 /* Not memory operation (we don't call this functions for loads). */
2529 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2530 else
2531 /* Store. */
2532 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2535 else
2537 if (!*first_stmt_dt1 && i == 1)
2539 /* op1 of the first stmt of the group - store its info. */
2540 *first_stmt_dt1 = dt[i];
2541 if (def)
2542 *first_stmt_def1_type = TREE_TYPE (def);
2543 else
2545 /* We assume that the stmt contains only one constant
2546 operand. We fail otherwise, to be on the safe side. */
2547 if (*first_stmt_const_oprnd)
2549 if (vect_print_dump_info (REPORT_SLP))
2550 fprintf (vect_dump, "Build SLP failed: two constant "
2551 "oprnds in stmt");
2552 return false;
2554 *first_stmt_const_oprnd = oprnd;
2557 else
2559 /* Not first stmt of the group, check that the def-stmt/s match
2560 the def-stmt/s of the first stmt. */
2561 if ((i == 0
2562 && (*first_stmt_dt0 != dt[i]
2563 || (*first_stmt_def0_type && def
2564 && *first_stmt_def0_type != TREE_TYPE (def))))
2565 || (i == 1
2566 && (*first_stmt_dt1 != dt[i]
2567 || (*first_stmt_def1_type && def
2568 && *first_stmt_def1_type != TREE_TYPE (def))))
2569 || (!def
2570 && TREE_TYPE (*first_stmt_const_oprnd)
2571 != TREE_TYPE (oprnd)))
2573 if (vect_print_dump_info (REPORT_SLP))
2574 fprintf (vect_dump, "Build SLP failed: different types ");
2576 return false;
2581 /* Check the types of the definitions. */
2582 switch (dt[i])
2584 case vect_constant_def:
2585 case vect_invariant_def:
2586 break;
2588 case vect_loop_def:
2589 if (i == 0)
2590 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2591 else
2592 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2593 break;
2595 default:
2596 /* FORNOW: Not supported. */
2597 if (vect_print_dump_info (REPORT_SLP))
2599 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2600 print_generic_expr (vect_dump, def, TDF_SLIM);
2603 return false;
2607 return true;
2611 /* Recursively build an SLP tree starting from NODE.
2612 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2613 permutation or are of unsupported types of operation. Otherwise, return
2614 TRUE.
2615 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2616 in the case of multiple types for now. */
2618 static bool
2619 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2620 unsigned int group_size, bool *slp_impossible,
2621 int *inside_cost, int *outside_cost,
2622 int ncopies_for_cost)
2624 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2625 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2626 unsigned int i;
2627 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2628 tree stmt = VEC_index (tree, stmts, 0);
2629 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2630 enum tree_code first_stmt_code = 0;
2631 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2632 tree lhs, rhs, prev_stmt = NULL_TREE;
2633 bool stop_recursion = false, need_same_oprnds = false;
2634 tree vectype, scalar_type, first_op1 = NULL_TREE;
2635 unsigned int vectorization_factor = 0, ncopies;
2636 optab optab;
2637 int icode;
2638 enum machine_mode optab_op2_mode;
2639 enum machine_mode vec_mode;
2640 tree first_stmt_const_oprnd = NULL_TREE;
2641 struct data_reference *first_dr;
2643 /* For every stmt in NODE find its def stmt/s. */
2644 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2646 if (vect_print_dump_info (REPORT_SLP))
2648 fprintf (vect_dump, "Build SLP for ");
2649 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2652 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2654 if (vect_print_dump_info (REPORT_SLP))
2656 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2657 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2660 return false;
2663 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2664 vectype = get_vectype_for_scalar_type (scalar_type);
2665 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2666 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2667 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2668 if (ncopies > 1)
2670 /* FORNOW. */
2671 if (vect_print_dump_info (REPORT_SLP))
2672 fprintf (vect_dump, "SLP failed - multiple types ");
2674 *slp_impossible = true;
2675 return false;
2678 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2679 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2681 /* Check the operation. */
2682 if (i == 0)
2684 first_stmt_code = TREE_CODE (rhs);
2686 /* Shift arguments should be equal in all the packed stmts for a
2687 vector shift with scalar shift operand. */
2688 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2690 vec_mode = TYPE_MODE (vectype);
2691 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2692 if (!optab)
2694 if (vect_print_dump_info (REPORT_SLP))
2695 fprintf (vect_dump, "Build SLP failed: no optab.");
2696 return false;
2698 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2699 if (icode == CODE_FOR_nothing)
2701 if (vect_print_dump_info (REPORT_SLP))
2702 fprintf (vect_dump,
2703 "Build SLP failed: op not supported by target.");
2704 return false;
2706 optab_op2_mode = insn_data[icode].operand[2].mode;
2707 if (!VECTOR_MODE_P (optab_op2_mode))
2709 need_same_oprnds = true;
2710 first_op1 = TREE_OPERAND (rhs, 1);
2714 else
2716 if (first_stmt_code != TREE_CODE (rhs))
2718 if (vect_print_dump_info (REPORT_SLP))
2720 fprintf (vect_dump,
2721 "Build SLP failed: different operation in stmt ");
2722 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2725 return false;
2728 if (need_same_oprnds
2729 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2731 if (vect_print_dump_info (REPORT_SLP))
2733 fprintf (vect_dump,
2734 "Build SLP failed: different shift arguments in ");
2735 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2738 return false;
2742 /* Strided store or load. */
2743 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2745 if (REFERENCE_CLASS_P (lhs))
2747 /* Store. */
2748 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2749 &def_stmts0, &def_stmts1,
2750 &first_stmt_dt0,
2751 &first_stmt_dt1,
2752 &first_stmt_def0_type,
2753 &first_stmt_def1_type,
2754 &first_stmt_const_oprnd,
2755 ncopies_for_cost))
2756 return false;
2758 else
2760 /* Load. */
2761 if (i == 0)
2763 /* First stmt of the SLP group should be the first load of
2764 the interleaving loop if data permutation is not
2765 allowed. */
2766 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2768 /* FORNOW: data permutations are not supported. */
2769 if (vect_print_dump_info (REPORT_SLP))
2771 fprintf (vect_dump, "Build SLP failed: strided "
2772 " loads need permutation ");
2773 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2776 return false;
2779 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2780 if (vect_supportable_dr_alignment (first_dr)
2781 == dr_unaligned_unsupported)
2783 if (vect_print_dump_info (REPORT_SLP))
2785 fprintf (vect_dump, "Build SLP failed: unsupported "
2786 " unaligned load ");
2787 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2790 return false;
2793 /* Analyze costs (for the first stmt in the group). */
2794 vect_model_load_cost (vinfo_for_stmt (stmt),
2795 ncopies_for_cost, *node);
2797 else
2799 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2801 /* FORNOW: data permutations are not supported. */
2802 if (vect_print_dump_info (REPORT_SLP))
2804 fprintf (vect_dump, "Build SLP failed: strided "
2805 " loads need permutation ");
2806 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2808 return false;
2812 prev_stmt = stmt;
2814 /* We stop the tree when we reach a group of loads. */
2815 stop_recursion = true;
2816 continue;
2818 } /* Strided access. */
2819 else
2821 if (REFERENCE_CLASS_P (rhs))
2823 /* Not strided load. */
2824 if (vect_print_dump_info (REPORT_SLP))
2826 fprintf (vect_dump, "Build SLP failed: not strided load ");
2827 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2830 /* FORNOW: Not strided loads are not supported. */
2831 return false;
2834 /* Not memory operation. */
2835 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2837 if (vect_print_dump_info (REPORT_SLP))
2839 fprintf (vect_dump, "Build SLP failed: operation");
2840 fprintf (vect_dump, " unsupported ");
2841 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2844 return false;
2847 /* Find the def-stmts. */
2848 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2849 &def_stmts1, &first_stmt_dt0,
2850 &first_stmt_dt1,
2851 &first_stmt_def0_type,
2852 &first_stmt_def1_type,
2853 &first_stmt_const_oprnd,
2854 ncopies_for_cost))
2855 return false;
2859 /* Add the costs of the node to the overall instance costs. */
2860 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2861 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2863 /* Strided loads were reached - stop the recursion. */
2864 if (stop_recursion)
2865 return true;
2867 /* Create SLP_TREE nodes for the definition node/s. */
2868 if (first_stmt_dt0 == vect_loop_def)
2870 slp_tree left_node = XNEW (struct _slp_tree);
2871 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2872 SLP_TREE_VEC_STMTS (left_node) = NULL;
2873 SLP_TREE_LEFT (left_node) = NULL;
2874 SLP_TREE_RIGHT (left_node) = NULL;
2875 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2876 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2877 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2878 slp_impossible, inside_cost, outside_cost,
2879 ncopies_for_cost))
2880 return false;
2882 SLP_TREE_LEFT (*node) = left_node;
2885 if (first_stmt_dt1 == vect_loop_def)
2887 slp_tree right_node = XNEW (struct _slp_tree);
2888 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2889 SLP_TREE_VEC_STMTS (right_node) = NULL;
2890 SLP_TREE_LEFT (right_node) = NULL;
2891 SLP_TREE_RIGHT (right_node) = NULL;
2892 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2893 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2894 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2895 slp_impossible, inside_cost, outside_cost,
2896 ncopies_for_cost))
2897 return false;
2899 SLP_TREE_RIGHT (*node) = right_node;
2902 return true;
2906 static void
2907 vect_print_slp_tree (slp_tree node)
2909 int i;
2910 tree stmt;
2912 if (!node)
2913 return;
2915 fprintf (vect_dump, "node ");
2916 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2918 fprintf (vect_dump, "\n\tstmt %d ", i);
2919 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2921 fprintf (vect_dump, "\n");
2923 vect_print_slp_tree (SLP_TREE_LEFT (node));
2924 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2928 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2929 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2930 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2931 stmts in NODE are to be marked. */
2933 static void
2934 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2936 int i;
2937 tree stmt;
2939 if (!node)
2940 return;
2942 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2943 if (j < 0 || i == j)
2944 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2946 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2947 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2951 /* Analyze an SLP instance starting from a group of strided stores. Call
2952 vect_build_slp_tree to build a tree of packed stmts if possible.
2953 Return FALSE if it's impossible to SLP any stmt in the loop. */
2955 static bool
2956 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2958 slp_instance new_instance;
2959 slp_tree node = XNEW (struct _slp_tree);
2960 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2961 unsigned int unrolling_factor = 1, nunits;
2962 tree vectype, scalar_type, next;
2963 unsigned int vectorization_factor = 0, ncopies;
2964 bool slp_impossible = false;
2965 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2967 /* FORNOW: multiple types are not supported. */
2968 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2969 vectype = get_vectype_for_scalar_type (scalar_type);
2970 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2971 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2972 ncopies = vectorization_factor / nunits;
2973 if (ncopies > 1)
2975 if (vect_print_dump_info (REPORT_SLP))
2976 fprintf (vect_dump, "SLP failed - multiple types ");
2978 return false;
2981 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2982 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
2983 next = stmt;
2984 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2985 while (next)
2987 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
2988 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2991 SLP_TREE_VEC_STMTS (node) = NULL;
2992 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
2993 SLP_TREE_LEFT (node) = NULL;
2994 SLP_TREE_RIGHT (node) = NULL;
2995 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
2996 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
2998 /* Calculate the unrolling factor. */
2999 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3001 /* Calculate the number of vector stmts to create based on the unrolling
3002 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3003 GROUP_SIZE / NUNITS otherwise. */
3004 ncopies_for_cost = unrolling_factor * group_size / nunits;
3006 /* Build the tree for the SLP instance. */
3007 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3008 &inside_cost, &outside_cost, ncopies_for_cost))
3010 /* Create a new SLP instance. */
3011 new_instance = XNEW (struct _slp_instance);
3012 SLP_INSTANCE_TREE (new_instance) = node;
3013 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3014 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3015 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3016 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3017 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3018 new_instance);
3019 if (vect_print_dump_info (REPORT_SLP))
3020 vect_print_slp_tree (node);
3022 return true;
3025 /* Failed to SLP. */
3026 /* Free the allocated memory. */
3027 vect_free_slp_tree (node);
3029 if (slp_impossible)
3030 return false;
3032 /* SLP failed for this instance, but it is still possible to SLP other stmts
3033 in the loop. */
3034 return true;
3038 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3039 trees of packed scalar stmts if SLP is possible. */
3041 static bool
3042 vect_analyze_slp (loop_vec_info loop_vinfo)
3044 unsigned int i;
3045 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3046 tree store;
3048 if (vect_print_dump_info (REPORT_SLP))
3049 fprintf (vect_dump, "=== vect_analyze_slp ===");
3051 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3052 if (!vect_analyze_slp_instance (loop_vinfo, store))
3054 /* SLP failed. No instance can be SLPed in the loop. */
3055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3056 fprintf (vect_dump, "SLP failed.");
3058 return false;
3061 return true;
3065 /* For each possible SLP instance decide whether to SLP it and calculate overall
3066 unrolling factor needed to SLP the loop. */
3068 static void
3069 vect_make_slp_decision (loop_vec_info loop_vinfo)
3071 unsigned int i, unrolling_factor = 1;
3072 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3073 slp_instance instance;
3074 int decided_to_slp = 0;
3076 if (vect_print_dump_info (REPORT_SLP))
3077 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3079 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3081 /* FORNOW: SLP if you can. */
3082 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3083 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3085 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3086 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3087 loop-based vectorization. Such stmts will be marked as HYBRID. */
3088 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3089 decided_to_slp++;
3092 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3094 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3095 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3096 decided_to_slp, unrolling_factor);
3100 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3101 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3103 static void
3104 vect_detect_hybrid_slp_stmts (slp_tree node)
3106 int i;
3107 tree stmt;
3108 imm_use_iterator imm_iter;
3109 tree use_stmt;
3111 if (!node)
3112 return;
3114 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3115 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3116 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3117 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3118 if (vinfo_for_stmt (use_stmt)
3119 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3120 vect_mark_slp_stmts (node, hybrid, i);
3122 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3123 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3127 /* Find stmts that must be both vectorized and SLPed. */
3129 static void
3130 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3132 unsigned int i;
3133 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3134 slp_instance instance;
3136 if (vect_print_dump_info (REPORT_SLP))
3137 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3139 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3140 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3144 /* Function vect_analyze_data_refs.
3146 Find all the data references in the loop.
3148 The general structure of the analysis of data refs in the vectorizer is as
3149 follows:
3150 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3151 find and analyze all data-refs in the loop and their dependences.
3152 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3153 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3154 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3158 static bool
3159 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3161 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3162 unsigned int i;
3163 VEC (data_reference_p, heap) *datarefs;
3164 struct data_reference *dr;
3165 tree scalar_type;
3167 if (vect_print_dump_info (REPORT_DETAILS))
3168 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3170 compute_data_dependences_for_loop (loop, true,
3171 &LOOP_VINFO_DATAREFS (loop_vinfo),
3172 &LOOP_VINFO_DDRS (loop_vinfo));
3174 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3175 from stmt_vec_info struct to DR and vectype. */
3176 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3178 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3180 tree stmt;
3181 stmt_vec_info stmt_info;
3182 basic_block bb;
3183 tree base, offset, init;
3185 if (!dr || !DR_REF (dr))
3187 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3188 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3189 return false;
3192 stmt = DR_STMT (dr);
3193 stmt_info = vinfo_for_stmt (stmt);
3195 /* Check that analysis of the data-ref succeeded. */
3196 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3197 || !DR_STEP (dr))
3199 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3201 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3202 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3204 return false;
3207 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3209 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3210 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3211 "constant");
3212 return false;
3215 if (!DR_SYMBOL_TAG (dr))
3217 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3219 fprintf (vect_dump, "not vectorized: no memory tag for ");
3220 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3222 return false;
3225 base = unshare_expr (DR_BASE_ADDRESS (dr));
3226 offset = unshare_expr (DR_OFFSET (dr));
3227 init = unshare_expr (DR_INIT (dr));
3229 /* Update DR field in stmt_vec_info struct. */
3230 bb = bb_for_stmt (stmt);
3232 /* If the dataref is in an inner-loop of the loop that is considered for
3233 for vectorization, we also want to analyze the access relative to
3234 the outer-loop (DR contains information only relative to the
3235 inner-most enclosing loop). We do that by building a reference to the
3236 first location accessed by the inner-loop, and analyze it relative to
3237 the outer-loop. */
3238 if (nested_in_vect_loop_p (loop, stmt))
3240 tree outer_step, outer_base, outer_init;
3241 HOST_WIDE_INT pbitsize, pbitpos;
3242 tree poffset;
3243 enum machine_mode pmode;
3244 int punsignedp, pvolatilep;
3245 affine_iv base_iv, offset_iv;
3246 tree dinit;
3248 /* Build a reference to the first location accessed by the
3249 inner-loop: *(BASE+INIT). (The first location is actually
3250 BASE+INIT+OFFSET, but we add OFFSET separately later. */
3251 tree inner_base = build_fold_indirect_ref
3252 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
3254 if (vect_print_dump_info (REPORT_DETAILS))
3256 fprintf (dump_file, "analyze in outer-loop: ");
3257 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3260 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3261 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3262 gcc_assert (outer_base != NULL_TREE);
3264 if (pbitpos % BITS_PER_UNIT != 0)
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (dump_file, "failed: bit offset alignment.\n");
3268 return false;
3271 outer_base = build_fold_addr_expr (outer_base);
3272 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3274 if (vect_print_dump_info (REPORT_DETAILS))
3275 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3276 return false;
3279 if (offset)
3281 if (poffset)
3282 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3283 else
3284 poffset = offset;
3287 if (!poffset)
3289 offset_iv.base = ssize_int (0);
3290 offset_iv.step = ssize_int (0);
3292 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3294 if (vect_print_dump_info (REPORT_DETAILS))
3295 fprintf (dump_file, "evolution of offset is not affine.\n");
3296 return false;
3299 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3300 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3301 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3302 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3303 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3305 outer_step = size_binop (PLUS_EXPR,
3306 fold_convert (ssizetype, base_iv.step),
3307 fold_convert (ssizetype, offset_iv.step));
3309 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3310 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3311 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3312 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3313 STMT_VINFO_DR_OFFSET (stmt_info) =
3314 fold_convert (ssizetype, offset_iv.base);
3315 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3316 size_int (highest_pow2_factor (offset_iv.base));
3318 if (dump_file && (dump_flags & TDF_DETAILS))
3320 fprintf (dump_file, "\touter base_address: ");
3321 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3322 fprintf (dump_file, "\n\touter offset from base address: ");
3323 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3324 fprintf (dump_file, "\n\touter constant offset from base address: ");
3325 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3326 fprintf (dump_file, "\n\touter step: ");
3327 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3328 fprintf (dump_file, "\n\touter aligned to: ");
3329 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3333 if (STMT_VINFO_DATA_REF (stmt_info))
3335 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3337 fprintf (vect_dump,
3338 "not vectorized: more than one data ref in stmt: ");
3339 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3341 return false;
3343 STMT_VINFO_DATA_REF (stmt_info) = dr;
3345 /* Set vectype for STMT. */
3346 scalar_type = TREE_TYPE (DR_REF (dr));
3347 STMT_VINFO_VECTYPE (stmt_info) =
3348 get_vectype_for_scalar_type (scalar_type);
3349 if (!STMT_VINFO_VECTYPE (stmt_info))
3351 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3353 fprintf (vect_dump,
3354 "not vectorized: no vectype for stmt: ");
3355 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3356 fprintf (vect_dump, " scalar_type: ");
3357 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3359 return false;
3363 return true;
3367 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3369 /* Function vect_mark_relevant.
3371 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3373 static void
3374 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3375 enum vect_relevant relevant, bool live_p)
3377 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3378 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3379 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3381 if (vect_print_dump_info (REPORT_DETAILS))
3382 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3384 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3386 tree pattern_stmt;
3388 /* This is the last stmt in a sequence that was detected as a
3389 pattern that can potentially be vectorized. Don't mark the stmt
3390 as relevant/live because it's not going to be vectorized.
3391 Instead mark the pattern-stmt that replaces it. */
3393 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3395 if (vect_print_dump_info (REPORT_DETAILS))
3396 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3397 stmt_info = vinfo_for_stmt (pattern_stmt);
3398 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3399 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3400 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3401 stmt = pattern_stmt;
3404 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3405 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3406 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3408 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3409 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3411 if (vect_print_dump_info (REPORT_DETAILS))
3412 fprintf (vect_dump, "already marked relevant/live.");
3413 return;
3416 VEC_safe_push (tree, heap, *worklist, stmt);
3420 /* Function vect_stmt_relevant_p.
3422 Return true if STMT in loop that is represented by LOOP_VINFO is
3423 "relevant for vectorization".
3425 A stmt is considered "relevant for vectorization" if:
3426 - it has uses outside the loop.
3427 - it has vdefs (it alters memory).
3428 - control stmts in the loop (except for the exit condition).
3430 CHECKME: what other side effects would the vectorizer allow? */
3432 static bool
3433 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3434 enum vect_relevant *relevant, bool *live_p)
3436 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3437 ssa_op_iter op_iter;
3438 imm_use_iterator imm_iter;
3439 use_operand_p use_p;
3440 def_operand_p def_p;
3442 *relevant = vect_unused_in_loop;
3443 *live_p = false;
3445 /* cond stmt other than loop exit cond. */
3446 if (is_ctrl_stmt (stmt)
3447 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3448 *relevant = vect_used_in_loop;
3450 /* changing memory. */
3451 if (TREE_CODE (stmt) != PHI_NODE)
3452 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3454 if (vect_print_dump_info (REPORT_DETAILS))
3455 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3456 *relevant = vect_used_in_loop;
3459 /* uses outside the loop. */
3460 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3462 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3464 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3465 if (!flow_bb_inside_loop_p (loop, bb))
3467 if (vect_print_dump_info (REPORT_DETAILS))
3468 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3470 /* We expect all such uses to be in the loop exit phis
3471 (because of loop closed form) */
3472 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3473 gcc_assert (bb == single_exit (loop)->dest);
3475 *live_p = true;
3480 return (*live_p || *relevant);
3485 Function process_use.
3487 Inputs:
3488 - a USE in STMT in a loop represented by LOOP_VINFO
3489 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3490 that defined USE. This is dont by calling mark_relevant and passing it
3491 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3493 Outputs:
3494 Generally, LIVE_P and RELEVANT are used to define the liveness and
3495 relevance info of the DEF_STMT of this USE:
3496 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3497 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3498 Exceptions:
3499 - case 1: If USE is used only for address computations (e.g. array indexing),
3500 which does not need to be directly vectorized, then the liveness/relevance
3501 of the respective DEF_STMT is left unchanged.
3502 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3503 skip DEF_STMT cause it had already been processed.
3504 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3505 be modified accordingly.
3507 Return true if everything is as expected. Return false otherwise. */
3509 static bool
3510 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3511 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3513 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3514 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3515 stmt_vec_info dstmt_vinfo;
3516 basic_block bb, def_bb;
3517 tree def, def_stmt;
3518 enum vect_def_type dt;
3520 /* case 1: we are only interested in uses that need to be vectorized. Uses
3521 that are used for address computation are not considered relevant. */
3522 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3523 return true;
3525 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3527 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3528 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3529 return false;
3532 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3533 return true;
3535 def_bb = bb_for_stmt (def_stmt);
3536 if (!flow_bb_inside_loop_p (loop, def_bb))
3538 if (vect_print_dump_info (REPORT_DETAILS))
3539 fprintf (vect_dump, "def_stmt is out of loop.");
3540 return true;
3543 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3544 DEF_STMT must have already been processed, because this should be the
3545 only way that STMT, which is a reduction-phi, was put in the worklist,
3546 as there should be no other uses for DEF_STMT in the loop. So we just
3547 check that everything is as expected, and we are done. */
3548 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3549 bb = bb_for_stmt (stmt);
3550 if (TREE_CODE (stmt) == PHI_NODE
3551 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3552 && TREE_CODE (def_stmt) != PHI_NODE
3553 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3554 && bb->loop_father == def_bb->loop_father)
3556 if (vect_print_dump_info (REPORT_DETAILS))
3557 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3558 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3559 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3560 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3561 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3562 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3563 return true;
3566 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3567 outer-loop-header-bb:
3568 d = def_stmt
3569 inner-loop:
3570 stmt # use (d)
3571 outer-loop-tail-bb:
3572 ... */
3573 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3575 if (vect_print_dump_info (REPORT_DETAILS))
3576 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3577 switch (relevant)
3579 case vect_unused_in_loop:
3580 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3581 vect_used_by_reduction : vect_unused_in_loop;
3582 break;
3583 case vect_used_in_outer_by_reduction:
3584 relevant = vect_used_by_reduction;
3585 break;
3586 case vect_used_in_outer:
3587 relevant = vect_used_in_loop;
3588 break;
3589 case vect_used_by_reduction:
3590 case vect_used_in_loop:
3591 break;
3593 default:
3594 gcc_unreachable ();
3598 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3599 outer-loop-header-bb:
3601 inner-loop:
3602 d = def_stmt
3603 outer-loop-tail-bb:
3604 stmt # use (d) */
3605 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3607 if (vect_print_dump_info (REPORT_DETAILS))
3608 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3609 switch (relevant)
3611 case vect_unused_in_loop:
3612 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3613 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3614 break;
3616 case vect_used_in_outer_by_reduction:
3617 case vect_used_in_outer:
3618 break;
3620 case vect_used_by_reduction:
3621 relevant = vect_used_in_outer_by_reduction;
3622 break;
3624 case vect_used_in_loop:
3625 relevant = vect_used_in_outer;
3626 break;
3628 default:
3629 gcc_unreachable ();
3633 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3634 return true;
3638 /* Function vect_mark_stmts_to_be_vectorized.
3640 Not all stmts in the loop need to be vectorized. For example:
3642 for i...
3643 for j...
3644 1. T0 = i + j
3645 2. T1 = a[T0]
3647 3. j = j + 1
3649 Stmt 1 and 3 do not need to be vectorized, because loop control and
3650 addressing of vectorized data-refs are handled differently.
3652 This pass detects such stmts. */
3654 static bool
3655 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3657 VEC(tree,heap) *worklist;
3658 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3659 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3660 unsigned int nbbs = loop->num_nodes;
3661 block_stmt_iterator si;
3662 tree stmt;
3663 stmt_ann_t ann;
3664 unsigned int i;
3665 stmt_vec_info stmt_vinfo;
3666 basic_block bb;
3667 tree phi;
3668 bool live_p;
3669 enum vect_relevant relevant;
3671 if (vect_print_dump_info (REPORT_DETAILS))
3672 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3674 worklist = VEC_alloc (tree, heap, 64);
3676 /* 1. Init worklist. */
3677 for (i = 0; i < nbbs; i++)
3679 bb = bbs[i];
3680 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3682 if (vect_print_dump_info (REPORT_DETAILS))
3684 fprintf (vect_dump, "init: phi relevant? ");
3685 print_generic_expr (vect_dump, phi, TDF_SLIM);
3688 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3689 vect_mark_relevant (&worklist, phi, relevant, live_p);
3691 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3693 stmt = bsi_stmt (si);
3694 if (vect_print_dump_info (REPORT_DETAILS))
3696 fprintf (vect_dump, "init: stmt relevant? ");
3697 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3700 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3701 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3705 /* 2. Process_worklist */
3706 while (VEC_length (tree, worklist) > 0)
3708 use_operand_p use_p;
3709 ssa_op_iter iter;
3711 stmt = VEC_pop (tree, worklist);
3712 if (vect_print_dump_info (REPORT_DETAILS))
3714 fprintf (vect_dump, "worklist: examine stmt: ");
3715 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3718 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3719 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3720 liveness and relevance properties of STMT. */
3721 ann = stmt_ann (stmt);
3722 stmt_vinfo = vinfo_for_stmt (stmt);
3723 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3724 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3726 /* Generally, the liveness and relevance properties of STMT are
3727 propagated as is to the DEF_STMTs of its USEs:
3728 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3729 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3731 One exception is when STMT has been identified as defining a reduction
3732 variable; in this case we set the liveness/relevance as follows:
3733 live_p = false
3734 relevant = vect_used_by_reduction
3735 This is because we distinguish between two kinds of relevant stmts -
3736 those that are used by a reduction computation, and those that are
3737 (also) used by a regular computation. This allows us later on to
3738 identify stmts that are used solely by a reduction, and therefore the
3739 order of the results that they produce does not have to be kept.
3741 Reduction phis are expected to be used by a reduction stmt, or by
3742 in an outer loop; Other reduction stmts are expected to be
3743 in the loop, and possibly used by a stmt in an outer loop.
3744 Here are the expected values of "relevant" for reduction phis/stmts:
3746 relevance: phi stmt
3747 vect_unused_in_loop ok
3748 vect_used_in_outer_by_reduction ok ok
3749 vect_used_in_outer ok ok
3750 vect_used_by_reduction ok
3751 vect_used_in_loop */
3753 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3755 enum vect_relevant tmp_relevant = relevant;
3756 switch (tmp_relevant)
3758 case vect_unused_in_loop:
3759 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3760 relevant = vect_used_by_reduction;
3761 break;
3763 case vect_used_in_outer_by_reduction:
3764 case vect_used_in_outer:
3765 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3766 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3767 break;
3769 case vect_used_by_reduction:
3770 if (TREE_CODE (stmt) == PHI_NODE)
3771 break;
3772 /* fall through */
3773 case vect_used_in_loop:
3774 default:
3775 if (vect_print_dump_info (REPORT_DETAILS))
3776 fprintf (vect_dump, "unsupported use of reduction.");
3777 VEC_free (tree, heap, worklist);
3778 return false;
3780 live_p = false;
3783 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3785 tree op = USE_FROM_PTR (use_p);
3786 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3788 VEC_free (tree, heap, worklist);
3789 return false;
3792 } /* while worklist */
3794 VEC_free (tree, heap, worklist);
3795 return true;
3799 /* Function vect_can_advance_ivs_p
3801 In case the number of iterations that LOOP iterates is unknown at compile
3802 time, an epilog loop will be generated, and the loop induction variables
3803 (IVs) will be "advanced" to the value they are supposed to take just before
3804 the epilog loop. Here we check that the access function of the loop IVs
3805 and the expression that represents the loop bound are simple enough.
3806 These restrictions will be relaxed in the future. */
3808 static bool
3809 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3811 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3812 basic_block bb = loop->header;
3813 tree phi;
3815 /* Analyze phi functions of the loop header. */
3817 if (vect_print_dump_info (REPORT_DETAILS))
3818 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3820 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3822 tree access_fn = NULL;
3823 tree evolution_part;
3825 if (vect_print_dump_info (REPORT_DETAILS))
3827 fprintf (vect_dump, "Analyze phi: ");
3828 print_generic_expr (vect_dump, phi, TDF_SLIM);
3831 /* Skip virtual phi's. The data dependences that are associated with
3832 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3834 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3836 if (vect_print_dump_info (REPORT_DETAILS))
3837 fprintf (vect_dump, "virtual phi. skip.");
3838 continue;
3841 /* Skip reduction phis. */
3843 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "reduc phi. skip.");
3847 continue;
3850 /* Analyze the evolution function. */
3852 access_fn = instantiate_parameters
3853 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3855 if (!access_fn)
3857 if (vect_print_dump_info (REPORT_DETAILS))
3858 fprintf (vect_dump, "No Access function.");
3859 return false;
3862 if (vect_print_dump_info (REPORT_DETAILS))
3864 fprintf (vect_dump, "Access function of PHI: ");
3865 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3868 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3870 if (evolution_part == NULL_TREE)
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "No evolution.");
3874 return false;
3877 /* FORNOW: We do not transform initial conditions of IVs
3878 which evolution functions are a polynomial of degree >= 2. */
3880 if (tree_is_chrec (evolution_part))
3881 return false;
3884 return true;
3888 /* Function vect_get_loop_niters.
3890 Determine how many iterations the loop is executed.
3891 If an expression that represents the number of iterations
3892 can be constructed, place it in NUMBER_OF_ITERATIONS.
3893 Return the loop exit condition. */
3895 static tree
3896 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3898 tree niters;
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "=== get_loop_niters ===");
3903 niters = number_of_exit_cond_executions (loop);
3905 if (niters != NULL_TREE
3906 && niters != chrec_dont_know)
3908 *number_of_iterations = niters;
3910 if (vect_print_dump_info (REPORT_DETAILS))
3912 fprintf (vect_dump, "==> get_loop_niters:" );
3913 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3917 return get_loop_exit_condition (loop);
3921 /* Function vect_analyze_loop_1.
3923 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3924 for it. The different analyses will record information in the
3925 loop_vec_info struct. This is a subset of the analyses applied in
3926 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3927 that is now considered for (outer-loop) vectorization. */
3929 static loop_vec_info
3930 vect_analyze_loop_1 (struct loop *loop)
3932 loop_vec_info loop_vinfo;
3934 if (vect_print_dump_info (REPORT_DETAILS))
3935 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3937 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3939 loop_vinfo = vect_analyze_loop_form (loop);
3940 if (!loop_vinfo)
3942 if (vect_print_dump_info (REPORT_DETAILS))
3943 fprintf (vect_dump, "bad inner-loop form.");
3944 return NULL;
3947 return loop_vinfo;
3951 /* Function vect_analyze_loop_form.
3953 Verify that certain CFG restrictions hold, including:
3954 - the loop has a pre-header
3955 - the loop has a single entry and exit
3956 - the loop exit condition is simple enough, and the number of iterations
3957 can be analyzed (a countable loop). */
3959 static loop_vec_info
3960 vect_analyze_loop_form (struct loop *loop)
3962 loop_vec_info loop_vinfo;
3963 tree loop_cond;
3964 tree number_of_iterations = NULL;
3965 loop_vec_info inner_loop_vinfo = NULL;
3967 if (vect_print_dump_info (REPORT_DETAILS))
3968 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3970 /* Different restrictions apply when we are considering an inner-most loop,
3971 vs. an outer (nested) loop.
3972 (FORNOW. May want to relax some of these restrictions in the future). */
3974 if (!loop->inner)
3976 /* Inner-most loop. We currently require that the number of BBs is
3977 exactly 2 (the header and latch). Vectorizable inner-most loops
3978 look like this:
3980 (pre-header)
3982 header <--------+
3983 | | |
3984 | +--> latch --+
3986 (exit-bb) */
3988 if (loop->num_nodes != 2)
3990 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3991 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3992 return NULL;
3995 if (empty_block_p (loop->header))
3997 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3998 fprintf (vect_dump, "not vectorized: empty loop.");
3999 return NULL;
4002 else
4004 struct loop *innerloop = loop->inner;
4005 edge backedge, entryedge;
4007 /* Nested loop. We currently require that the loop is doubly-nested,
4008 contains a single inner loop, and the number of BBs is exactly 5.
4009 Vectorizable outer-loops look like this:
4011 (pre-header)
4013 header <---+
4015 inner-loop |
4017 tail ------+
4019 (exit-bb)
4021 The inner-loop has the properties expected of inner-most loops
4022 as described above. */
4024 if ((loop->inner)->inner || (loop->inner)->next)
4026 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4027 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4028 return NULL;
4031 /* Analyze the inner-loop. */
4032 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4033 if (!inner_loop_vinfo)
4035 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4036 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4037 return NULL;
4040 if (!expr_invariant_in_loop_p (loop,
4041 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4043 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4044 fprintf (vect_dump,
4045 "not vectorized: inner-loop count not invariant.");
4046 destroy_loop_vec_info (inner_loop_vinfo, true);
4047 return NULL;
4050 if (loop->num_nodes != 5)
4052 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4053 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4054 destroy_loop_vec_info (inner_loop_vinfo, true);
4055 return NULL;
4058 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4059 backedge = EDGE_PRED (innerloop->header, 1);
4060 entryedge = EDGE_PRED (innerloop->header, 0);
4061 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4063 backedge = EDGE_PRED (innerloop->header, 0);
4064 entryedge = EDGE_PRED (innerloop->header, 1);
4067 if (entryedge->src != loop->header
4068 || !single_exit (innerloop)
4069 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4071 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4072 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4073 destroy_loop_vec_info (inner_loop_vinfo, true);
4074 return NULL;
4077 if (vect_print_dump_info (REPORT_DETAILS))
4078 fprintf (vect_dump, "Considering outer-loop vectorization.");
4081 if (!single_exit (loop)
4082 || EDGE_COUNT (loop->header->preds) != 2)
4084 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4086 if (!single_exit (loop))
4087 fprintf (vect_dump, "not vectorized: multiple exits.");
4088 else if (EDGE_COUNT (loop->header->preds) != 2)
4089 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4091 if (inner_loop_vinfo)
4092 destroy_loop_vec_info (inner_loop_vinfo, true);
4093 return NULL;
4096 /* We assume that the loop exit condition is at the end of the loop. i.e,
4097 that the loop is represented as a do-while (with a proper if-guard
4098 before the loop if needed), where the loop header contains all the
4099 executable statements, and the latch is empty. */
4100 if (!empty_block_p (loop->latch)
4101 || phi_nodes (loop->latch))
4103 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4104 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4105 if (inner_loop_vinfo)
4106 destroy_loop_vec_info (inner_loop_vinfo, true);
4107 return NULL;
4110 /* Make sure there exists a single-predecessor exit bb: */
4111 if (!single_pred_p (single_exit (loop)->dest))
4113 edge e = single_exit (loop);
4114 if (!(e->flags & EDGE_ABNORMAL))
4116 split_loop_exit_edge (e);
4117 if (vect_print_dump_info (REPORT_DETAILS))
4118 fprintf (vect_dump, "split exit edge.");
4120 else
4122 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4123 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4124 if (inner_loop_vinfo)
4125 destroy_loop_vec_info (inner_loop_vinfo, true);
4126 return NULL;
4130 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4131 if (!loop_cond)
4133 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4134 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4135 if (inner_loop_vinfo)
4136 destroy_loop_vec_info (inner_loop_vinfo, true);
4137 return NULL;
4140 if (!number_of_iterations)
4142 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4143 fprintf (vect_dump,
4144 "not vectorized: number of iterations cannot be computed.");
4145 if (inner_loop_vinfo)
4146 destroy_loop_vec_info (inner_loop_vinfo, true);
4147 return NULL;
4150 if (chrec_contains_undetermined (number_of_iterations))
4152 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4153 fprintf (vect_dump, "Infinite number of iterations.");
4154 if (inner_loop_vinfo)
4155 destroy_loop_vec_info (inner_loop_vinfo, true);
4156 return NULL;
4159 if (!NITERS_KNOWN_P (number_of_iterations))
4161 if (vect_print_dump_info (REPORT_DETAILS))
4163 fprintf (vect_dump, "Symbolic number of iterations is ");
4164 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4167 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4169 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4170 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4171 if (inner_loop_vinfo)
4172 destroy_loop_vec_info (inner_loop_vinfo, false);
4173 return NULL;
4176 loop_vinfo = new_loop_vec_info (loop);
4177 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4179 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4181 /* CHECKME: May want to keep it around it in the future. */
4182 if (inner_loop_vinfo)
4183 destroy_loop_vec_info (inner_loop_vinfo, false);
4185 gcc_assert (!loop->aux);
4186 loop->aux = loop_vinfo;
4187 return loop_vinfo;
4191 /* Function vect_analyze_loop.
4193 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4194 for it. The different analyses will record information in the
4195 loop_vec_info struct. */
4196 loop_vec_info
4197 vect_analyze_loop (struct loop *loop)
4199 bool ok;
4200 loop_vec_info loop_vinfo;
4202 if (vect_print_dump_info (REPORT_DETAILS))
4203 fprintf (vect_dump, "===== analyze_loop_nest =====");
4205 if (loop_outer (loop)
4206 && loop_vec_info_for_loop (loop_outer (loop))
4207 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4209 if (vect_print_dump_info (REPORT_DETAILS))
4210 fprintf (vect_dump, "outer-loop already vectorized.");
4211 return NULL;
4214 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4216 loop_vinfo = vect_analyze_loop_form (loop);
4217 if (!loop_vinfo)
4219 if (vect_print_dump_info (REPORT_DETAILS))
4220 fprintf (vect_dump, "bad loop form.");
4221 return NULL;
4224 /* Find all data references in the loop (which correspond to vdefs/vuses)
4225 and analyze their evolution in the loop.
4227 FORNOW: Handle only simple, array references, which
4228 alignment can be forced, and aligned pointer-references. */
4230 ok = vect_analyze_data_refs (loop_vinfo);
4231 if (!ok)
4233 if (vect_print_dump_info (REPORT_DETAILS))
4234 fprintf (vect_dump, "bad data references.");
4235 destroy_loop_vec_info (loop_vinfo, true);
4236 return NULL;
4239 /* Classify all cross-iteration scalar data-flow cycles.
4240 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4242 vect_analyze_scalar_cycles (loop_vinfo);
4244 vect_pattern_recog (loop_vinfo);
4246 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4248 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4249 if (!ok)
4251 if (vect_print_dump_info (REPORT_DETAILS))
4252 fprintf (vect_dump, "unexpected pattern.");
4253 destroy_loop_vec_info (loop_vinfo, true);
4254 return NULL;
4257 /* Analyze the alignment of the data-refs in the loop.
4258 Fail if a data reference is found that cannot be vectorized. */
4260 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4261 if (!ok)
4263 if (vect_print_dump_info (REPORT_DETAILS))
4264 fprintf (vect_dump, "bad data alignment.");
4265 destroy_loop_vec_info (loop_vinfo, true);
4266 return NULL;
4269 ok = vect_determine_vectorization_factor (loop_vinfo);
4270 if (!ok)
4272 if (vect_print_dump_info (REPORT_DETAILS))
4273 fprintf (vect_dump, "can't determine vectorization factor.");
4274 destroy_loop_vec_info (loop_vinfo, true);
4275 return NULL;
4278 /* Analyze data dependences between the data-refs in the loop.
4279 FORNOW: fail at the first data dependence that we encounter. */
4281 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4282 if (!ok)
4284 if (vect_print_dump_info (REPORT_DETAILS))
4285 fprintf (vect_dump, "bad data dependence.");
4286 destroy_loop_vec_info (loop_vinfo, true);
4287 return NULL;
4290 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4291 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4293 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4294 if (!ok)
4296 if (vect_print_dump_info (REPORT_DETAILS))
4297 fprintf (vect_dump, "bad data access.");
4298 destroy_loop_vec_info (loop_vinfo, true);
4299 return NULL;
4302 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4303 It is important to call pruning after vect_analyze_data_ref_accesses,
4304 since we use grouping information gathered by interleaving analysis. */
4305 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4306 if (!ok)
4308 if (vect_print_dump_info (REPORT_DETAILS))
4309 fprintf (vect_dump, "too long list of versioning for alias "
4310 "run-time tests.");
4311 destroy_loop_vec_info (loop_vinfo, true);
4312 return NULL;
4315 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4316 ok = vect_analyze_slp (loop_vinfo);
4317 if (ok)
4319 /* Decide which possible SLP instances to SLP. */
4320 vect_make_slp_decision (loop_vinfo);
4322 /* Find stmts that need to be both vectorized and SLPed. */
4323 vect_detect_hybrid_slp (loop_vinfo);
4326 /* This pass will decide on using loop versioning and/or loop peeling in
4327 order to enhance the alignment of data references in the loop. */
4329 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4330 if (!ok)
4332 if (vect_print_dump_info (REPORT_DETAILS))
4333 fprintf (vect_dump, "bad data alignment.");
4334 destroy_loop_vec_info (loop_vinfo, true);
4335 return NULL;
4338 /* Scan all the operations in the loop and make sure they are
4339 vectorizable. */
4341 ok = vect_analyze_operations (loop_vinfo);
4342 if (!ok)
4344 if (vect_print_dump_info (REPORT_DETAILS))
4345 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4346 destroy_loop_vec_info (loop_vinfo, true);
4347 return NULL;
4350 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4352 return loop_vinfo;