/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

/* Main analysis functions.  */
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

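/* A worked instance of the above (illustrative only, not from the original
   sources): on a target with 16-byte vectors, a loop over 4-byte 'int'
   elements gets VF = 16 / 4 = 4, and the strip-mined form processes four
   elements per iteration, with a scalar epilogue for the remainder:

        for (i = 0; i < (N/4)*4; i += 4)
          a[i:4] = b[i:4] + c[i:4];     # one vector add
        for ( ; i < N; i++)             # scalar epilogue
          a[i] = b[i] + c[i];
*/
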
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              tree operation;

              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the
                 smallest datatype in this function, because that determines
                 the VF.  If the smallest datatype in the loop is present only
                 as the rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
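              /* For instance (illustrative only, not from the original
                 sources): with a loop-invariant 'short' variable short_inv,
                 the stmt
                     int_x = (int) short_inv;
                 has the 'short' type only on the rhs; looking at the lhs
                 alone would miss the smallest type in the loop and yield a
                 too-small VF, which is why the rhs type is used below when
                 it is the smaller one.  */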
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              operation = GIMPLE_STMT_OPERAND (stmt, 1);
              if (TREE_CODE (operation) == NOP_EXPR
                  || TREE_CODE (operation) == CONVERT_EXPR
                  || TREE_CODE (operation) == WIDEN_MULT_EXPR
                  || TREE_CODE (operation) == FLOAT_EXPR)
                {
                  tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
                      TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

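/* Numeric example of the update done below (illustrative only, not from
   the original sources): an SLP instance whose unrolling factor is 2 and
   whose inside-of-loop cost was computed as 10 gets, once VF is finally
   set to 4, a cost of 10 * (4 / 2) = 20, since twice as many copies of
   each vector stmt are needed.  */
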
static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. - that are used out of the
             loop) need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
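              /* E.g. (illustrative only, not from the original sources): a
                 group of three interleaved stores
                     a[3*i] = x;  a[3*i+1] = y;  a[3*i+2] = z;
                 has DR_GROUP_SIZE == 3, and exact_log2 (3) == -1, so it is
                 rejected here if it also needs loop-based vectorization.  */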
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross iteration parallelism is not exploited.  */
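  /* Numeric example (illustrative only, not from the original sources):
     with a loop-based vectorization factor of 4 and an SLP unrolling
     factor of 2, the combined factor below is
     least_common_multiple (4, 2) == 4; in the pure-SLP case the factor is
     simply the SLP unrolling factor.  */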
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost. Decide if worth while to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
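  /* Numeric example (illustrative only, assuming
     --param min-vect-loop-bound=2 and a vectorization factor of 4):
     min_scalar_loop_bound = 2 * 4 - 1 = 7.  If the cost model returned
     min_profitable_iters == 10, the threshold TH below becomes 10, the
     more conservative of the two values.  */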
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */
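  /* Example (illustrative only, not from the original sources): for
     'a[i] = x', the use of 'x' (the rhs 'var' of form -1-) is a
     non-indexing use, so this function returns true for it; the use of
     'i' only serves to index the array, and for it the function returns
     false.  */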
  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dummy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list. Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT(FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

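/* Example of case 1 (illustrative only, not from the original sources):
   for the two accesses
       a[4*i]     = x;    # DRB, DR_INIT == 0
       a[4*i + 2] = y;    # DRA, DR_INIT == 2 * sizeof (elem)
   neither is in a chain yet, so the stmt of DRB (the smaller init)
   becomes DR_GROUP_FIRST_DR of both, and the stmt of DRA is linked in as
   its DR_GROUP_NEXT_DR.  */
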
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                             old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list. Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

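/* Example (illustrative only, not from the original sources): in
       for (i = 0; i < N; i++) {
         a[2*i]     = x;    # DRA: init 0, step 2 * sizeof (elem)
         a[2*i + 1] = y;    # DRB: init sizeof (elem), same step
       }
   both stores have the same base and offset, their steps are equal, and
   the difference of their inits (one element) is a multiple of the type
   size and smaller than the step, so they are chained as an interleaving
   group of size 2.  */
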
static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if they are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  tree stmt_i = DR_STMT (dr_i);
  tree stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be
   checked at run-time by versioning for alias, return FALSE.  */

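/* Examples (illustrative only, not from the original sources), assuming a
   vectorization factor of 4:
       a[i+1] = a[i] + 1;   # distance 1 < VF: blocks vectorization
       a[i+4] = a[i] + 1;   # distance 4 (4 % VF == 0): same-alignment case
       a[i+5] = a[i] + 1;   # distance 5 >= VF: harmless for vectorization  */
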
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked above that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

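/* Worked case (illustrative only, assuming a 16-byte vector type): for a
   dataref with DR_BASE_ADDRESS '&a', DR_INIT 4, and 'a' known to be
   16-byte aligned, the computation below yields a misalignment of
   4 % 16 == 4 bytes; if the base cannot be shown (or forced) to be
   16-byte aligned, DR_MISALIGNMENT stays -1 (unknown).  */
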
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     evenly divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % UNITS_PER_SIMD_WORD == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

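/* Numeric example (illustrative only, assuming UNITS_PER_SIMD_WORD == 16):
   peeling NPEEL == 3 iterations for a 4-byte dataref DR with known
   misalignment 4 gives (4 + 3 * 4) % 16 == 0, i.e. DR becomes aligned in
   the vector loop; with misalignment 6 it would become
   (6 + 3 * 4) % 16 == 2.  */
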
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      misal += npeel * dr_size;
      misal %= UNITS_PER_SIMD_WORD;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

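/* Example (illustrative only, assuming 16-byte vectors of 4-byte
   elements): a dataref with a known misalignment of 8 bytes becomes
   aligned after peeling (16 - 8) / 4 == 2 scalar iterations, so alignment
   is reachable; a misalignment of 2 bytes is not a multiple of the
   element size, so no number of peeled iterations can ever align it.  */
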
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop (VF - misalignment) is a multiple of the number of
         the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = UNITS_PER_SIMD_WORD / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

1784 static bool
1785 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1787 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1788 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1789 enum dr_alignment_support supportable_dr_alignment;
1790 struct data_reference *dr0 = NULL;
1791 struct data_reference *dr;
1792 unsigned int i;
1793 bool do_peeling = false;
1794 bool do_versioning = false;
1795 bool stat;
1796 tree stmt;
1797 stmt_vec_info stmt_info;
1798 int vect_versioning_for_alias_required;
1800 if (vect_print_dump_info (REPORT_DETAILS))
1801 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1803 /* While cost model enhancements are expected in the future, the high level
1804 view of the code at this time is as follows:
1806 A) If there is a misaligned write then see if peeling to align this write
1807 can make all data references satisfy vect_supportable_dr_alignment.
1808 If so, update data structures as needed and return true. Note that
1809 at this time vect_supportable_dr_alignment is known to return false
1810 for a misaligned write.
1812 B) If peeling wasn't possible and there is a data reference with an
1813 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1814 then see if loop versioning checks can be used to make all data
1815 references satisfy vect_supportable_dr_alignment. If so, update
1816 data structures as needed and return true.
1818 C) If neither peeling nor versioning were successful then return false if
1819 any data reference does not satisfy vect_supportable_dr_alignment.
1821 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1823 Note, Possibility 3 above (which is peeling and versioning together) is not
1824 being done at this time. */
1826 /* (1) Peeling to force alignment. */
1828 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1829 Considerations:
1830 + How many accesses will become aligned due to the peeling
1831 - How many accesses will become unaligned due to the peeling,
1832 and the cost of misaligned accesses.
1833 - The cost of peeling (the extra runtime checks, the increase
1834 in code size).
1836 The scheme we use FORNOW: peel to force the alignment of the first
1837 misaligned store in the loop.
1838 Rationale: misaligned stores are not yet supported.
1840 TODO: Use a cost model. */
1842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1844 stmt = DR_STMT (dr);
1845 stmt_info = vinfo_for_stmt (stmt);
1847 /* For interleaving, only the alignment of the first access
1848 matters. */
1849 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1850 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1851 continue;
1853 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1855 do_peeling = vector_alignment_reachable_p (dr);
1856 if (do_peeling)
1857 dr0 = dr;
1858 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1859 fprintf (vect_dump, "vector alignment may not be reachable");
1860 break;
1864 vect_versioning_for_alias_required =
1865 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1867 /* Temporarily, if versioning for alias is required, we disable peeling
1868 until we support peeling and versioning. Often peeling for alignment
1869 will require peeling for loop-bound, which in turn requires that we
1870 know how to adjust the loop ivs after the loop. */
1871 if (vect_versioning_for_alias_required
1872 || !vect_can_advance_ivs_p (loop_vinfo)
1873 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1874 do_peeling = false;
1876 if (do_peeling)
1878 int mis;
1879 int npeel = 0;
1880 tree stmt = DR_STMT (dr0);
1881 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1882 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1883 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1885 if (known_alignment_for_access_p (dr0))
1887 /* Since it's known at compile time, compute the number of iterations
1888 in the peeled loop (the peeling factor) for use in updating
1889 DR_MISALIGNMENT values. The peeling factor is the vectorization
1890 factor minus the misalignment as an element count. */
1891 mis = DR_MISALIGNMENT (dr0);
1892 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1893 npeel = nelements - mis;
1895 /* For interleaved data access every iteration accesses all the
1896 members of the group, therefore we divide the number of iterations
1897 by the group size. */
1898 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1899 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1900 npeel /= DR_GROUP_SIZE (stmt_info);
1902 if (vect_print_dump_info (REPORT_DETAILS))
1903 fprintf (vect_dump, "Try peeling by %d", npeel);
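/* A minimal worked example of the NPEEL computation above, with assumed
   numbers (V4SI vectors, 4-byte ints, DR_MISALIGNMENT (dr0) = 8 bytes);
   for an interleaved access the result would further be divided by the
   group size.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int nelements = 4;                /* TYPE_VECTOR_SUBPARTS for V4SI */
  int mis_bytes = 8;                /* DR_MISALIGNMENT (dr0) in bytes */
  int elem_size = 4;                /* GET_MODE_SIZE of the element mode */
  int mis = mis_bytes / elem_size;  /* misalignment in elements: 2 */
  int npeel = nelements - mis;      /* peel 2 iterations to align dr0 */
  printf ("npeel = %d\n", npeel);
  return 0;
}
#endif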
1906 /* Ensure that all data refs can be vectorized after the peel. */
1907 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1909 int save_misalignment;
1911 if (dr == dr0)
1912 continue;
1914 stmt = DR_STMT (dr);
1915 stmt_info = vinfo_for_stmt (stmt);
1916 /* For interleaving, only the alignment of the first access
1917 matters. */
1918 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1919 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1920 continue;
1922 save_misalignment = DR_MISALIGNMENT (dr);
1923 vect_update_misalignment_for_peel (dr, dr0, npeel);
1924 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1925 SET_DR_MISALIGNMENT (dr, save_misalignment);
1927 if (!supportable_dr_alignment)
1929 do_peeling = false;
1930 break;
1934 if (do_peeling)
1936 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1937 If the misalignment of DR_i is identical to that of dr0 then set
1938 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1939 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1940 by the peeling factor times the element size of DR_i (MOD the
1941 vectorization factor times the size). Otherwise, the
1942 misalignment of DR_i must be set to unknown. */
1943 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1944 if (dr != dr0)
1945 vect_update_misalignment_for_peel (dr, dr0, npeel);
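/* Worked instance of the update rule described above (assumed numbers,
   for illustration only): with NPEEL = 2, a 4-byte element size and a
   16-byte vector, a data-ref whose known misalignment was 4 bytes ends
   up at (4 + 2*4) mod 16 = 12 bytes after the peel.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int npeel = 2, elem_size = 4, vecsize = 16;
  int old_mis = 4;
  int new_mis = (old_mis + npeel * elem_size) % vecsize;
  printf ("misalignment after peel = %d\n", new_mis);  /* prints 12 */
  return 0;
}
#endif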
1947 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1948 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1949 SET_DR_MISALIGNMENT (dr0, 0);
1950 if (vect_print_dump_info (REPORT_ALIGNMENT))
1951 fprintf (vect_dump, "Alignment of access forced using peeling.");
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 fprintf (vect_dump, "Peeling for alignment will be applied.");
1956 stat = vect_verify_datarefs_alignment (loop_vinfo);
1957 gcc_assert (stat);
1958 return stat;
1963 /* (2) Versioning to force alignment. */
1965 /* Try versioning if:
1966 1) flag_tree_vect_loop_version is TRUE
1967 2) optimize_size is FALSE
1968 3) there is at least one unsupported misaligned data ref with an unknown
1969 misalignment, and
1970 4) all misaligned data refs with a known misalignment are supported, and
1971 5) the number of runtime alignment checks is within reason. */
1973 do_versioning =
1974 flag_tree_vect_loop_version
1975 && (!optimize_size)
1976 && (!loop->inner); /* FORNOW */
1978 if (do_versioning)
1980 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1982 stmt = DR_STMT (dr);
1983 stmt_info = vinfo_for_stmt (stmt);
1985 /* For interleaving, only the alignment of the first access
1986 matters. */
1987 if (aligned_access_p (dr)
1988 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1989 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1990 continue;
1992 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1994 if (!supportable_dr_alignment)
1996 tree stmt;
1997 int mask;
1998 tree vectype;
2000 if (known_alignment_for_access_p (dr)
2001 || VEC_length (tree,
2002 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2003 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2005 do_versioning = false;
2006 break;
2009 stmt = DR_STMT (dr);
2010 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2011 gcc_assert (vectype);
2013 /* The rightmost bits of an aligned address must be zeros.
2014 Construct the mask needed for this test. For example,
2015 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2016 mask must be 15 = 0xf. */
2017 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
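/* Sketch of the run-time test that the versioning guard performs with
   this MASK (hypothetical pointer P; 16-byte vectors assumed): an
   address is vector-aligned iff its low bits selected by MASK are
   zero.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int a[32];
  int *p = a;
  uintptr_t mask = 16 - 1;  /* GET_MODE_SIZE (vector mode) - 1 */
  if (((uintptr_t) p & mask) == 0)
    printf ("p is 16-byte aligned: take the vectorized loop\n");
  else
    printf ("p is misaligned: take the scalar fallback loop\n");
  return 0;
}
#endif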
2019 /* FORNOW: use the same mask to test all potentially unaligned
2020 references in the loop. The vectorizer currently supports
2021 a single vector size, see the reference to
2022 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2023 vectorization factor is computed. */
2024 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2025 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2026 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2027 VEC_safe_push (tree, heap,
2028 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2029 DR_STMT (dr));
2033 /* Versioning requires at least one misaligned data reference. */
2034 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2035 do_versioning = false;
2036 else if (!do_versioning)
2037 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2040 if (do_versioning)
2042 VEC(tree,heap) *may_misalign_stmts
2043 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2044 tree stmt;
2046 /* It can now be assumed that the data references in the statements
2047 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2048 of the loop being vectorized. */
2049 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2052 dr = STMT_VINFO_DATA_REF (stmt_info);
2053 SET_DR_MISALIGNMENT (dr, 0);
2054 if (vect_print_dump_info (REPORT_ALIGNMENT))
2055 fprintf (vect_dump, "Alignment of access forced using versioning.");
2058 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "Versioning for alignment will be applied.");
2061 /* Peeling and versioning can't be done together at this time. */
2062 gcc_assert (! (do_peeling && do_versioning));
2064 stat = vect_verify_datarefs_alignment (loop_vinfo);
2065 gcc_assert (stat);
2066 return stat;
2069 /* This point is reached if neither peeling nor versioning is being done. */
2070 gcc_assert (! (do_peeling || do_versioning));
2072 stat = vect_verify_datarefs_alignment (loop_vinfo);
2073 return stat;
2077 /* Function vect_analyze_data_refs_alignment
2079 Analyze the alignment of the data-references in the loop.
2080 Return FALSE if a data reference is found that cannot be vectorized. */
2082 static bool
2083 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2085 if (vect_print_dump_info (REPORT_DETAILS))
2086 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2088 if (!vect_compute_data_refs_alignment (loop_vinfo))
2090 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2091 fprintf (vect_dump,
2092 "not vectorized: can't calculate alignment for data ref.");
2093 return false;
2096 return true;
2100 /* Analyze groups of strided accesses: check that DR belongs to a group of
2101 strided accesses of legal size, step, etc. Detect gaps, single element
2102 interleaving, and other special cases. Set strided access info.
2103 Collect groups of strided stores for further use in SLP analysis. */
2105 static bool
2106 vect_analyze_group_access (struct data_reference *dr)
2108 tree step = DR_STEP (dr);
2109 tree scalar_type = TREE_TYPE (DR_REF (dr));
2110 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2111 tree stmt = DR_STMT (dr);
2112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2113 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2114 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2115 HOST_WIDE_INT stride;
2116 bool slp_impossible = false;
2118 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2119 interleaving group (including gaps). */
2120 stride = dr_step / type_size;
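/* Example of an interleaving group and its STRIDE (illustrative source,
   not taken from any test): the two stores below access a[2*i] and
   a[2*i+1], so with 4-byte ints DR_STEP is 8 bytes, TYPE_SIZE is 4, and
   STRIDE (the group size, gaps included) is 2.  */
#if 0
void
interleave (int *a, const int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[2 * i] = b[i];       /* first member of the group */
      a[2 * i + 1] = -b[i];  /* second member, no gap */
    }
}
#endif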
2122 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2123 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2125 /* Check whether this DR is part of an interleaving group of which it is
2126 the only element accessed in the loop. */
2128 /* Gaps are supported only for loads. STEP must be a multiple of the type
2129 size. The size of the group must be a power of 2. */
2130 if (DR_IS_READ (dr)
2131 && (dr_step % type_size) == 0
2132 && stride > 0
2133 && exact_log2 (stride) != -1)
2135 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2136 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2137 if (vect_print_dump_info (REPORT_DR_DETAILS))
2139 fprintf (vect_dump, "Detected single element interleaving %d ",
2140 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2141 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2142 fprintf (vect_dump, " step ");
2143 print_generic_expr (vect_dump, step, TDF_SLIM);
2145 return true;
2147 if (vect_print_dump_info (REPORT_DETAILS))
2148 fprintf (vect_dump, "not consecutive access");
2149 return false;
2152 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2154 /* First stmt in the interleaving chain. Check the chain. */
2155 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2156 struct data_reference *data_ref = dr;
2157 unsigned int count = 1;
2158 tree next_step;
2159 tree prev_init = DR_INIT (data_ref);
2160 tree prev = stmt;
2161 HOST_WIDE_INT diff, count_in_bytes;
2163 while (next)
2165 /* Skip same data-refs. In case two or more stmts share a data-ref
2166 (supported only for loads), we vectorize only the first stmt, and
2167 the rest get their vectorized loads from the first one. */
2168 if (!tree_int_cst_compare (DR_INIT (data_ref),
2169 DR_INIT (STMT_VINFO_DATA_REF (
2170 vinfo_for_stmt (next)))))
2172 if (!DR_IS_READ (data_ref))
2174 if (vect_print_dump_info (REPORT_DETAILS))
2175 fprintf (vect_dump, "Two store stmts share the same dr.");
2176 return false;
2179 /* Check that there are no load-store dependences for these loads,
2180 to prevent a load-store-load sequence to the same location. */
2181 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2182 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2184 if (vect_print_dump_info (REPORT_DETAILS))
2185 fprintf (vect_dump,
2186 "READ_WRITE dependence in interleaving.");
2187 return false;
2190 /* For a load, use the same data-ref load. */
2191 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2193 prev = next;
2194 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2195 continue;
2197 prev = next;
2199 /* Check that all the accesses have the same STEP. */
2200 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2201 if (tree_int_cst_compare (step, next_step))
2203 if (vect_print_dump_info (REPORT_DETAILS))
2204 fprintf (vect_dump, "not consecutive access in interleaving");
2205 return false;
2208 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2209 /* Check that the distance between two accesses is equal to the type
2210 size. Otherwise, we have gaps. */
2211 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2212 - TREE_INT_CST_LOW (prev_init)) / type_size;
2213 if (diff != 1)
2215 /* FORNOW: SLP of accesses with gaps is not supported. */
2216 slp_impossible = true;
2217 if (!DR_IS_READ (data_ref))
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "interleaved store with gaps");
2221 return false;
2225 /* Store the gap from the previous member of the group. If there is no
2226 gap in the access, DR_GROUP_GAP is always 1. */
2227 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2229 prev_init = DR_INIT (data_ref);
2230 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2231 /* Count the number of data-refs in the chain. */
2232 count++;
2235 /* COUNT is the number of accesses found; we multiply it by the size of
2236 the type to get COUNT_IN_BYTES. */
2237 count_in_bytes = type_size * count;
2239 /* Check that the size of the interleaving is not greater than STEP. */
2240 if (dr_step < count_in_bytes)
2242 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump, "interleaving size is greater than step for ");
2245 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2247 return false;
2250 /* Check that the size of the interleaving is equal to STEP for stores,
2251 i.e., that there are no gaps. */
2252 if (dr_step != count_in_bytes)
2254 if (DR_IS_READ (dr))
2256 slp_impossible = true;
2257 /* There is a gap after the last load in the group. This gap is a
2258 difference between the stride and the number of elements. When
2259 there is no gap, this difference should be 0. */
2260 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2262 else
2264 if (vect_print_dump_info (REPORT_DETAILS))
2265 fprintf (vect_dump, "interleaved store with gaps");
2266 return false;
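/* Concrete instance of the gap bookkeeping above (illustrative): loads
   of a[4*i], a[4*i+1] and a[4*i+2] have STEP 16 bytes, so STRIDE = 4
   and COUNT = 3, and the gap recorded for the group is 4 - 3 = 1; the
   analogous store pattern is rejected just above.  */
#if 0
int
sum3 (const int *a, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    /* a[4*i+3] is never read: one element of gap at the group end.  */
    s += a[4 * i] + a[4 * i + 1] + a[4 * i + 2];
  return s;
}
#endif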
2270 /* Check that STEP is a multiple of type size. */
2271 if ((dr_step % type_size) != 0)
2273 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "step is not a multiple of type size: step ");
2276 print_generic_expr (vect_dump, step, TDF_SLIM);
2277 fprintf (vect_dump, " size ");
2278 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2279 TDF_SLIM);
2281 return false;
2284 /* FORNOW: we handle only interleaving that is a power of 2.
2285 We don't fail here if it may be still possible to vectorize the
2286 group using SLP. If not, the size of the group will be checked in
2287 vect_analyze_operations, and the vectorization will fail. */
2288 if (exact_log2 (stride) == -1)
2290 if (vect_print_dump_info (REPORT_DETAILS))
2291 fprintf (vect_dump, "interleaving is not a power of 2");
2293 if (slp_impossible)
2294 return false;
2296 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2297 if (vect_print_dump_info (REPORT_DETAILS))
2298 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2300 /* SLP: create an SLP data structure for every interleaving group of
2301 stores for further analysis in vect_analyze_slp. */
2302 if (!DR_IS_READ (dr) && !slp_impossible)
2303 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2306 return true;
2310 /* Analyze the access pattern of the data-reference DR.
2311 In case of non-consecutive accesses call vect_analyze_group_access() to
2312 analyze groups of strided accesses. */
2314 static bool
2315 vect_analyze_data_ref_access (struct data_reference *dr)
2317 tree step = DR_STEP (dr);
2318 tree scalar_type = TREE_TYPE (DR_REF (dr));
2319 tree stmt = DR_STMT (dr);
2320 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2322 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2323 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2325 if (!step)
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "bad data-ref access");
2329 return false;
2332 /* Don't allow invariant accesses. */
2333 if (dr_step == 0)
2334 return false;
2336 if (nested_in_vect_loop_p (loop, stmt))
2338 /* Interleaved accesses are not yet supported within outer-loop
2339 vectorization for references in the inner-loop. */
2340 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2342 /* For the rest of the analysis we use the outer-loop step. */
2343 step = STMT_VINFO_DR_STEP (stmt_info);
2344 dr_step = TREE_INT_CST_LOW (step);
2346 if (dr_step == 0)
2348 if (vect_print_dump_info (REPORT_ALIGNMENT))
2349 fprintf (vect_dump, "zero step in outer loop.");
2350 if (DR_IS_READ (dr))
2351 return true;
2352 else
2353 return false;
2357 /* Consecutive? */
2358 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2360 /* Mark that it is not interleaving. */
2361 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2362 return true;
2365 if (nested_in_vect_loop_p (loop, stmt))
2367 if (vect_print_dump_info (REPORT_ALIGNMENT))
2368 fprintf (vect_dump, "strided access in outer loop.");
2369 return false;
2372 /* Not consecutive access - check if it's a part of interleaving group. */
2373 return vect_analyze_group_access (dr);
2377 /* Function vect_analyze_data_ref_accesses.
2379 Analyze the access pattern of all the data references in the loop.
2381 FORNOW: the only access pattern that is considered vectorizable is a
2382 simple step 1 (consecutive) access.
2384 FORNOW: handle only arrays and pointer accesses. */
2386 static bool
2387 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2389 unsigned int i;
2390 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2391 struct data_reference *dr;
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2396 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2397 if (!vect_analyze_data_ref_access (dr))
2399 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2400 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2401 return false;
2404 return true;
2407 /* Function vect_prune_runtime_alias_test_list.
2409 Prune a list of ddrs to be tested at run-time by versioning for alias.
2410 Return FALSE if the resulting list of ddrs is longer than allowed by
2411 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2413 static bool
2414 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2416 VEC (ddr_p, heap) * ddrs =
2417 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2418 unsigned i, j;
2420 if (vect_print_dump_info (REPORT_DETAILS))
2421 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2423 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2425 bool found;
2426 ddr_p ddr_i;
2428 ddr_i = VEC_index (ddr_p, ddrs, i);
2429 found = false;
2431 for (j = 0; j < i; j++)
2433 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2435 if (vect_vfa_range_equal (ddr_i, ddr_j))
2437 if (vect_print_dump_info (REPORT_DR_DETAILS))
2439 fprintf (vect_dump, "found equal ranges ");
2440 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2441 fprintf (vect_dump, ", ");
2442 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2443 fprintf (vect_dump, " and ");
2444 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2445 fprintf (vect_dump, ", ");
2446 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2448 found = true;
2449 break;
2453 if (found)
2455 VEC_ordered_remove (ddr_p, ddrs, i);
2456 continue;
2458 i++;
2461 if (VEC_length (ddr_p, ddrs) >
2462 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2464 if (vect_print_dump_info (REPORT_DR_DETAILS))
2466 fprintf (vect_dump,
2467 "disable versioning for alias - max number of generated "
2468 "checks exceeded.");
2471 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2473 return false;
2476 return true;
2479 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2481 void
2482 vect_free_slp_tree (slp_tree node)
2484 if (!node)
2485 return;
2487 if (SLP_TREE_LEFT (node))
2488 vect_free_slp_tree (SLP_TREE_LEFT (node));
2490 if (SLP_TREE_RIGHT (node))
2491 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2493 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2495 if (SLP_TREE_VEC_STMTS (node))
2496 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2498 free (node);
2502 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2503 of a legal type and that they match the defs of the first stmt of the SLP
2504 group (stored in FIRST_STMT_...). */
2506 static bool
2507 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2508 tree rhs, VEC (tree, heap) **def_stmts0,
2509 VEC (tree, heap) **def_stmts1,
2510 enum vect_def_type *first_stmt_dt0,
2511 enum vect_def_type *first_stmt_dt1,
2512 tree *first_stmt_def0_type,
2513 tree *first_stmt_def1_type,
2514 tree *first_stmt_const_oprnd,
2515 int ncopies_for_cost)
2517 tree oprnd;
2518 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2519 unsigned int i, number_of_oprnds = op_type;
2520 tree def, def_stmt;
2521 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2522 stmt_vec_info stmt_info =
2523 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2525 /* Store. */
2526 if (!op_type)
2527 number_of_oprnds = 1;
2528 else
2529 gcc_assert (op_type == unary_op || op_type == binary_op);
2531 for (i = 0; i < number_of_oprnds; i++)
2533 if (op_type)
2534 oprnd = TREE_OPERAND (rhs, i);
2535 else
2536 oprnd = rhs;
2538 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2539 || (!def_stmt && dt[i] != vect_constant_def))
2541 if (vect_print_dump_info (REPORT_SLP))
2543 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2544 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2547 return false;
2550 if (!*first_stmt_dt0)
2552 /* op0 of the first stmt of the group - store its info. */
2553 *first_stmt_dt0 = dt[i];
2554 if (def)
2555 *first_stmt_def0_type = TREE_TYPE (def);
2556 else
2557 *first_stmt_const_oprnd = oprnd;
2559 /* Analyze costs (for the first stmt of the group only). */
2560 if (op_type)
2561 /* Not a memory operation (we don't call this function for loads). */
2562 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2563 else
2564 /* Store. */
2565 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2568 else
2570 if (!*first_stmt_dt1 && i == 1)
2572 /* op1 of the first stmt of the group - store its info. */
2573 *first_stmt_dt1 = dt[i];
2574 if (def)
2575 *first_stmt_def1_type = TREE_TYPE (def);
2576 else
2578 /* We assume that the stmt contains only one constant
2579 operand. We fail otherwise, to be on the safe side. */
2580 if (*first_stmt_const_oprnd)
2582 if (vect_print_dump_info (REPORT_SLP))
2583 fprintf (vect_dump, "Build SLP failed: two constant "
2584 "oprnds in stmt");
2585 return false;
2587 *first_stmt_const_oprnd = oprnd;
2590 else
2592 /* Not first stmt of the group, check that the def-stmt/s match
2593 the def-stmt/s of the first stmt. */
2594 if ((i == 0
2595 && (*first_stmt_dt0 != dt[i]
2596 || (*first_stmt_def0_type && def
2597 && *first_stmt_def0_type != TREE_TYPE (def))))
2598 || (i == 1
2599 && (*first_stmt_dt1 != dt[i]
2600 || (*first_stmt_def1_type && def
2601 && *first_stmt_def1_type != TREE_TYPE (def))))
2602 || (!def
2603 && TREE_TYPE (*first_stmt_const_oprnd)
2604 != TREE_TYPE (oprnd)))
2606 if (vect_print_dump_info (REPORT_SLP))
2607 fprintf (vect_dump, "Build SLP failed: different types ");
2609 return false;
2614 /* Check the types of the definitions. */
2615 switch (dt[i])
2617 case vect_constant_def:
2618 case vect_invariant_def:
2619 break;
2621 case vect_loop_def:
2622 if (i == 0)
2623 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2624 else
2625 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2626 break;
2628 default:
2629 /* FORNOW: Not supported. */
2630 if (vect_print_dump_info (REPORT_SLP))
2632 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2633 print_generic_expr (vect_dump, def, TDF_SLIM);
2636 return false;
2640 return true;
2644 /* Recursively build an SLP tree starting from NODE.
2645 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2646 permutation, or use an unsupported operation. Otherwise, return
2647 TRUE.
2648 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2649 in the case of multiple types for now. */
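/* Illustrative source for the recursion below (a hypothetical example):
   the two stores form an SLP group, and because their right-hand sides
   are isomorphic the tree gets a root node for the stores and a child
   node for the group of b[] loads that defines their operands.  */
#if 0
void
f (int *a, const int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[2 * i] = b[2 * i] + 1;          /* packed with the stmt below */
      a[2 * i + 1] = b[2 * i + 1] + 1;  /* same operation, same operand kinds */
    }
}
#endif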
2651 static bool
2652 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2653 unsigned int group_size, bool *slp_impossible,
2654 int *inside_cost, int *outside_cost,
2655 int ncopies_for_cost)
2657 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2658 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2659 unsigned int i;
2660 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2661 tree stmt = VEC_index (tree, stmts, 0);
2662 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2663 enum tree_code first_stmt_code = 0;
2664 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2665 tree lhs, rhs, prev_stmt = NULL_TREE;
2666 bool stop_recursion = false, need_same_oprnds = false;
2667 tree vectype, scalar_type, first_op1 = NULL_TREE;
2668 unsigned int vectorization_factor = 0, ncopies;
2669 optab optab;
2670 int icode;
2671 enum machine_mode optab_op2_mode;
2672 enum machine_mode vec_mode;
2673 tree first_stmt_const_oprnd = NULL_TREE;
2674 struct data_reference *first_dr;
2676 /* For every stmt in NODE find its def stmt/s. */
2677 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2679 if (vect_print_dump_info (REPORT_SLP))
2681 fprintf (vect_dump, "Build SLP for ");
2682 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2685 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2687 if (vect_print_dump_info (REPORT_SLP))
2689 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2690 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2693 return false;
2696 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2697 vectype = get_vectype_for_scalar_type (scalar_type);
2698 if (!vectype)
2700 if (vect_print_dump_info (REPORT_SLP))
2702 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2703 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2705 return false;
2708 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2709 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2710 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2711 if (ncopies > 1)
2713 /* FORNOW. */
2714 if (vect_print_dump_info (REPORT_SLP))
2715 fprintf (vect_dump, "SLP failed - multiple types ");
2717 *slp_impossible = true;
2718 return false;
2721 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2722 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2724 /* Check the operation. */
2725 if (i == 0)
2727 first_stmt_code = TREE_CODE (rhs);
2729 /* Shift arguments should be equal in all the packed stmts for a
2730 vector shift with scalar shift operand. */
2731 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2733 vec_mode = TYPE_MODE (vectype);
2734 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2735 if (!optab)
2737 if (vect_print_dump_info (REPORT_SLP))
2738 fprintf (vect_dump, "Build SLP failed: no optab.");
2739 return false;
2741 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2742 if (icode == CODE_FOR_nothing)
2744 if (vect_print_dump_info (REPORT_SLP))
2745 fprintf (vect_dump,
2746 "Build SLP failed: op not supported by target.");
2747 return false;
2749 optab_op2_mode = insn_data[icode].operand[2].mode;
2750 if (!VECTOR_MODE_P (optab_op2_mode))
2752 need_same_oprnds = true;
2753 first_op1 = TREE_OPERAND (rhs, 1);
2757 else
2759 if (first_stmt_code != TREE_CODE (rhs))
2761 if (vect_print_dump_info (REPORT_SLP))
2763 fprintf (vect_dump,
2764 "Build SLP failed: different operation in stmt ");
2765 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2768 return false;
2771 if (need_same_oprnds
2772 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2774 if (vect_print_dump_info (REPORT_SLP))
2776 fprintf (vect_dump,
2777 "Build SLP failed: different shift arguments in ");
2778 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2781 return false;
2785 /* Strided store or load. */
2786 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2788 if (REFERENCE_CLASS_P (lhs))
2790 /* Store. */
2791 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2792 &def_stmts0, &def_stmts1,
2793 &first_stmt_dt0,
2794 &first_stmt_dt1,
2795 &first_stmt_def0_type,
2796 &first_stmt_def1_type,
2797 &first_stmt_const_oprnd,
2798 ncopies_for_cost))
2799 return false;
2801 else
2803 /* Load. */
2804 if (i == 0)
2806 /* The first stmt of the SLP group should be the first load of
2807 the interleaving chain if data permutation is not allowed.
2808 Check that there is no gap between the loads. */
2809 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2810 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2812 /* FORNOW: data permutations and gaps in loads are not
2813 supported. */
2814 if (vect_print_dump_info (REPORT_SLP))
2816 fprintf (vect_dump, "Build SLP failed: strided "
2817 " loads need permutation or have gaps ");
2818 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2821 return false;
2824 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2825 if (vect_supportable_dr_alignment (first_dr)
2826 == dr_unaligned_unsupported)
2828 if (vect_print_dump_info (REPORT_SLP))
2830 fprintf (vect_dump, "Build SLP failed: unsupported "
2831 " unaligned load ");
2832 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2835 return false;
2838 /* Analyze costs (for the first stmt in the group). */
2839 vect_model_load_cost (vinfo_for_stmt (stmt),
2840 ncopies_for_cost, *node);
2842 else
2844 /* Check that we have consecutive loads from the interleaving
2845 chain and that there is no gap between the loads. */
2846 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
2847 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
2849 /* FORNOW: data permutations and gaps in loads are not
2850 supported. */
2851 if (vect_print_dump_info (REPORT_SLP))
2853 fprintf (vect_dump, "Build SLP failed: strided "
2854 " loads need permutation or have gaps ");
2855 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2857 return false;
2861 prev_stmt = stmt;
2863 /* We stop the tree when we reach a group of loads. */
2864 stop_recursion = true;
2865 continue;
2867 } /* Strided access. */
2868 else
2870 if (REFERENCE_CLASS_P (rhs))
2872 /* Not a strided load. */
2873 if (vect_print_dump_info (REPORT_SLP))
2875 fprintf (vect_dump, "Build SLP failed: not strided load ");
2876 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2879 /* FORNOW: non-strided loads are not supported. */
2880 return false;
2883 /* Not memory operation. */
2884 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2886 if (vect_print_dump_info (REPORT_SLP))
2888 fprintf (vect_dump, "Build SLP failed: operation");
2889 fprintf (vect_dump, " unsupported ");
2890 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2893 return false;
2896 /* Find the def-stmts. */
2897 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2898 &def_stmts1, &first_stmt_dt0,
2899 &first_stmt_dt1,
2900 &first_stmt_def0_type,
2901 &first_stmt_def1_type,
2902 &first_stmt_const_oprnd,
2903 ncopies_for_cost))
2904 return false;
2908 /* Add the costs of the node to the overall instance costs. */
2909 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2910 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2912 /* Strided loads were reached - stop the recursion. */
2913 if (stop_recursion)
2914 return true;
2916 /* Create SLP_TREE nodes for the definition node/s. */
2917 if (first_stmt_dt0 == vect_loop_def)
2919 slp_tree left_node = XNEW (struct _slp_tree);
2920 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2921 SLP_TREE_VEC_STMTS (left_node) = NULL;
2922 SLP_TREE_LEFT (left_node) = NULL;
2923 SLP_TREE_RIGHT (left_node) = NULL;
2924 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2925 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2926 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2927 slp_impossible, inside_cost, outside_cost,
2928 ncopies_for_cost))
2929 return false;
2931 SLP_TREE_LEFT (*node) = left_node;
2934 if (first_stmt_dt1 == vect_loop_def)
2936 slp_tree right_node = XNEW (struct _slp_tree);
2937 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2938 SLP_TREE_VEC_STMTS (right_node) = NULL;
2939 SLP_TREE_LEFT (right_node) = NULL;
2940 SLP_TREE_RIGHT (right_node) = NULL;
2941 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2942 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2943 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2944 slp_impossible, inside_cost, outside_cost,
2945 ncopies_for_cost))
2946 return false;
2948 SLP_TREE_RIGHT (*node) = right_node;
2951 return true;
2955 static void
2956 vect_print_slp_tree (slp_tree node)
2958 int i;
2959 tree stmt;
2961 if (!node)
2962 return;
2964 fprintf (vect_dump, "node ");
2965 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2967 fprintf (vect_dump, "\n\tstmt %d ", i);
2968 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2970 fprintf (vect_dump, "\n");
2972 vect_print_slp_tree (SLP_TREE_LEFT (node));
2973 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2977 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2978 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2979 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2980 stmts in NODE are to be marked. */
2982 static void
2983 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2985 int i;
2986 tree stmt;
2988 if (!node)
2989 return;
2991 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2992 if (j < 0 || i == j)
2993 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2995 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2996 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
3000 /* Analyze an SLP instance starting from a group of strided stores. Call
3001 vect_build_slp_tree to build a tree of packed stmts if possible.
3002 Return FALSE if it's impossible to SLP any stmt in the loop. */
3004 static bool
3005 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
3007 slp_instance new_instance;
3008 slp_tree node = XNEW (struct _slp_tree);
3009 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3010 unsigned int unrolling_factor = 1, nunits;
3011 tree vectype, scalar_type, next;
3012 unsigned int vectorization_factor = 0, ncopies;
3013 bool slp_impossible = false;
3014 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3016 /* FORNOW: multiple types are not supported. */
3017 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
3018 vectype = get_vectype_for_scalar_type (scalar_type);
3019 if (!vectype)
3021 if (vect_print_dump_info (REPORT_SLP))
3023 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3024 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3026 return false;
3029 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3030 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3031 ncopies = vectorization_factor / nunits;
3032 if (ncopies > 1)
3034 if (vect_print_dump_info (REPORT_SLP))
3035 fprintf (vect_dump, "SLP failed - multiple types ");
3037 return false;
3040 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3041 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3042 next = stmt;
3043 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3044 while (next)
3046 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3047 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3050 SLP_TREE_VEC_STMTS (node) = NULL;
3051 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3052 SLP_TREE_LEFT (node) = NULL;
3053 SLP_TREE_RIGHT (node) = NULL;
3054 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3055 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3057 /* Calculate the unrolling factor. */
3058 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3060 /* Calculate the number of vector stmts to create based on the unrolling
3061 factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3062 GROUP_SIZE / NUNITS otherwise). */
3063 ncopies_for_cost = unrolling_factor * group_size / nunits;
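/* Worked numbers for the two formulas above (assumed for illustration):
   with NUNITS = 4 and GROUP_SIZE = 6, least_common_multiple (4, 6) is
   12, so UNROLLING_FACTOR = 12 / 6 = 2 and NCOPIES_FOR_COST
   = 2 * 6 / 4 = 3 vector stmts per SLP instance.  */
#if 0
#include <stdio.h>

static int
gcd (int a, int b)
{
  while (b)
    {
      int t = a % b;
      a = b;
      b = t;
    }
  return a;
}

int
main (void)
{
  int nunits = 4, group_size = 6;
  int lcm = nunits / gcd (nunits, group_size) * group_size;
  int unrolling_factor = lcm / group_size;                        /* 2 */
  int ncopies_for_cost = unrolling_factor * group_size / nunits;  /* 3 */
  printf ("unroll = %d, ncopies = %d\n", unrolling_factor, ncopies_for_cost);
  return 0;
}
#endif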
3065 /* Build the tree for the SLP instance. */
3066 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3067 &inside_cost, &outside_cost, ncopies_for_cost))
3069 /* Create a new SLP instance. */
3070 new_instance = XNEW (struct _slp_instance);
3071 SLP_INSTANCE_TREE (new_instance) = node;
3072 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3073 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3074 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3075 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3076 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3077 new_instance);
3078 if (vect_print_dump_info (REPORT_SLP))
3079 vect_print_slp_tree (node);
3081 return true;
3084 /* Failed to SLP. */
3085 /* Free the allocated memory. */
3086 vect_free_slp_tree (node);
3088 if (slp_impossible)
3089 return false;
3091 /* SLP failed for this instance, but it is still possible to SLP other stmts
3092 in the loop. */
3093 return true;
3097 /* Check whether there are stmts in the loop that can be vectorized using SLP.
3098 Build SLP trees of packed scalar stmts if SLP is possible. */
3100 static bool
3101 vect_analyze_slp (loop_vec_info loop_vinfo)
3103 unsigned int i;
3104 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3105 tree store;
3107 if (vect_print_dump_info (REPORT_SLP))
3108 fprintf (vect_dump, "=== vect_analyze_slp ===");
3110 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3111 if (!vect_analyze_slp_instance (loop_vinfo, store))
3113 /* SLP failed. No instance can be SLPed in the loop. */
3114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3115 fprintf (vect_dump, "SLP failed.");
3117 return false;
3120 return true;
3124 /* For each possible SLP instance decide whether to SLP it and calculate the
3125 overall unrolling factor needed to SLP the loop. */
3127 static void
3128 vect_make_slp_decision (loop_vec_info loop_vinfo)
3130 unsigned int i, unrolling_factor = 1;
3131 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3132 slp_instance instance;
3133 int decided_to_slp = 0;
3135 if (vect_print_dump_info (REPORT_SLP))
3136 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3138 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3140 /* FORNOW: SLP if you can. */
3141 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3142 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3144 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3145 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3146 loop-based vectorization. Such stmts will be marked as HYBRID. */
3147 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3148 decided_to_slp++;
3151 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3153 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3154 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3155 decided_to_slp, unrolling_factor);
3159 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3160 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
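/* Illustrative situation that requires HYBRID (a made-up example): the
   stores below form an SLP group, but X and Y also feed a reduction
   that is vectorized loop-based, so their defining stmts must be
   vectorized both ways.  */
#if 0
int
g (int *a, const int *b, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    {
      int x = b[2 * i] + 1;
      int y = b[2 * i + 1] + 1;
      a[2 * i] = x;      /* x and y are pure-SLP uses here...  */
      a[2 * i + 1] = y;
      s += x + y;        /* ...but this loop-based use makes them hybrid.  */
    }
  return s;
}
#endif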
3162 static void
3163 vect_detect_hybrid_slp_stmts (slp_tree node)
3165 int i;
3166 tree stmt;
3167 imm_use_iterator imm_iter;
3168 tree use_stmt;
3170 if (!node)
3171 return;
3173 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3174 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3175 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3176 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3177 if (vinfo_for_stmt (use_stmt)
3178 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3179 vect_mark_slp_stmts (node, hybrid, i);
3181 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3182 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3186 /* Find stmts that must be both vectorized and SLPed. */
3188 static void
3189 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3191 unsigned int i;
3192 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3193 slp_instance instance;
3195 if (vect_print_dump_info (REPORT_SLP))
3196 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3198 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3199 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3203 /* Function vect_analyze_data_refs.
3205 Find all the data references in the loop.
3207 The general structure of the analysis of data refs in the vectorizer is as
3208 follows:
3209 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3210 find and analyze all data-refs in the loop and their dependences.
3211 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3212 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3213 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3217 static bool
3218 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3220 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3221 unsigned int i;
3222 VEC (data_reference_p, heap) *datarefs;
3223 struct data_reference *dr;
3224 tree scalar_type;
3226 if (vect_print_dump_info (REPORT_DETAILS))
3227 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3229 compute_data_dependences_for_loop (loop, true,
3230 &LOOP_VINFO_DATAREFS (loop_vinfo),
3231 &LOOP_VINFO_DDRS (loop_vinfo));
3233 /* Go through the data-refs and check that the analysis succeeded. Update the
3234 pointer from the stmt_vec_info struct to the DR, and set the vectype. */
3235 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3237 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3239 tree stmt;
3240 stmt_vec_info stmt_info;
3241 basic_block bb;
3242 tree base, offset, init;
3244 if (!dr || !DR_REF (dr))
3246 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3247 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3248 return false;
3251 stmt = DR_STMT (dr);
3252 stmt_info = vinfo_for_stmt (stmt);
3254 /* Check that analysis of the data-ref succeeded. */
3255 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3256 || !DR_STEP (dr))
3258 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3260 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3261 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3263 return false;
3266 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3268 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3269 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3270 "constant");
3271 return false;
3274 if (!DR_SYMBOL_TAG (dr))
3276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3278 fprintf (vect_dump, "not vectorized: no memory tag for ");
3279 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3281 return false;
3284 base = unshare_expr (DR_BASE_ADDRESS (dr));
3285 offset = unshare_expr (DR_OFFSET (dr));
3286 init = unshare_expr (DR_INIT (dr));
3288 /* Update DR field in stmt_vec_info struct. */
3289 bb = bb_for_stmt (stmt);
3291 /* If the dataref is in an inner-loop of the loop that is considered for
3292 vectorization, we also want to analyze the access relative to
3293 the outer-loop (DR contains information only relative to the
3294 inner-most enclosing loop). We do that by building a reference to the
3295 first location accessed by the inner-loop, and analyze it relative to
3296 the outer-loop. */
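/* Illustrative source for the case handled just below (hypothetical):
   for the inner-loop access a[i][j], DR describes the evolution with
   respect to the j-loop only, so the first location &a[i][0] is
   re-analyzed as an induction of the outer i-loop.  */
#if 0
void
outer (int a[][64], int n, int m)
{
  int i, j;
  for (i = 0; i < n; i++)    /* the loop considered for vectorization */
    for (j = 0; j < m; j++)  /* inner loop containing the data-ref */
      a[i][j] = 0;
}
#endif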
3297 if (nested_in_vect_loop_p (loop, stmt))
3299 tree outer_step, outer_base, outer_init;
3300 HOST_WIDE_INT pbitsize, pbitpos;
3301 tree poffset;
3302 enum machine_mode pmode;
3303 int punsignedp, pvolatilep;
3304 affine_iv base_iv, offset_iv;
3305 tree dinit;
3307 /* Build a reference to the first location accessed by the
3308 inner-loop: *(BASE+INIT). (The first location is actually
3309 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3310 tree inner_base = build_fold_indirect_ref
3311 (fold_build2 (POINTER_PLUS_EXPR,
3312 TREE_TYPE (base), base,
3313 fold_convert (sizetype, init)));
3315 if (vect_print_dump_info (REPORT_DETAILS))
3317 fprintf (dump_file, "analyze in outer-loop: ");
3318 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3321 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3322 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3323 gcc_assert (outer_base != NULL_TREE);
3325 if (pbitpos % BITS_PER_UNIT != 0)
3327 if (vect_print_dump_info (REPORT_DETAILS))
3328 fprintf (dump_file, "failed: bit offset alignment.\n");
3329 return false;
3332 outer_base = build_fold_addr_expr (outer_base);
3333 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3335 if (vect_print_dump_info (REPORT_DETAILS))
3336 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3337 return false;
3340 if (offset)
3342 if (poffset)
3343 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3344 else
3345 poffset = offset;
3348 if (!poffset)
3350 offset_iv.base = ssize_int (0);
3351 offset_iv.step = ssize_int (0);
3353 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3355 if (vect_print_dump_info (REPORT_DETAILS))
3356 fprintf (dump_file, "evolution of offset is not affine.\n");
3357 return false;
3360 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3361 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3362 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3363 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3364 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3366 outer_step = size_binop (PLUS_EXPR,
3367 fold_convert (ssizetype, base_iv.step),
3368 fold_convert (ssizetype, offset_iv.step));
3370 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3371 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3372 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3373 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3374 STMT_VINFO_DR_OFFSET (stmt_info) =
3375 fold_convert (ssizetype, offset_iv.base);
3376 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3377 size_int (highest_pow2_factor (offset_iv.base));
3379 if (dump_file && (dump_flags & TDF_DETAILS))
3381 fprintf (dump_file, "\touter base_address: ");
3382 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3383 fprintf (dump_file, "\n\touter offset from base address: ");
3384 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3385 fprintf (dump_file, "\n\touter constant offset from base address: ");
3386 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3387 fprintf (dump_file, "\n\touter step: ");
3388 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3389 fprintf (dump_file, "\n\touter aligned to: ");
3390 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3394 if (STMT_VINFO_DATA_REF (stmt_info))
3396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3398 fprintf (vect_dump,
3399 "not vectorized: more than one data ref in stmt: ");
3400 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3402 return false;
3404 STMT_VINFO_DATA_REF (stmt_info) = dr;
3406 /* Set vectype for STMT. */
3407 scalar_type = TREE_TYPE (DR_REF (dr));
3408 STMT_VINFO_VECTYPE (stmt_info) =
3409 get_vectype_for_scalar_type (scalar_type);
3410 if (!STMT_VINFO_VECTYPE (stmt_info))
3412 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3414 fprintf (vect_dump,
3415 "not vectorized: no vectype for stmt: ");
3416 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3417 fprintf (vect_dump, " scalar_type: ");
3418 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3420 return false;
3424 return true;
3428 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3430 /* Function vect_mark_relevant.
3432 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3434 static void
3435 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3436 enum vect_relevant relevant, bool live_p)
3438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3439 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3440 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3442 if (vect_print_dump_info (REPORT_DETAILS))
3443 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3445 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3447 tree pattern_stmt;
3449 /* This is the last stmt in a sequence that was detected as a
3450 pattern that can potentially be vectorized. Don't mark the stmt
3451 as relevant/live because it's not going to be vectorized.
3452 Instead mark the pattern-stmt that replaces it. */
3454 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3456 if (vect_print_dump_info (REPORT_DETAILS))
3457 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3458 stmt_info = vinfo_for_stmt (pattern_stmt);
3459 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3460 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3461 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3462 stmt = pattern_stmt;
3465 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3466 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3467 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3469 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3470 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3472 if (vect_print_dump_info (REPORT_DETAILS))
3473 fprintf (vect_dump, "already marked relevant/live.");
3474 return;
3477 VEC_safe_push (tree, heap, *worklist, stmt);
3481 /* Function vect_stmt_relevant_p.
3483 Return true if STMT in loop that is represented by LOOP_VINFO is
3484 "relevant for vectorization".
3486 A stmt is considered "relevant for vectorization" if:
3487 - it has uses outside the loop.
3488 - it has vdefs (it alters memory).
3489 - it is a control stmt in the loop (except for the exit condition).
3491 CHECKME: what other side effects would the vectorizer allow? */
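/* Example of a relevant stmt and a live stmt (illustrative source): the
   store is relevant because it has a vdef, and the add defining S is
   live because the final value of S is used after the loop.  */
#if 0
int
h (int *a, const int *b, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    {
      a[i] = b[i] + 1;  /* relevant: alters memory */
      s = s + b[i];     /* live: s is used outside the loop */
    }
  return s;
}
#endif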
3493 static bool
3494 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3495 enum vect_relevant *relevant, bool *live_p)
3497 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3498 ssa_op_iter op_iter;
3499 imm_use_iterator imm_iter;
3500 use_operand_p use_p;
3501 def_operand_p def_p;
3503 *relevant = vect_unused_in_loop;
3504 *live_p = false;
3506 /* cond stmt other than loop exit cond. */
3507 if (is_ctrl_stmt (stmt)
3508 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3509 *relevant = vect_used_in_loop;
3511 /* changing memory. */
3512 if (TREE_CODE (stmt) != PHI_NODE)
3513 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3515 if (vect_print_dump_info (REPORT_DETAILS))
3516 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3517 *relevant = vect_used_in_loop;
3520 /* uses outside the loop. */
3521 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3523 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3525 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3526 if (!flow_bb_inside_loop_p (loop, bb))
3528 if (vect_print_dump_info (REPORT_DETAILS))
3529 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3531 /* We expect all such uses to be in the loop exit phis
3532 (because of loop closed form) */
3533 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3534 gcc_assert (bb == single_exit (loop)->dest);
3536 *live_p = true;
3541 return (*live_p || *relevant);
3546 Function process_use.
3548 Inputs:
3549 - a USE in STMT in a loop represented by LOOP_VINFO
3550 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3551 that defined USE. This is done by calling vect_mark_relevant and passing it
3552 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3554 Outputs:
3555 Generally, LIVE_P and RELEVANT are used to define the liveness and
3556 relevance info of the DEF_STMT of this USE:
3557 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3558 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3559 Exceptions:
3560 - case 1: If USE is used only for address computations (e.g. array indexing),
3561 which does not need to be directly vectorized, then the liveness/relevance
3562 of the respective DEF_STMT is left unchanged.
3563 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3564 skip DEF_STMT because it has already been processed.
3565 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3566 be modified accordingly.
3568 Return true if everything is as expected. Return false otherwise. */
3570 static bool
3571 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3572 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3574 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3575 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3576 stmt_vec_info dstmt_vinfo;
3577 basic_block bb, def_bb;
3578 tree def, def_stmt;
3579 enum vect_def_type dt;
3581 /* case 1: we are only interested in uses that need to be vectorized. Uses
3582 that serve only address computation are not considered relevant. */
3583 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3584 return true;
3586 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3589 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3590 return false;
3593 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3594 return true;
3596 def_bb = bb_for_stmt (def_stmt);
3597 if (!flow_bb_inside_loop_p (loop, def_bb))
3599 if (vect_print_dump_info (REPORT_DETAILS))
3600 fprintf (vect_dump, "def_stmt is out of loop.");
3601 return true;
3604 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3605 DEF_STMT must have already been processed, because this should be the
3606 only way that STMT, which is a reduction-phi, was put in the worklist,
3607 as there should be no other uses for DEF_STMT in the loop. So we just
3608 check that everything is as expected, and we are done. */
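/* An illustrative sketch of case 2: a reduction cycle of the form

     s_phi = PHI <s_init, s_next>    <-- STMT (reduction phi)
     ...
     s_next = s_phi + a[i];          <-- DEF_STMT (reduction stmt)

   where USE is the appearance of s_next as the backedge argument of the
   phi.  */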
3609 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3610 bb = bb_for_stmt (stmt);
3611 if (TREE_CODE (stmt) == PHI_NODE
3612 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3613 && TREE_CODE (def_stmt) != PHI_NODE
3614 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3615 && bb->loop_father == def_bb->loop_father)
3617 if (vect_print_dump_info (REPORT_DETAILS))
3618 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3619 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3620 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3621 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3622 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3623 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3624 return true;
3627 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3628         outer-loop-header-bb:
3629                 d = def_stmt
3630         inner-loop:
3631                 stmt # use (d)
3632         outer-loop-tail-bb:
3633                 ...                */
3634 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3636 if (vect_print_dump_info (REPORT_DETAILS))
3637 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3638 switch (relevant)
3640 case vect_unused_in_loop:
3641 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3642 vect_used_by_reduction : vect_unused_in_loop;
3643 break;
3644 case vect_used_in_outer_by_reduction:
3645 relevant = vect_used_by_reduction;
3646 break;
3647 case vect_used_in_outer:
3648 relevant = vect_used_in_loop;
3649 break;
3650 case vect_used_by_reduction:
3651 case vect_used_in_loop:
3652 break;
3654 default:
3655 gcc_unreachable ();
3659 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3660         outer-loop-header-bb:
3662         inner-loop:
3663                 d = def_stmt
3664         outer-loop-tail-bb:
3665                 stmt # use (d)     */
3666 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3668 if (vect_print_dump_info (REPORT_DETAILS))
3669 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3670 switch (relevant)
3672 case vect_unused_in_loop:
3673 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3674 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3675 break;
3677 case vect_used_in_outer_by_reduction:
3678 case vect_used_in_outer:
3679 break;
3681 case vect_used_by_reduction:
3682 relevant = vect_used_in_outer_by_reduction;
3683 break;
3685 case vect_used_in_loop:
3686 relevant = vect_used_in_outer;
3687 break;
3689 default:
3690 gcc_unreachable ();
3694 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3695 return true;
3699 /* Function vect_mark_stmts_to_be_vectorized.
3701 Not all stmts in the loop need to be vectorized. For example:
3703      for i...
3704        for j...
3705    1. T0 = i + j
3706    2. T1 = a[T0]
3708    3. j = j + 1
3710 Stmts 1 and 3 do not need to be vectorized, because loop control and
3711 addressing of vectorized data-refs are handled differently.
3713 This pass detects such stmts. */
3715 static bool
3716 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3718 VEC(tree,heap) *worklist;
3719 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3720 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3721 unsigned int nbbs = loop->num_nodes;
3722 block_stmt_iterator si;
3723 tree stmt;
3724 stmt_ann_t ann;
3725 unsigned int i;
3726 stmt_vec_info stmt_vinfo;
3727 basic_block bb;
3728 tree phi;
3729 bool live_p;
3730 enum vect_relevant relevant;
3732 if (vect_print_dump_info (REPORT_DETAILS))
3733 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3735 worklist = VEC_alloc (tree, heap, 64);
3737 /* 1. Init worklist. */
3738 for (i = 0; i < nbbs; i++)
3740 bb = bbs[i];
3741 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3743 if (vect_print_dump_info (REPORT_DETAILS))
3745 fprintf (vect_dump, "init: phi relevant? ");
3746 print_generic_expr (vect_dump, phi, TDF_SLIM);
3749 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3750 vect_mark_relevant (&worklist, phi, relevant, live_p);
3752 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3754 stmt = bsi_stmt (si);
3755 if (vect_print_dump_info (REPORT_DETAILS))
3757 fprintf (vect_dump, "init: stmt relevant? ");
3758 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3761 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3762 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3766 /* 2. Process worklist. */
3767 while (VEC_length (tree, worklist) > 0)
3769 use_operand_p use_p;
3770 ssa_op_iter iter;
3772 stmt = VEC_pop (tree, worklist);
3773 if (vect_print_dump_info (REPORT_DETAILS))
3775 fprintf (vect_dump, "worklist: examine stmt: ");
3776 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3779 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3780 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3781 liveness and relevance properties of STMT. */
3782 ann = stmt_ann (stmt);
3783 stmt_vinfo = vinfo_for_stmt (stmt);
3784 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3785 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3787 /* Generally, the liveness and relevance properties of STMT are
3788 propagated as is to the DEF_STMTs of its USEs:
3789 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3790 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3792 One exception is when STMT has been identified as defining a reduction
3793 variable; in this case we set the liveness/relevance as follows:
3794 live_p = false
3795 relevant = vect_used_by_reduction
3796 This is because we distinguish between two kinds of relevant stmts -
3797 those that are used by a reduction computation, and those that are
3798 (also) used by a regular computation. This allows us later on to
3799 identify stmts that are used solely by a reduction, and therefore the
3800 order of the results that they produce does not have to be kept.
3802 Reduction phis are expected to be used by a reduction stmt, or by a
3803 stmt in an outer loop; other reduction stmts are expected to be used
3804 only by the reduction computation in the loop, and possibly by a stmt in an outer loop.
3805 Here are the expected values of "relevant" for reduction phis/stmts:
3807    relevance:                            phi    stmt
3808    vect_unused_in_loop                          ok
3809    vect_used_in_outer_by_reduction       ok     ok
3810    vect_used_in_outer                    ok     ok
3811    vect_used_by_reduction                ok
3812    vect_used_in_loop                                   */
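/* For example (an illustrative sketch): in

     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   the multiplication feeds only the reduction, so it is marked
   vect_used_by_reduction and the order in which its results are produced
   need not be preserved.  */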
3814 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3816 enum vect_relevant tmp_relevant = relevant;
3817 switch (tmp_relevant)
3819 case vect_unused_in_loop:
3820 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3821 relevant = vect_used_by_reduction;
3822 break;
3824 case vect_used_in_outer_by_reduction:
3825 case vect_used_in_outer:
3826 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3827 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3828 break;
3830 case vect_used_by_reduction:
3831 if (TREE_CODE (stmt) == PHI_NODE)
3832 break;
3833 /* fall through */
3834 case vect_used_in_loop:
3835 default:
3836 if (vect_print_dump_info (REPORT_DETAILS))
3837 fprintf (vect_dump, "unsupported use of reduction.");
3838 VEC_free (tree, heap, worklist);
3839 return false;
3841 live_p = false;
3844 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3846 tree op = USE_FROM_PTR (use_p);
3847 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3849 VEC_free (tree, heap, worklist);
3850 return false;
3853 } /* while worklist */
3855 VEC_free (tree, heap, worklist);
3856 return true;
3860 /* Function vect_can_advance_ivs_p
3862 In case the number of iterations LOOP executes is unknown at compile
3863 time, an epilog loop will be generated, and the loop induction variables
3864 (IVs) will be "advanced" to the value they are supposed to take just before
3865 the epilog loop. Here we check that the access function of the loop IVs
3866 and the expression that represents the loop bound are simple enough.
3867 These restrictions will be relaxed in the future. */
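/* An illustrative sketch: a simple IV such as 'i' in "for (i = 0; i < n;
   i++)" has access function {0, +, 1}_x (a linear chrec for loop number
   x); its evolution part in the loop is the constant 1, which is not
   itself a chrec, so the IV can be advanced.  An IV whose evolution part
   is again a chrec (i.e. a polynomial of degree >= 2) is rejected.  */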
3869 static bool
3870 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3872 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3873 basic_block bb = loop->header;
3874 tree phi;
3876 /* Analyze phi functions of the loop header. */
3878 if (vect_print_dump_info (REPORT_DETAILS))
3879 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3881 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3883 tree access_fn = NULL;
3884 tree evolution_part;
3886 if (vect_print_dump_info (REPORT_DETAILS))
3888 fprintf (vect_dump, "Analyze phi: ");
3889 print_generic_expr (vect_dump, phi, TDF_SLIM);
3892 /* Skip virtual phis. The data dependences that are associated with
3893 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3895 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3897 if (vect_print_dump_info (REPORT_DETAILS))
3898 fprintf (vect_dump, "virtual phi. skip.");
3899 continue;
3902 /* Skip reduction phis. */
3904 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3906 if (vect_print_dump_info (REPORT_DETAILS))
3907 fprintf (vect_dump, "reduc phi. skip.");
3908 continue;
3911 /* Analyze the evolution function. */
3913 access_fn = instantiate_parameters
3914 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3916 if (!access_fn)
3918 if (vect_print_dump_info (REPORT_DETAILS))
3919 fprintf (vect_dump, "No Access function.");
3920 return false;
3923 if (vect_print_dump_info (REPORT_DETAILS))
3925 fprintf (vect_dump, "Access function of PHI: ");
3926 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3929 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3931 if (evolution_part == NULL_TREE)
3933 if (vect_print_dump_info (REPORT_DETAILS))
3934 fprintf (vect_dump, "No evolution.");
3935 return false;
3938 /* FORNOW: We do not transform initial conditions of IVs
3939 whose evolution functions are a polynomial of degree >= 2. */
3941 if (tree_is_chrec (evolution_part))
3942 return false;
3945 return true;
3949 /* Function vect_get_loop_niters.
3951 Determine how many times the loop is executed.
3952 If an expression that represents the number of iterations
3953 can be constructed, place it in NUMBER_OF_ITERATIONS.
3954 Return the loop exit condition. */
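/* For example (an illustrative sketch): for "for (i = 0; i < N; i++)" the
   exit condition is executed N times, so NUMBER_OF_ITERATIONS is set to N
   and the loop exit condition is returned.  */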
3956 static tree
3957 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3959 tree niters;
3961 if (vect_print_dump_info (REPORT_DETAILS))
3962 fprintf (vect_dump, "=== get_loop_niters ===");
3964 niters = number_of_exit_cond_executions (loop);
3966 if (niters != NULL_TREE
3967 && niters != chrec_dont_know)
3969 *number_of_iterations = niters;
3971 if (vect_print_dump_info (REPORT_DETAILS))
3973 fprintf (vect_dump, "==> get_loop_niters:" );
3974 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3978 return get_loop_exit_condition (loop);
3982 /* Function vect_analyze_loop_1.
3984 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3985 for it. The different analyses will record information in the
3986 loop_vec_info struct. This is a subset of the analyses applied in
3987 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3988 that is now considered for (outer-loop) vectorization. */
3990 static loop_vec_info
3991 vect_analyze_loop_1 (struct loop *loop)
3993 loop_vec_info loop_vinfo;
3995 if (vect_print_dump_info (REPORT_DETAILS))
3996 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3998 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4000 loop_vinfo = vect_analyze_loop_form (loop);
4001 if (!loop_vinfo)
4003 if (vect_print_dump_info (REPORT_DETAILS))
4004 fprintf (vect_dump, "bad inner-loop form.");
4005 return NULL;
4008 return loop_vinfo;
4012 /* Function vect_analyze_loop_form.
4014 Verify that certain CFG restrictions hold, including:
4015 - the loop has a pre-header
4016 - the loop has a single entry and exit
4017 - the loop exit condition is simple enough, and the number of iterations
4018 can be analyzed (a countable loop). */
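/* For example (an illustrative sketch): "for (i = 0; i < n; i++)" is
   countable (its iteration count, n, can be computed on loop entry),
   whereas "while (p) p = p->next;" is not, and is rejected by the checks
   below.  */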
4020 loop_vec_info
4021 vect_analyze_loop_form (struct loop *loop)
4023 loop_vec_info loop_vinfo;
4024 tree loop_cond;
4025 tree number_of_iterations = NULL;
4026 loop_vec_info inner_loop_vinfo = NULL;
4028 if (vect_print_dump_info (REPORT_DETAILS))
4029 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4031 /* Different restrictions apply when we are considering an inner-most loop,
4032 vs. an outer (nested) loop.
4033 (FORNOW. May want to relax some of these restrictions in the future). */
4035 if (!loop->inner)
4037 /* Inner-most loop. We currently require that the number of BBs is
4038 exactly 2 (the header and latch). Vectorizable inner-most loops
4039 look like this:
4041                      (pre-header)
4042                         |
4043                      header <--------+
4044                        | |           |
4045                        | +--> latch --+
4046                        |
4047                      (exit-bb)   */
4049 if (loop->num_nodes != 2)
4051 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4052 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4053 return NULL;
4056 if (empty_block_p (loop->header))
4058 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4059 fprintf (vect_dump, "not vectorized: empty loop.");
4060 return NULL;
4063 else
4065 struct loop *innerloop = loop->inner;
4066 edge backedge, entryedge;
4068 /* Nested loop. We currently require that the loop is doubly-nested,
4069 contains a single inner loop, and the number of BBs is exactly 5.
4070 Vectorizable outer-loops look like this:
4072                      (pre-header)
4073                         |
4074                        header <---+
4075                         |         |
4076                       inner-loop  |
4077                         |         |
4078                        tail ------+
4079                         |
4080                      (exit-bb)
4082 The inner-loop has the properties expected of inner-most loops
4083 as described above. */
4085 if ((loop->inner)->inner || (loop->inner)->next)
4087 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4088 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4089 return NULL;
4092 /* Analyze the inner-loop. */
4093 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4094 if (!inner_loop_vinfo)
4096 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4097 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4098 return NULL;
4101 if (!expr_invariant_in_loop_p (loop,
4102 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4104 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4105 fprintf (vect_dump,
4106 "not vectorized: inner-loop count not invariant.");
4107 destroy_loop_vec_info (inner_loop_vinfo, true);
4108 return NULL;
4111 if (loop->num_nodes != 5)
4113 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4114 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4115 destroy_loop_vec_info (inner_loop_vinfo, true);
4116 return NULL;
4119 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4120 backedge = EDGE_PRED (innerloop->header, 1);
4121 entryedge = EDGE_PRED (innerloop->header, 0);
4122 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4124 backedge = EDGE_PRED (innerloop->header, 0);
4125 entryedge = EDGE_PRED (innerloop->header, 1);
4128 if (entryedge->src != loop->header
4129 || !single_exit (innerloop)
4130 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4132 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4133 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4134 destroy_loop_vec_info (inner_loop_vinfo, true);
4135 return NULL;
4138 if (vect_print_dump_info (REPORT_DETAILS))
4139 fprintf (vect_dump, "Considering outer-loop vectorization.");
4142 if (!single_exit (loop)
4143 || EDGE_COUNT (loop->header->preds) != 2)
4145 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4147 if (!single_exit (loop))
4148 fprintf (vect_dump, "not vectorized: multiple exits.");
4149 else if (EDGE_COUNT (loop->header->preds) != 2)
4150 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4152 if (inner_loop_vinfo)
4153 destroy_loop_vec_info (inner_loop_vinfo, true);
4154 return NULL;
4157 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4158 that the loop is represented as a do-while (with a proper if-guard
4159 before the loop if needed), where the loop header contains all the
4160 executable statements, and the latch is empty. */
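/* Schematically (an illustrative sketch):

     header:
       <all executable stmts>
       if (exit-cond) goto exit-bb; else goto latch;
     latch:
       goto header;      <-- empty: no stmts and no phis.  */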
4161 if (!empty_block_p (loop->latch)
4162 || phi_nodes (loop->latch))
4164 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4165 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4166 if (inner_loop_vinfo)
4167 destroy_loop_vec_info (inner_loop_vinfo, true);
4168 return NULL;
4171 /* Make sure there exists a single-predecessor exit bb: */
4172 if (!single_pred_p (single_exit (loop)->dest))
4174 edge e = single_exit (loop);
4175 if (!(e->flags & EDGE_ABNORMAL))
4177 split_loop_exit_edge (e);
4178 if (vect_print_dump_info (REPORT_DETAILS))
4179 fprintf (vect_dump, "split exit edge.");
4181 else
4183 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4184 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4185 if (inner_loop_vinfo)
4186 destroy_loop_vec_info (inner_loop_vinfo, true);
4187 return NULL;
4191 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4192 if (!loop_cond)
4194 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4195 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4196 if (inner_loop_vinfo)
4197 destroy_loop_vec_info (inner_loop_vinfo, true);
4198 return NULL;
4201 if (!number_of_iterations)
4203 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4204 fprintf (vect_dump,
4205 "not vectorized: number of iterations cannot be computed.");
4206 if (inner_loop_vinfo)
4207 destroy_loop_vec_info (inner_loop_vinfo, true);
4208 return NULL;
4211 if (chrec_contains_undetermined (number_of_iterations))
4213 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4214 fprintf (vect_dump, "Infinite number of iterations.");
4215 if (inner_loop_vinfo)
4216 destroy_loop_vec_info (inner_loop_vinfo, true);
4217 return NULL;
4220 if (!NITERS_KNOWN_P (number_of_iterations))
4222 if (vect_print_dump_info (REPORT_DETAILS))
4224 fprintf (vect_dump, "Symbolic number of iterations is ");
4225 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4228 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4230 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4231 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4232 if (inner_loop_vinfo)
4233 destroy_loop_vec_info (inner_loop_vinfo, false);
4234 return NULL;
4237 loop_vinfo = new_loop_vec_info (loop);
4238 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4239 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4241 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4243 /* CHECKME: May want to keep it around in the future. */
4244 if (inner_loop_vinfo)
4245 destroy_loop_vec_info (inner_loop_vinfo, false);
4247 gcc_assert (!loop->aux);
4248 loop->aux = loop_vinfo;
4249 return loop_vinfo;
4253 /* Function vect_analyze_loop.
4255 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4256 for it. The different analyses will record information in the
4257 loop_vec_info struct. */
4258 loop_vec_info
4259 vect_analyze_loop (struct loop *loop)
4261 bool ok;
4262 loop_vec_info loop_vinfo;
4264 if (vect_print_dump_info (REPORT_DETAILS))
4265 fprintf (vect_dump, "===== analyze_loop_nest =====");
4267 if (loop_outer (loop)
4268 && loop_vec_info_for_loop (loop_outer (loop))
4269 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4271 if (vect_print_dump_info (REPORT_DETAILS))
4272 fprintf (vect_dump, "outer-loop already vectorized.");
4273 return NULL;
4276 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4278 loop_vinfo = vect_analyze_loop_form (loop);
4279 if (!loop_vinfo)
4281 if (vect_print_dump_info (REPORT_DETAILS))
4282 fprintf (vect_dump, "bad loop form.");
4283 return NULL;
4286 /* Find all data references in the loop (which correspond to vdefs/vuses)
4287 and analyze their evolution in the loop.
4289 FORNOW: Handle only simple array references, whose
4290 alignment can be forced, and aligned pointer-references. */
4292 ok = vect_analyze_data_refs (loop_vinfo);
4293 if (!ok)
4295 if (vect_print_dump_info (REPORT_DETAILS))
4296 fprintf (vect_dump, "bad data references.");
4297 destroy_loop_vec_info (loop_vinfo, true);
4298 return NULL;
4301 /* Classify all cross-iteration scalar data-flow cycles.
4302 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4304 vect_analyze_scalar_cycles (loop_vinfo);
4306 vect_pattern_recog (loop_vinfo);
4308 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4310 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4311 if (!ok)
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "unexpected pattern.");
4315 destroy_loop_vec_info (loop_vinfo, true);
4316 return NULL;
4319 /* Analyze the alignment of the data-refs in the loop.
4320 Fail if a data reference is found that cannot be vectorized. */
4322 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4323 if (!ok)
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "bad data alignment.");
4327 destroy_loop_vec_info (loop_vinfo, true);
4328 return NULL;
4331 ok = vect_determine_vectorization_factor (loop_vinfo);
4332 if (!ok)
4334 if (vect_print_dump_info (REPORT_DETAILS))
4335 fprintf (vect_dump, "can't determine vectorization factor.");
4336 destroy_loop_vec_info (loop_vinfo, true);
4337 return NULL;
4340 /* Analyze data dependences between the data-refs in the loop.
4341 FORNOW: fail at the first data dependence that we encounter. */
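/* For example (an illustrative sketch): a loop containing

     a[i+1] = a[i] + b[i];

   carries a cross-iteration dependence between the read a[i] and the
   write a[i+1], and FORNOW would be rejected here.  */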
4343 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4344 if (!ok)
4346 if (vect_print_dump_info (REPORT_DETAILS))
4347 fprintf (vect_dump, "bad data dependence.");
4348 destroy_loop_vec_info (loop_vinfo, true);
4349 return NULL;
4352 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4353 complex, etc.). FORNOW: Handle only consecutive access patterns. */
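/* For example (an illustrative sketch): the reference a[i] in a loop over
   i is a consecutive access (stride 1), whereas an indirect reference
   such as a[b[i]] has no such pattern and fails this check.  */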
4355 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4356 if (!ok)
4358 if (vect_print_dump_info (REPORT_DETAILS))
4359 fprintf (vect_dump, "bad data access.");
4360 destroy_loop_vec_info (loop_vinfo, true);
4361 return NULL;
4364 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4365 It is important to call pruning after vect_analyze_data_ref_accesses,
4366 since we use grouping information gathered by interleaving analysis. */
4367 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4368 if (!ok)
4370 if (vect_print_dump_info (REPORT_DETAILS))
4371 fprintf (vect_dump, "too long list of versioning for alias "
4372 "run-time tests.");
4373 destroy_loop_vec_info (loop_vinfo, true);
4374 return NULL;
4377 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4378 ok = vect_analyze_slp (loop_vinfo);
4379 if (ok)
4381 /* Decide which of the possible SLP instances to SLP. */
4382 vect_make_slp_decision (loop_vinfo);
4384 /* Find stmts that need to be both vectorized and SLPed. */
4385 vect_detect_hybrid_slp (loop_vinfo);
4388 /* This pass will decide on using loop versioning and/or loop peeling in
4389 order to enhance the alignment of data references in the loop. */
4391 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4392 if (!ok)
4394 if (vect_print_dump_info (REPORT_DETAILS))
4395 fprintf (vect_dump, "bad data alignment.");
4396 destroy_loop_vec_info (loop_vinfo, true);
4397 return NULL;
4400 /* Scan all the operations in the loop and make sure they are
4401 vectorizable. */
4403 ok = vect_analyze_operations (loop_vinfo);
4404 if (!ok)
4406 if (vect_print_dump_info (REPORT_DETAILS))
4407 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4408 destroy_loop_vec_info (loop_vinfo, true);
4409 return NULL;
4412 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4414 return loop_vinfo;