gcc/tree-vect-analyze.c

/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

static bool vect_can_advance_ivs_p (loop_vec_info);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since
   4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i++){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  tree phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!GIMPLE_STMT_P (stmt)
              && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              tree operation;

              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different than the type of the
                 arguments (e.g. demotion, promotion), vectype will be reset
                 appropriately (later).  Note that we have to visit the smallest
                 datatype in this function, because that determines the VF.
                 If the smallest datatype in the loop is present only as the
                 rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not appear
                 in the lhs anywhere in the loop, can only occur if it's an
                 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
                 to have been optimized away by invariant motion.  However, we
                 cannot rely on invariant motion to always take invariants out
                 of the loop, and so in the case of promotion we also have to
                 check the rhs.  */
              scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));

              operation = GIMPLE_STMT_OPERAND (stmt, 1);
              if (CONVERT_EXPR_P (operation)
                  || TREE_CODE (operation) == WIDEN_MULT_EXPR
                  || TREE_CODE (operation) == FLOAT_EXPR)
                {
                  tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
                      TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
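/* Worked example (illustrative numbers, not from the original source): if an
   SLP instance was built with unrolling factor 2 and the loop's VF is later
   set to 8, every SLP node ends up replicated 8/2 = 4 times, so the
   instance's inside-of-loop cost is scaled by 4 below.  */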
static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* An inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e., a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because that case would
                 require us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (GIMPLE_STMT_P (stmt)
                          || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e., that are used out of the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross-iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                              LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
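/* Illustrative example (not from the original source): in 'a[i_4] = x_5',
   USE = x_5 feeds the stored value and is a non-indexing operand, so we
   return true for it, while USE = i_4 is only used to index the array,
   so we return false for it.  */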
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */
static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  tree phi;
  basic_block bb = loop->header;
  tree dumy;
  VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (tree, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (tree, worklist) > 0)
    {
      tree phi = VEC_pop (tree, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (tree, heap, worklist);
  return;
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
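/* Illustrative example (not from the original source): inserting a ref with
   INIT 8 into the chain (INIT 0 -> INIT 4 -> INIT 12) links it between the
   INIT-4 and INIT-12 refs, keeping the chain sorted by increasing INIT.  */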
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
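/* Illustrative run of case 1 (numbers not from the original source): for
   4-byte accesses DRA = a[i+1] (INIT 4) and DRB = a[i] (INIT 0), neither in
   a chain yet, DRB becomes the FIRST_DR of both stmts and DRA is linked
   right after it, giving the chain DRB -> DRA.  */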
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. Both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are part of an interleaving.  If they are, insert
   DRA and DRB into an interleaving chain.  */
static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have the same first location (except init) and
     that they are both either loads or stores (not one load and one store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
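  /* Illustrative numbers (not from the original source): for 4-byte accesses
     a[2*i] (init 0) and a[2*i+1] (init 4) with step 8, the init difference 4
     is smaller than the step and divisible by the type size, so the two refs
     are linked into one interleaving chain below.  */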
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if they are
   different, otherwise return TRUE.  */
static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  tree stmt_i = DR_STMT (dr_i);
  tree stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If the address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */
static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */
static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias can check the
   dependence at run-time instead, return FALSE.  */
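/* Illustrative reading of the distance check below (numbers not from the
   original source), with VF = 4: a distance that is a multiple of 4
   (including 0, e.g. a[i] read and written in the same iteration) only
   forces the two refs to share their alignment; a distance of at least 4
   that is not such a multiple is fine, since the four iterations executed
   together never span it; a distance of 1, 2 or 3 (e.g. a[i+1] = a[i])
   makes the loop non-vectorizable.  */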
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked above that one of the data-refs is a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */
static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
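/* Illustrative example (not from the original source): for a V4SI vectype
   (16-byte alignment), a dataref whose base is known to be 16-byte aligned
   and whose DR_INIT is 4 gets DR_MISALIGNMENT = 4 mod 16 = 4 bytes.  */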
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is OK only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                              TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
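/* Illustrative arithmetic (numbers not from the original source): with
   4-byte elements, a 16-byte vectype, DR_MISALIGNMENT (DR) = 8 and
   NPEEL = 2, the known-alignment case below computes (8 + 2*4) mod 16 = 0,
   i.e. DR becomes aligned in the vector loop.  */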
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */
static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
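/* Illustrative example (not from the original source): for a V4SI access
   (4 elements) that is part of an interleaving group of size 2 and is
   misaligned by one element, the prolog would need 4 - 1 = 3 iterations;
   3 is not a multiple of the group size 2, so alignment is not considered
   reachable by peeling.  */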
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop (VF - misalignment) is a multiple of the number
         of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  tree stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */
1821 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1823 stmt = DR_STMT (dr);
1824 stmt_info = vinfo_for_stmt (stmt);
1826 /* For interleaving, only the alignment of the first access
1827 matters. */
1828 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1829 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1830 continue;
1832 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1834 do_peeling = vector_alignment_reachable_p (dr);
1835 if (do_peeling)
1836 dr0 = dr;
1837 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1838 fprintf (vect_dump, "vector alignment may not be reachable");
1839 break;
1843 vect_versioning_for_alias_required =
1844 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1846 /* Temporarily, if versioning for alias is required, we disable peeling
1847 until we support peeling and versioning. Often peeling for alignment
1848 will require peeling for loop-bound, which in turn requires that we
1849 know how to adjust the loop ivs after the loop. */
1850 if (vect_versioning_for_alias_required
1851 || !vect_can_advance_ivs_p (loop_vinfo)
1852 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1853 do_peeling = false;
1855 if (do_peeling)
1857 int mis;
1858 int npeel = 0;
1859 tree stmt = DR_STMT (dr0);
1860 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1862 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1864 if (known_alignment_for_access_p (dr0))
1866 /* Since the misalignment is known at compile time, compute the number
1867 of iterations in the peeled loop (the peeling factor) for use in
1868 updating DR_MISALIGNMENT values. The peeling factor is the
1869 vectorization factor minus the misalignment as an element count. */
1870 mis = DR_MISALIGNMENT (dr0);
1871 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1872 npeel = nelements - mis;
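/* A worked example with illustrative numbers (not tied to any
   particular target): for V4SI vectors (nelements = 4, 4-byte
   elements) and DR_MISALIGNMENT (dr0) = 12 bytes, we get
   mis = 12 / 4 = 3 elements and npeel = 4 - 3 = 1, i.e. peeling
   one scalar iteration moves the access to a 16-byte boundary.  */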
1874 /* For interleaved data access every iteration accesses all the
1875 members of the group, therefore we divide the number of iterations
1876 by the group size. */
1877 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1878 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1879 npeel /= DR_GROUP_SIZE (stmt_info);
1881 if (vect_print_dump_info (REPORT_DETAILS))
1882 fprintf (vect_dump, "Try peeling by %d", npeel);
1885 /* Ensure that all data refs can be vectorized after the peel. */
1886 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 int save_misalignment;
1890 if (dr == dr0)
1891 continue;
1893 stmt = DR_STMT (dr);
1894 stmt_info = vinfo_for_stmt (stmt);
1895 /* For interleaving, only the alignment of the first access
1896 matters. */
1897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1898 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1899 continue;
1901 save_misalignment = DR_MISALIGNMENT (dr);
1902 vect_update_misalignment_for_peel (dr, dr0, npeel);
1903 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1904 SET_DR_MISALIGNMENT (dr, save_misalignment);
1906 if (!supportable_dr_alignment)
1908 do_peeling = false;
1909 break;
1913 if (do_peeling)
1915 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1916 If the misalignment of DR_i is identical to that of dr0 then set
1917 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1918 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1919 by the peeling factor times the element size of DR_i (MOD the
1920 vectorization factor times the size). Otherwise, the
1921 misalignment of DR_i must be set to unknown. */
1922 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1923 if (dr != dr0)
1924 vect_update_misalignment_for_peel (dr, dr0, npeel);
1926 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1927 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1928 SET_DR_MISALIGNMENT (dr0, 0);
1929 if (vect_print_dump_info (REPORT_ALIGNMENT))
1930 fprintf (vect_dump, "Alignment of access forced using peeling.");
1932 if (vect_print_dump_info (REPORT_DETAILS))
1933 fprintf (vect_dump, "Peeling for alignment will be applied.");
1935 stat = vect_verify_datarefs_alignment (loop_vinfo);
1936 gcc_assert (stat);
1937 return stat;
1942 /* (2) Versioning to force alignment. */
1944 /* Try versioning if:
1945 1) flag_tree_vect_loop_version is TRUE
1946 2) optimize_size is FALSE
1947 3) there is at least one unsupported misaligned data ref with an unknown
1948 misalignment, and
1949 4) all misaligned data refs with a known misalignment are supported, and
1950 5) the number of runtime alignment checks is within reason. */
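/* A minimal sketch of the versioned code this produces, assuming two
   potentially misaligned pointers P and Q (the actual guard is built
   later by the transformation phase from LOOP_VINFO_PTR_MASK and
   LOOP_VINFO_MAY_MISALIGN_STMTS):

     if ((((uintptr_t) p | (uintptr_t) q) & mask) == 0)
       { ... vectorized loop: all checked refs aligned ... }
     else
       { ... scalar loop: no alignment assumptions ... }  */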
1952 do_versioning =
1953 flag_tree_vect_loop_version
1954 && (!optimize_size)
1955 && (!loop->inner); /* FORNOW */
1957 if (do_versioning)
1959 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1961 stmt = DR_STMT (dr);
1962 stmt_info = vinfo_for_stmt (stmt);
1964 /* For interleaving, only the alignment of the first access
1965 matters. */
1966 if (aligned_access_p (dr)
1967 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1968 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1969 continue;
1971 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1973 if (!supportable_dr_alignment)
1975 tree stmt;
1976 int mask;
1977 tree vectype;
1979 if (known_alignment_for_access_p (dr)
1980 || VEC_length (tree,
1981 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1982 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1984 do_versioning = false;
1985 break;
1988 stmt = DR_STMT (dr);
1989 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1990 gcc_assert (vectype);
1992 /* The rightmost bits of an aligned address must be zeros.
1993 Construct the mask needed for this test. For example,
1994 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1995 mask must be 15 = 0xf. */
1996 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1998 /* FORNOW: use the same mask to test all potentially unaligned
1999 references in the loop. The vectorizer currently supports
2000 a single vector size, see the reference to
2001 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2002 vectorization factor is computed. */
2003 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2004 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2005 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2006 VEC_safe_push (tree, heap,
2007 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2008 DR_STMT (dr));
2012 /* Versioning requires at least one misaligned data reference. */
2013 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2014 do_versioning = false;
2015 else if (!do_versioning)
2016 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2019 if (do_versioning)
2021 VEC(tree,heap) *may_misalign_stmts
2022 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2023 tree stmt;
2025 /* It can now be assumed that the data references in the statements
2026 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2027 of the loop being vectorized. */
2028 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2031 dr = STMT_VINFO_DATA_REF (stmt_info);
2032 SET_DR_MISALIGNMENT (dr, 0);
2033 if (vect_print_dump_info (REPORT_ALIGNMENT))
2034 fprintf (vect_dump, "Alignment of access forced using versioning.");
2037 if (vect_print_dump_info (REPORT_DETAILS))
2038 fprintf (vect_dump, "Versioning for alignment will be applied.");
2040 /* Peeling and versioning can't be done together at this time. */
2041 gcc_assert (! (do_peeling && do_versioning));
2043 stat = vect_verify_datarefs_alignment (loop_vinfo);
2044 gcc_assert (stat);
2045 return stat;
2048 /* This point is reached if neither peeling nor versioning is being done. */
2049 gcc_assert (! (do_peeling || do_versioning));
2051 stat = vect_verify_datarefs_alignment (loop_vinfo);
2052 return stat;
2056 /* Function vect_analyze_data_refs_alignment
2058 Analyze the alignment of the data-references in the loop.
2059 Return FALSE if a data reference is found that cannot be vectorized. */
2061 static bool
2062 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2064 if (vect_print_dump_info (REPORT_DETAILS))
2065 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2067 if (!vect_compute_data_refs_alignment (loop_vinfo))
2069 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2070 fprintf (vect_dump,
2071 "not vectorized: can't calculate alignment for data ref.");
2072 return false;
2075 return true;
2079 /* Analyze groups of strided accesses: check that DR belongs to a group of
2080 strided accesses of legal size, step, etc. Detect gaps, single element
2081 interleaving, and other special cases. Set strided access info.
2082 Collect groups of strided stores for further use in SLP analysis. */
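/* For instance, in the illustrative loop

     for (i = 0; i < N; i++)
       {
         a[2*i]   = x;    <-- first store of the group
         a[2*i+1] = y;    <-- second store, same step
       }

   the two stores of 4-byte elements have DR_STEP 8, so they form an
   interleaving group of size 2 that is collected for SLP analysis.  */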
2084 static bool
2085 vect_analyze_group_access (struct data_reference *dr)
2087 tree step = DR_STEP (dr);
2088 tree scalar_type = TREE_TYPE (DR_REF (dr));
2089 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2090 tree stmt = DR_STMT (dr);
2091 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2092 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2093 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2094 HOST_WIDE_INT stride;
2095 bool slp_impossible = false;
2097 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2098 interleaving group (including gaps). */
2099 stride = dr_step / type_size;
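/* E.g., for 4-byte elements accessed with DR_STEP 16 this gives
   STRIDE = 16 / 4 = 4: a group of four elements per iteration,
   gaps included.  */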
2101 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2102 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2104 /* Check if this DR is part of an interleaving group, and is a single
2105 element of the group that is accessed in the loop. */
2107 /* Gaps are supported only for loads. STEP must be a multiple of the type
2108 size. The size of the group must be a power of 2. */
2109 if (DR_IS_READ (dr)
2110 && (dr_step % type_size) == 0
2111 && stride > 0
2112 && exact_log2 (stride) != -1)
2114 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2115 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2116 if (vect_print_dump_info (REPORT_DR_DETAILS))
2118 fprintf (vect_dump, "Detected single element interleaving %d ",
2119 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2120 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2121 fprintf (vect_dump, " step ");
2122 print_generic_expr (vect_dump, step, TDF_SLIM);
2124 return true;
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump, "not consecutive access");
2128 return false;
2131 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2133 /* First stmt in the interleaving chain. Check the chain. */
2134 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2135 struct data_reference *data_ref = dr;
2136 unsigned int count = 1;
2137 tree next_step;
2138 tree prev_init = DR_INIT (data_ref);
2139 tree prev = stmt;
2140 HOST_WIDE_INT diff, count_in_bytes;
2142 while (next)
2144 /* Skip identical data-refs. In case two or more stmts share a data-ref
2145 (supported only for loads), we vectorize only the first stmt, and
2146 the rest get their vectorized loads from the first one. */
2147 if (!tree_int_cst_compare (DR_INIT (data_ref),
2148 DR_INIT (STMT_VINFO_DATA_REF (
2149 vinfo_for_stmt (next)))))
2151 if (!DR_IS_READ (data_ref))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "Two store stmts share the same dr.");
2155 return false;
2158 /* Check that there are no load-store dependences for these loads,
2159 to prevent a load-store-load sequence to the same location. */
2160 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2161 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump,
2165 "READ_WRITE dependence in interleaving.");
2166 return false;
2169 /* For the load, reuse the data-ref of the earlier load. */
2170 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2172 prev = next;
2173 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2174 continue;
2176 prev = next;
2178 /* Check that all the accesses have the same STEP. */
2179 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2180 if (tree_int_cst_compare (step, next_step))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "not consecutive access in interleaving");
2184 return false;
2187 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2188 /* Check that the distance between two accesses is equal to the type
2189 size. Otherwise, we have gaps. */
2190 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2191 - TREE_INT_CST_LOW (prev_init)) / type_size;
2192 if (diff != 1)
2194 /* FORNOW: SLP of accesses with gaps is not supported. */
2195 slp_impossible = true;
2196 if (!DR_IS_READ (data_ref))
2198 if (vect_print_dump_info (REPORT_DETAILS))
2199 fprintf (vect_dump, "interleaved store with gaps");
2200 return false;
2204 /* Store the gap from the previous member of the group. If there is no
2205 gap in the access, DR_GROUP_GAP is always 1. */
2206 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2208 prev_init = DR_INIT (data_ref);
2209 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2210 /* Count the number of data-refs in the chain. */
2211 count++;
2214 /* COUNT is the number of accesses found; we multiply it by the size of
2215 the type to get COUNT_IN_BYTES. */
2216 count_in_bytes = type_size * count;
2218 /* Check that the size of the interleaving is not greater than STEP. */
2219 if (dr_step < count_in_bytes)
2221 if (vect_print_dump_info (REPORT_DETAILS))
2223 fprintf (vect_dump, "interleaving size is greater than step for ");
2224 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2226 return false;
2229 /* Check that the size of the interleaving is equal to STEP for stores,
2230 i.e., that there are no gaps. */
2231 if (dr_step != count_in_bytes)
2233 if (DR_IS_READ (dr))
2235 slp_impossible = true;
2236 /* There is a gap after the last load in the group. This gap is the
2237 difference between the stride and the number of elements. When
2238 there is no gap, this difference is 0. */
2239 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
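/* Illustrative numbers: loads of a[4*i] and a[4*i+1] with 4-byte
   elements have dr_step 16, hence stride = 4 but count = 2 and
   count_in_bytes = 8, so the trailing gap recorded here is
   4 - 2 = 2 elements.  */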
2241 else
2243 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump, "interleaved store with gaps");
2245 return false;
2249 /* Check that STEP is a multiple of type size. */
2250 if ((dr_step % type_size) != 0)
2252 if (vect_print_dump_info (REPORT_DETAILS))
2254 fprintf (vect_dump, "step is not a multiple of type size: step ");
2255 print_generic_expr (vect_dump, step, TDF_SLIM);
2256 fprintf (vect_dump, " size ");
2257 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2258 TDF_SLIM);
2260 return false;
2263 /* FORNOW: we handle only interleaving that is a power of 2.
2264 We don't fail here if it may still be possible to vectorize the
2265 group using SLP. If not, the size of the group will be checked in
2266 vect_analyze_operations, and the vectorization will fail. */
2267 if (exact_log2 (stride) == -1)
2269 if (vect_print_dump_info (REPORT_DETAILS))
2270 fprintf (vect_dump, "interleaving is not a power of 2");
2272 if (slp_impossible)
2273 return false;
2275 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2276 if (vect_print_dump_info (REPORT_DETAILS))
2277 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2279 /* SLP: create an SLP data structure for every interleaving group of
2280 stores for further analysis in vect_analyze_slp. */
2281 if (!DR_IS_READ (dr) && !slp_impossible)
2282 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2285 return true;
2289 /* Analyze the access pattern of the data-reference DR.
2290 In case of non-consecutive accesses call vect_analyze_group_access() to
2291 analyze groups of strided accesses. */
2293 static bool
2294 vect_analyze_data_ref_access (struct data_reference *dr)
2296 tree step = DR_STEP (dr);
2297 tree scalar_type = TREE_TYPE (DR_REF (dr));
2298 tree stmt = DR_STMT (dr);
2299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2301 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2302 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2304 if (!step)
2306 if (vect_print_dump_info (REPORT_DETAILS))
2307 fprintf (vect_dump, "bad data-ref access");
2308 return false;
2311 /* Don't allow invariant accesses. */
2312 if (dr_step == 0)
2313 return false;
2315 if (nested_in_vect_loop_p (loop, stmt))
2317 /* Interleaved accesses are not yet supported within outer-loop
2318 vectorization for references in the inner-loop. */
2319 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2321 /* For the rest of the analysis we use the outer-loop step. */
2322 step = STMT_VINFO_DR_STEP (stmt_info);
2323 dr_step = TREE_INT_CST_LOW (step);
2325 if (dr_step == 0)
2327 if (vect_print_dump_info (REPORT_ALIGNMENT))
2328 fprintf (vect_dump, "zero step in outer loop.");
2329 if (DR_IS_READ (dr))
2330 return true;
2331 else
2332 return false;
2336 /* Consecutive? */
2337 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2339 /* Mark that it is not interleaving. */
2340 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2341 return true;
2344 if (nested_in_vect_loop_p (loop, stmt))
2346 if (vect_print_dump_info (REPORT_ALIGNMENT))
2347 fprintf (vect_dump, "strided access in outer loop.");
2348 return false;
2351 /* Non-consecutive access - check if it is part of an interleaving group. */
2352 return vect_analyze_group_access (dr);
2356 /* Function vect_analyze_data_ref_accesses.
2358 Analyze the access pattern of all the data references in the loop.
2360 FORNOW: the only access pattern that is considered vectorizable is a
2361 simple step 1 (consecutive) access.
2363 FORNOW: handle only arrays and pointer accesses. */
2365 static bool
2366 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2368 unsigned int i;
2369 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2370 struct data_reference *dr;
2372 if (vect_print_dump_info (REPORT_DETAILS))
2373 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2375 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2376 if (!vect_analyze_data_ref_access (dr))
2378 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2379 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2380 return false;
2383 return true;
2386 /* Function vect_prune_runtime_alias_test_list.
2388 Prune a list of ddrs to be tested at run-time by versioning for alias.
2389 Return FALSE if the resulting list of ddrs is longer than allowed by
2390 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
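/* The idea, sketched on hypothetical refs: if checking (a, b) and
   checking (c, d) would compare the very same pair of address
   ranges at run time, the second ddr is redundant and is removed,
   keeping the number of emitted checks below the parameter limit.
   The equality test used is vect_vfa_range_equal below.  */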
2392 static bool
2393 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2395 VEC (ddr_p, heap) * ddrs =
2396 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2397 unsigned i, j;
2399 if (vect_print_dump_info (REPORT_DETAILS))
2400 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2402 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2404 bool found;
2405 ddr_p ddr_i;
2407 ddr_i = VEC_index (ddr_p, ddrs, i);
2408 found = false;
2410 for (j = 0; j < i; j++)
2412 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2414 if (vect_vfa_range_equal (ddr_i, ddr_j))
2416 if (vect_print_dump_info (REPORT_DR_DETAILS))
2418 fprintf (vect_dump, "found equal ranges ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2420 fprintf (vect_dump, ", ");
2421 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2422 fprintf (vect_dump, " and ");
2423 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2424 fprintf (vect_dump, ", ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2427 found = true;
2428 break;
2432 if (found)
2434 VEC_ordered_remove (ddr_p, ddrs, i);
2435 continue;
2437 i++;
2440 if (VEC_length (ddr_p, ddrs) >
2441 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2443 if (vect_print_dump_info (REPORT_DR_DETAILS))
2445 fprintf (vect_dump,
2446 "disable versioning for alias - max number of generated "
2447 "checks exceeded.");
2450 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2452 return false;
2455 return true;
2458 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2460 void
2461 vect_free_slp_tree (slp_tree node)
2463 if (!node)
2464 return;
2466 if (SLP_TREE_LEFT (node))
2467 vect_free_slp_tree (SLP_TREE_LEFT (node));
2469 if (SLP_TREE_RIGHT (node))
2470 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2472 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2474 if (SLP_TREE_VEC_STMTS (node))
2475 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2477 free (node);
2481 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2482 of a legal type and that they match the defs of the first stmt of the SLP
2483 group (stored in FIRST_STMT_...). */
2485 static bool
2486 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2487 tree rhs, VEC (tree, heap) **def_stmts0,
2488 VEC (tree, heap) **def_stmts1,
2489 enum vect_def_type *first_stmt_dt0,
2490 enum vect_def_type *first_stmt_dt1,
2491 tree *first_stmt_def0_type,
2492 tree *first_stmt_def1_type,
2493 tree *first_stmt_const_oprnd,
2494 int ncopies_for_cost)
2496 tree oprnd;
2497 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2498 unsigned int i, number_of_oprnds = op_type;
2499 tree def, def_stmt;
2500 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2501 stmt_vec_info stmt_info =
2502 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2504 /* Store. */
2505 if (!op_type)
2506 number_of_oprnds = 1;
2507 else
2508 gcc_assert (op_type == unary_op || op_type == binary_op);
2510 for (i = 0; i < number_of_oprnds; i++)
2512 if (op_type)
2513 oprnd = TREE_OPERAND (rhs, i);
2514 else
2515 oprnd = rhs;
2517 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2518 || (!def_stmt && dt[i] != vect_constant_def))
2520 if (vect_print_dump_info (REPORT_SLP))
2522 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2523 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2526 return false;
2529 if (!*first_stmt_dt0)
2531 /* op0 of the first stmt of the group - store its info. */
2532 *first_stmt_dt0 = dt[i];
2533 if (def)
2534 *first_stmt_def0_type = TREE_TYPE (def);
2535 else
2536 *first_stmt_const_oprnd = oprnd;
2538 /* Analyze costs (for the first stmt of the group only). */
2539 if (op_type)
2540 /* Not a memory operation (we don't call this function for loads). */
2541 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2542 else
2543 /* Store. */
2544 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2547 else
2549 if (!*first_stmt_dt1 && i == 1)
2551 /* op1 of the first stmt of the group - store its info. */
2552 *first_stmt_dt1 = dt[i];
2553 if (def)
2554 *first_stmt_def1_type = TREE_TYPE (def);
2555 else
2557 /* We assume that the stmt contains only one constant
2558 operand. We fail otherwise, to be on the safe side. */
2559 if (*first_stmt_const_oprnd)
2561 if (vect_print_dump_info (REPORT_SLP))
2562 fprintf (vect_dump, "Build SLP failed: two constant "
2563 "oprnds in stmt");
2564 return false;
2566 *first_stmt_const_oprnd = oprnd;
2569 else
2571 /* Not the first stmt of the group; check that the def-stmt/s match
2572 the def-stmt/s of the first stmt. */
2573 if ((i == 0
2574 && (*first_stmt_dt0 != dt[i]
2575 || (*first_stmt_def0_type && def
2576 && *first_stmt_def0_type != TREE_TYPE (def))))
2577 || (i == 1
2578 && (*first_stmt_dt1 != dt[i]
2579 || (*first_stmt_def1_type && def
2580 && *first_stmt_def1_type != TREE_TYPE (def))))
2581 || (!def
2582 && TREE_TYPE (*first_stmt_const_oprnd)
2583 != TREE_TYPE (oprnd)))
2585 if (vect_print_dump_info (REPORT_SLP))
2586 fprintf (vect_dump, "Build SLP failed: different types ");
2588 return false;
2593 /* Check the types of the definitions. */
2594 switch (dt[i])
2596 case vect_constant_def:
2597 case vect_invariant_def:
2598 break;
2600 case vect_loop_def:
2601 if (i == 0)
2602 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2603 else
2604 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2605 break;
2607 default:
2608 /* FORNOW: Not supported. */
2609 if (vect_print_dump_info (REPORT_SLP))
2611 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2612 print_generic_expr (vect_dump, def, TDF_SLIM);
2615 return false;
2619 return true;
2623 /* Recursively build an SLP tree starting from NODE.
2624 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2625 permutation, or are of unsupported operation types. Otherwise, return
2626 TRUE.
2627 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2628 in the case of multiple types for now. */
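/* An illustrative group of two isomorphic stmts (not from any
   particular testcase):

     a[2*i]   = b[2*i]   + c[2*i];
     a[2*i+1] = b[2*i+1] + c[2*i+1];

   The root node holds the two PLUS stmts; its left and right
   children hold the matching groups of loads from B and C, where
   the recursion stops.  */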
2630 static bool
2631 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2632 unsigned int group_size, bool *slp_impossible,
2633 int *inside_cost, int *outside_cost,
2634 int ncopies_for_cost)
2636 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2637 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2638 unsigned int i;
2639 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2640 tree stmt = VEC_index (tree, stmts, 0);
2641 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2642 enum tree_code first_stmt_code = 0;
2643 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2644 tree lhs, rhs, prev_stmt = NULL_TREE;
2645 bool stop_recursion = false, need_same_oprnds = false;
2646 tree vectype, scalar_type, first_op1 = NULL_TREE;
2647 unsigned int vectorization_factor = 0, ncopies;
2648 optab optab;
2649 int icode;
2650 enum machine_mode optab_op2_mode;
2651 enum machine_mode vec_mode;
2652 tree first_stmt_const_oprnd = NULL_TREE;
2653 struct data_reference *first_dr;
2655 /* For every stmt in NODE find its def stmt/s. */
2656 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2658 if (vect_print_dump_info (REPORT_SLP))
2660 fprintf (vect_dump, "Build SLP for ");
2661 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2664 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2666 if (vect_print_dump_info (REPORT_SLP))
2668 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2669 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2672 return false;
2675 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2676 vectype = get_vectype_for_scalar_type (scalar_type);
2677 if (!vectype)
2679 if (vect_print_dump_info (REPORT_SLP))
2681 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2682 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2684 return false;
2687 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2688 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2689 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2690 if (ncopies > 1)
2692 /* FORNOW. */
2693 if (vect_print_dump_info (REPORT_SLP))
2694 fprintf (vect_dump, "SLP failed - multiple types ");
2696 *slp_impossible = true;
2697 return false;
2700 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2701 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2703 /* Check the operation. */
2704 if (i == 0)
2706 first_stmt_code = TREE_CODE (rhs);
2708 /* Shift arguments should be equal in all the packed stmts for a
2709 vector shift with scalar shift operand. */
2710 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR
2711 || TREE_CODE (rhs) == LROTATE_EXPR
2712 || TREE_CODE (rhs) == RROTATE_EXPR)
2714 vec_mode = TYPE_MODE (vectype);
2716 /* First see if we have a vector/vector shift. */
2717 optab = optab_for_tree_code (TREE_CODE (rhs), vectype,
2718 optab_vector);
2720 if (!optab
2721 || (optab->handlers[(int) vec_mode].insn_code
2722 == CODE_FOR_nothing))
2724 /* No vector/vector shift, try for a vector/scalar shift. */
2725 optab = optab_for_tree_code (TREE_CODE (rhs), vectype,
2726 optab_scalar);
2728 if (!optab)
2730 if (vect_print_dump_info (REPORT_SLP))
2731 fprintf (vect_dump, "Build SLP failed: no optab.");
2732 return false;
2734 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2735 if (icode == CODE_FOR_nothing)
2737 if (vect_print_dump_info (REPORT_SLP))
2738 fprintf (vect_dump,
2739 "Build SLP failed: op not supported by target.");
2740 return false;
2742 optab_op2_mode = insn_data[icode].operand[2].mode;
2743 if (!VECTOR_MODE_P (optab_op2_mode))
2745 need_same_oprnds = true;
2746 first_op1 = TREE_OPERAND (rhs, 1);
2751 else
2753 if (first_stmt_code != TREE_CODE (rhs))
2755 if (vect_print_dump_info (REPORT_SLP))
2757 fprintf (vect_dump,
2758 "Build SLP failed: different operation in stmt ");
2759 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2762 return false;
2765 if (need_same_oprnds
2766 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2768 if (vect_print_dump_info (REPORT_SLP))
2770 fprintf (vect_dump,
2771 "Build SLP failed: different shift arguments in ");
2772 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2775 return false;
2779 /* Strided store or load. */
2780 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2782 if (REFERENCE_CLASS_P (lhs))
2784 /* Store. */
2785 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2786 &def_stmts0, &def_stmts1,
2787 &first_stmt_dt0,
2788 &first_stmt_dt1,
2789 &first_stmt_def0_type,
2790 &first_stmt_def1_type,
2791 &first_stmt_const_oprnd,
2792 ncopies_for_cost))
2793 return false;
2795 else
2797 /* Load. */
2798 if (i == 0)
2800 /* The first stmt of the SLP group should be the first load of
2801 the interleaving chain if data permutation is not allowed.
2802 Check that there is no gap between the loads. */
2803 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2804 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2806 /* FORNOW: data permutations and gaps in loads are not
2807 supported. */
2808 if (vect_print_dump_info (REPORT_SLP))
2810 fprintf (vect_dump, "Build SLP failed: strided "
2811 "loads need permutation or have gaps ");
2812 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2815 return false;
2818 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2819 if (vect_supportable_dr_alignment (first_dr)
2820 == dr_unaligned_unsupported)
2822 if (vect_print_dump_info (REPORT_SLP))
2824 fprintf (vect_dump, "Build SLP failed: unsupported "
2825 "unaligned load ");
2826 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2829 return false;
2832 /* Analyze costs (for the first stmt in the group). */
2833 vect_model_load_cost (vinfo_for_stmt (stmt),
2834 ncopies_for_cost, *node);
2836 else
2838 /* Check that we have consecutive loads from the interleaving
2839 chain and that there is no gap between the loads. */
2840 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
2841 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
2843 /* FORNOW: data permutations and gaps in loads are not
2844 supported. */
2845 if (vect_print_dump_info (REPORT_SLP))
2847 fprintf (vect_dump, "Build SLP failed: strided "
2848 "loads need permutation or have gaps ");
2849 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2851 return false;
2855 prev_stmt = stmt;
2857 /* We stop growing the tree when we reach a group of loads. */
2858 stop_recursion = true;
2859 continue;
2861 } /* Strided access. */
2862 else
2864 if (REFERENCE_CLASS_P (rhs))
2866 /* Not a strided load. */
2867 if (vect_print_dump_info (REPORT_SLP))
2869 fprintf (vect_dump, "Build SLP failed: not strided load ");
2870 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2873 /* FORNOW: non-strided loads are not supported. */
2874 return false;
2877 /* Not memory operation. */
2878 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2880 if (vect_print_dump_info (REPORT_SLP))
2882 fprintf (vect_dump, "Build SLP failed: operation");
2883 fprintf (vect_dump, " unsupported ");
2884 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2887 return false;
2890 /* Find the def-stmts. */
2891 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2892 &def_stmts1, &first_stmt_dt0,
2893 &first_stmt_dt1,
2894 &first_stmt_def0_type,
2895 &first_stmt_def1_type,
2896 &first_stmt_const_oprnd,
2897 ncopies_for_cost))
2898 return false;
2902 /* Add the costs of the node to the overall instance costs. */
2903 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2904 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2906 /* Strided loads were reached - stop the recursion. */
2907 if (stop_recursion)
2908 return true;
2910 /* Create SLP_TREE nodes for the definition node/s. */
2911 if (first_stmt_dt0 == vect_loop_def)
2913 slp_tree left_node = XNEW (struct _slp_tree);
2914 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2915 SLP_TREE_VEC_STMTS (left_node) = NULL;
2916 SLP_TREE_LEFT (left_node) = NULL;
2917 SLP_TREE_RIGHT (left_node) = NULL;
2918 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2919 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2920 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2921 slp_impossible, inside_cost, outside_cost,
2922 ncopies_for_cost))
2923 return false;
2925 SLP_TREE_LEFT (*node) = left_node;
2928 if (first_stmt_dt1 == vect_loop_def)
2930 slp_tree right_node = XNEW (struct _slp_tree);
2931 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2932 SLP_TREE_VEC_STMTS (right_node) = NULL;
2933 SLP_TREE_LEFT (right_node) = NULL;
2934 SLP_TREE_RIGHT (right_node) = NULL;
2935 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2936 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2937 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2938 slp_impossible, inside_cost, outside_cost,
2939 ncopies_for_cost))
2940 return false;
2942 SLP_TREE_RIGHT (*node) = right_node;
2945 return true;
2949 static void
2950 vect_print_slp_tree (slp_tree node)
2952 int i;
2953 tree stmt;
2955 if (!node)
2956 return;
2958 fprintf (vect_dump, "node ");
2959 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2961 fprintf (vect_dump, "\n\tstmt %d ", i);
2962 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2964 fprintf (vect_dump, "\n");
2966 vect_print_slp_tree (SLP_TREE_LEFT (node));
2967 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2971 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2972 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2973 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2974 stmts in NODE are to be marked. */
2976 static void
2977 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2979 int i;
2980 tree stmt;
2982 if (!node)
2983 return;
2985 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2986 if (j < 0 || i == j)
2987 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2989 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2990 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2994 /* Analyze an SLP instance starting from a group of strided stores. Call
2995 vect_build_slp_tree to build a tree of packed stmts if possible.
2996 Return FALSE if it's impossible to SLP any stmt in the loop. */
2998 static bool
2999 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
3001 slp_instance new_instance;
3002 slp_tree node = XNEW (struct _slp_tree);
3003 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3004 unsigned int unrolling_factor = 1, nunits;
3005 tree vectype, scalar_type, next;
3006 unsigned int vectorization_factor = 0, ncopies;
3007 bool slp_impossible = false;
3008 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3010 /* FORNOW: multiple types are not supported. */
3011 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
3012 vectype = get_vectype_for_scalar_type (scalar_type);
3013 if (!vectype)
3015 if (vect_print_dump_info (REPORT_SLP))
3017 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3018 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3020 return false;
3023 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3024 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3025 ncopies = vectorization_factor / nunits;
3026 if (ncopies > 1)
3028 if (vect_print_dump_info (REPORT_SLP))
3029 fprintf (vect_dump, "SLP failed - multiple types ");
3031 return false;
3034 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3035 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3036 next = stmt;
3037 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3038 while (next)
3040 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3041 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3044 SLP_TREE_VEC_STMTS (node) = NULL;
3045 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3046 SLP_TREE_LEFT (node) = NULL;
3047 SLP_TREE_RIGHT (node) = NULL;
3048 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3049 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3051 /* Calculate the unrolling factor. */
3052 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3054 /* Calculate the number of vector stmts to create based on the unrolling
3055 factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3056 GROUP_SIZE / NUNITS otherwise). */
3057 ncopies_for_cost = unrolling_factor * group_size / nunits;
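/* Worked examples: GROUP_SIZE = 2, NUNITS = 4 gives an unrolling
   factor of lcm (4, 2) / 2 = 2 and ncopies_for_cost = 2 * 2 / 4 = 1;
   GROUP_SIZE = 3, NUNITS = 4 gives an unrolling factor of 4 and
   ncopies_for_cost = 4 * 3 / 4 = 3.  */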
3059 /* Build the tree for the SLP instance. */
3060 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3061 &inside_cost, &outside_cost, ncopies_for_cost))
3063 /* Create a new SLP instance. */
3064 new_instance = XNEW (struct _slp_instance);
3065 SLP_INSTANCE_TREE (new_instance) = node;
3066 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3067 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3068 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3069 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3070 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3071 new_instance);
3072 if (vect_print_dump_info (REPORT_SLP))
3073 vect_print_slp_tree (node);
3075 return true;
3078 /* Failed to SLP. */
3079 /* Free the allocated memory. */
3080 vect_free_slp_tree (node);
3082 if (slp_impossible)
3083 return false;
3085 /* SLP failed for this instance, but it is still possible to SLP other stmts
3086 in the loop. */
3087 return true;
3091 /* Check if there are stmts in the loop that can be vectorized using SLP.
3092 Build SLP trees of packed scalar stmts if SLP is possible. */
3094 static bool
3095 vect_analyze_slp (loop_vec_info loop_vinfo)
3097 unsigned int i;
3098 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3099 tree store;
3101 if (vect_print_dump_info (REPORT_SLP))
3102 fprintf (vect_dump, "=== vect_analyze_slp ===");
3104 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3105 if (!vect_analyze_slp_instance (loop_vinfo, store))
3107 /* SLP failed. No instance can be SLPed in the loop. */
3108 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3109 fprintf (vect_dump, "SLP failed.");
3111 return false;
3114 return true;
3118 /* For each possible SLP instance decide whether to SLP it and calculate the
3119 overall unrolling factor needed to SLP the loop. */
3121 static void
3122 vect_make_slp_decision (loop_vec_info loop_vinfo)
3124 unsigned int i, unrolling_factor = 1;
3125 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3126 slp_instance instance;
3127 int decided_to_slp = 0;
3129 if (vect_print_dump_info (REPORT_SLP))
3130 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3132 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3134 /* FORNOW: SLP if you can. */
3135 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3136 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3138 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3139 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3140 loop-based vectorization. Such stmts will be marked as HYBRID. */
3141 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3142 decided_to_slp++;
3145 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3147 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3148 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3149 decided_to_slp, unrolling_factor);
3153 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3154 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
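/* A hypothetical illustration: if x = b[i] + c[i] is PURE_SLP
   because it feeds an SLPed store group, but X is also used by a
   stmt that is not SLPed (say, a summation vectorized loop-based),
   then the def of X must be marked HYBRID so it is vectorized in
   both schemes.  */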
3156 static void
3157 vect_detect_hybrid_slp_stmts (slp_tree node)
3159 int i;
3160 tree stmt;
3161 imm_use_iterator imm_iter;
3162 tree use_stmt;
3164 if (!node)
3165 return;
3167 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3168 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3169 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3170 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3171 if (vinfo_for_stmt (use_stmt)
3172 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3173 vect_mark_slp_stmts (node, hybrid, i);
3175 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3176 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3180 /* Find stmts that must be both vectorized and SLPed. */
3182 static void
3183 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3185 unsigned int i;
3186 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3187 slp_instance instance;
3189 if (vect_print_dump_info (REPORT_SLP))
3190 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3192 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3193 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3197 /* Function vect_analyze_data_refs.
3199 Find all the data references in the loop.
3201 The general structure of the analysis of data refs in the vectorizer is as
3202 follows:
3203 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3204 find and analyze all data-refs in the loop and their dependences.
3205 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
3206 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
3207 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
3211 static bool
3212 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3214 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3215 unsigned int i;
3216 VEC (data_reference_p, heap) *datarefs;
3217 struct data_reference *dr;
3218 tree scalar_type;
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3223 compute_data_dependences_for_loop (loop, true,
3224 &LOOP_VINFO_DATAREFS (loop_vinfo),
3225 &LOOP_VINFO_DDRS (loop_vinfo));
3227 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3228 from stmt_vec_info struct to DR and vectype. */
3229 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3231 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3233 tree stmt;
3234 stmt_vec_info stmt_info;
3235 basic_block bb;
3236 tree base, offset, init;
3238 if (!dr || !DR_REF (dr))
3240 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3241 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3242 return false;
3245 stmt = DR_STMT (dr);
3246 stmt_info = vinfo_for_stmt (stmt);
3248 /* Check that analysis of the data-ref succeeded. */
3249 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3250 || !DR_STEP (dr))
3252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3254 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3255 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3257 return false;
3260 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3263 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3264 "constant");
3265 return false;
3268 if (!DR_SYMBOL_TAG (dr))
3270 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3272 fprintf (vect_dump, "not vectorized: no memory tag for ");
3273 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3275 return false;
3278 base = unshare_expr (DR_BASE_ADDRESS (dr));
3279 offset = unshare_expr (DR_OFFSET (dr));
3280 init = unshare_expr (DR_INIT (dr));
3282 /* Update DR field in stmt_vec_info struct. */
3283 bb = bb_for_stmt (stmt);
3285 /* If the dataref is in an inner-loop of the loop that is considered
3286 for vectorization, we also want to analyze the access relative to
3287 the outer-loop (DR contains information only relative to the
3288 inner-most enclosing loop). We do that by building a reference to the
3289 first location accessed by the inner-loop, and analyze it relative to
3290 the outer-loop. */
3291 if (nested_in_vect_loop_p (loop, stmt))
3293 tree outer_step, outer_base, outer_init;
3294 HOST_WIDE_INT pbitsize, pbitpos;
3295 tree poffset;
3296 enum machine_mode pmode;
3297 int punsignedp, pvolatilep;
3298 affine_iv base_iv, offset_iv;
3299 tree dinit;
3301 /* Build a reference to the first location accessed by the
3302 inner-loop: *(BASE+INIT). (The first location is actually
3303 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3304 tree inner_base = build_fold_indirect_ref
3305 (fold_build2 (POINTER_PLUS_EXPR,
3306 TREE_TYPE (base), base,
3307 fold_convert (sizetype, init)));
3309 if (vect_print_dump_info (REPORT_DETAILS))
3311 fprintf (dump_file, "analyze in outer-loop: ");
3312 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3315 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3316 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3317 gcc_assert (outer_base != NULL_TREE);
3319 if (pbitpos % BITS_PER_UNIT != 0)
3321 if (vect_print_dump_info (REPORT_DETAILS))
3322 fprintf (dump_file, "failed: bit offset alignment.\n");
3323 return false;
3326 outer_base = build_fold_addr_expr (outer_base);
3327 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3329 if (vect_print_dump_info (REPORT_DETAILS))
3330 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3331 return false;
3334 if (offset)
3336 if (poffset)
3337 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3338 else
3339 poffset = offset;
3342 if (!poffset)
3344 offset_iv.base = ssize_int (0);
3345 offset_iv.step = ssize_int (0);
3347 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3349 if (vect_print_dump_info (REPORT_DETAILS))
3350 fprintf (dump_file, "evolution of offset is not affine.\n");
3351 return false;
3354 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3355 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3356 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3357 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3358 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3360 outer_step = size_binop (PLUS_EXPR,
3361 fold_convert (ssizetype, base_iv.step),
3362 fold_convert (ssizetype, offset_iv.step));
3364 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3365 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3366 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3367 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3368 STMT_VINFO_DR_OFFSET (stmt_info) =
3369 fold_convert (ssizetype, offset_iv.base);
3370 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3371 size_int (highest_pow2_factor (offset_iv.base));
3373 if (dump_file && (dump_flags & TDF_DETAILS))
3375 fprintf (dump_file, "\touter base_address: ");
3376 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3377 fprintf (dump_file, "\n\touter offset from base address: ");
3378 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3379 fprintf (dump_file, "\n\touter constant offset from base address: ");
3380 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3381 fprintf (dump_file, "\n\touter step: ");
3382 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3383 fprintf (dump_file, "\n\touter aligned to: ");
3384 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3388 if (STMT_VINFO_DATA_REF (stmt_info))
3390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3392 fprintf (vect_dump,
3393 "not vectorized: more than one data ref in stmt: ");
3394 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3396 return false;
3398 STMT_VINFO_DATA_REF (stmt_info) = dr;
3400 /* Set vectype for STMT. */
3401 scalar_type = TREE_TYPE (DR_REF (dr));
3402 STMT_VINFO_VECTYPE (stmt_info) =
3403 get_vectype_for_scalar_type (scalar_type);
3404 if (!STMT_VINFO_VECTYPE (stmt_info))
3406 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3408 fprintf (vect_dump,
3409 "not vectorized: no vectype for stmt: ");
3410 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3411 fprintf (vect_dump, " scalar_type: ");
3412 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3414 return false;
3418 return true;
3422 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3424 /* Function vect_mark_relevant.
3426 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3428 static void
3429 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3430 enum vect_relevant relevant, bool live_p)
3432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3433 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3434 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3436 if (vect_print_dump_info (REPORT_DETAILS))
3437 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3439 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3441 tree pattern_stmt;
3443 /* This is the last stmt in a sequence that was detected as a
3444 pattern that can potentially be vectorized. Don't mark the stmt
3445 as relevant/live because it's not going to be vectorized.
3446 Instead mark the pattern-stmt that replaces it. */
3448 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3450 if (vect_print_dump_info (REPORT_DETAILS))
3451 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3452 stmt_info = vinfo_for_stmt (pattern_stmt);
3453 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3454 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3455 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3456 stmt = pattern_stmt;
3459 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3460 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3461 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3463 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3464 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3466 if (vect_print_dump_info (REPORT_DETAILS))
3467 fprintf (vect_dump, "already marked relevant/live.");
3468 return;
3471 VEC_safe_push (tree, heap, *worklist, stmt);
3475 /* Function vect_stmt_relevant_p.
3477 Return true if STMT in loop that is represented by LOOP_VINFO is
3478 "relevant for vectorization".
3480 A stmt is considered "relevant for vectorization" if:
3481 - it has uses outside the loop.
3482 - it has vdefs (it alters memory).
3483 - it is a control stmt in the loop (other than the exit condition).
3485 CHECKME: what other side effects would the vectorizer allow? */
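/* Two illustrative cases: a[i] = b[i] has a vdef (it alters
   memory), so it is relevant; s_1 = s_0 + x with S_1 used after
   the loop (through a loop-exit phi) is live.  */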
3487 static bool
3488 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3489 enum vect_relevant *relevant, bool *live_p)
3491 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3492 ssa_op_iter op_iter;
3493 imm_use_iterator imm_iter;
3494 use_operand_p use_p;
3495 def_operand_p def_p;
3497 *relevant = vect_unused_in_loop;
3498 *live_p = false;
3500 /* cond stmt other than loop exit cond. */
3501 if (is_ctrl_stmt (stmt)
3502 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3503 *relevant = vect_used_in_loop;
3505 /* changing memory. */
3506 if (TREE_CODE (stmt) != PHI_NODE)
3507 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3509 if (vect_print_dump_info (REPORT_DETAILS))
3510 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3511 *relevant = vect_used_in_loop;
3514 /* uses outside the loop. */
3515 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3517 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3519 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3520 if (!flow_bb_inside_loop_p (loop, bb))
3522 if (vect_print_dump_info (REPORT_DETAILS))
3523 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3525 /* We expect all such uses to be in the loop exit phis
3526 (because of loop-closed SSA form). */
3527 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3528 gcc_assert (bb == single_exit (loop)->dest);
3530 *live_p = true;
3535 return (*live_p || *relevant);
3540 Function process_use.
3542 Inputs:
3543 - a USE in STMT in a loop represented by LOOP_VINFO
3544 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3545 that defined USE. This is done by calling vect_mark_relevant and passing it
3546 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3548 Outputs:
3549 Generally, LIVE_P and RELEVANT are used to define the liveness and
3550 relevance info of the DEF_STMT of this USE:
3551 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3552 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3553 Exceptions:
3554 - case 1: If USE is used only for address computations (e.g. array indexing),
3555 which does not need to be directly vectorized, then the liveness/relevance
3556 of the respective DEF_STMT is left unchanged.
3557 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3558 skip DEF_STMT because it has already been processed.
3559 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3560 be modified accordingly.
3562 Return true if everything is as expected. Return false otherwise. */
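/* An illustrative case 1: in T = I + J; X = A[T]; the use of T in
   the load serves only for address computation, so this use alone
   does not make the stmt defining T relevant.  */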
3564 static bool
3565 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3566 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3569 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3570 stmt_vec_info dstmt_vinfo;
3571 basic_block bb, def_bb;
3572 tree def, def_stmt;
3573 enum vect_def_type dt;
3575 /* case 1: we are only interested in uses that need to be vectorized. Uses
3576 that are used for address computation are not considered relevant. */
3577 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3578 return true;
3580 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3582 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3583 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3584 return false;
3587 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3588 return true;
3590 def_bb = bb_for_stmt (def_stmt);
3591 if (!flow_bb_inside_loop_p (loop, def_bb))
3593 if (vect_print_dump_info (REPORT_DETAILS))
3594 fprintf (vect_dump, "def_stmt is out of loop.");
3595 return true;
3598 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3599 DEF_STMT must have already been processed, because this should be the
3600 only way that STMT, which is a reduction-phi, was put in the worklist,
3601 as there should be no other uses for DEF_STMT in the loop. So we just
3602 check that everything is as expected, and we are done. */
3603 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3604 bb = bb_for_stmt (stmt);
3605 if (TREE_CODE (stmt) == PHI_NODE
3606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3607 && TREE_CODE (def_stmt) != PHI_NODE
3608 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3609 && bb->loop_father == def_bb->loop_father)
3611 if (vect_print_dump_info (REPORT_DETAILS))
3612 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3613 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3614 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3615 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3616 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3617 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3618 return true;
3621 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3622 outer-loop-header-bb:
3623 d = def_stmt
3624 inner-loop:
3625 stmt # use (d)
3626 outer-loop-tail-bb:
3627 ... */
3628 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3630 if (vect_print_dump_info (REPORT_DETAILS))
3631 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3632 switch (relevant)
3634 case vect_unused_in_loop:
3635 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3636 vect_used_by_reduction : vect_unused_in_loop;
3637 break;
3638 case vect_used_in_outer_by_reduction:
3639 relevant = vect_used_by_reduction;
3640 break;
3641 case vect_used_in_outer:
3642 relevant = vect_used_in_loop;
3643 break;
3644 case vect_used_by_reduction:
3645 case vect_used_in_loop:
3646 break;
3648 default:
3649 gcc_unreachable ();
3653 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3654 outer-loop-header-bb:
3656 inner-loop:
3657 d = def_stmt
3658 outer-loop-tail-bb:
3659 stmt # use (d) */
3660 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3662 if (vect_print_dump_info (REPORT_DETAILS))
3663 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3664 switch (relevant)
3666 case vect_unused_in_loop:
3667 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3668 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3669 break;
3671 case vect_used_in_outer_by_reduction:
3672 case vect_used_in_outer:
3673 break;
3675 case vect_used_by_reduction:
3676 relevant = vect_used_in_outer_by_reduction;
3677 break;
3679 case vect_used_in_loop:
3680 relevant = vect_used_in_outer;
3681 break;
3683 default:
3684 gcc_unreachable ();
3688 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3689 return true;
3693 /* Function vect_mark_stmts_to_be_vectorized.
3695 Not all stmts in the loop need to be vectorized. For example:
3697 for i...
3698 for j...
3699 1. T0 = i + j
3700 2. T1 = a[T0]
3702 3. j = j + 1
3704 Stmts 1 and 3 do not need to be vectorized, because loop control and
3705 addressing of vectorized data-refs are handled differently.
3707 This pass detects such stmts. */
3709 static bool
3710 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3712 VEC(tree,heap) *worklist;
3713 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3714 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3715 unsigned int nbbs = loop->num_nodes;
3716 block_stmt_iterator si;
3717 tree stmt;
3718 stmt_ann_t ann;
3719 unsigned int i;
3720 stmt_vec_info stmt_vinfo;
3721 basic_block bb;
3722 tree phi;
3723 bool live_p;
3724 enum vect_relevant relevant;
3726 if (vect_print_dump_info (REPORT_DETAILS))
3727 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3729 worklist = VEC_alloc (tree, heap, 64);
3731 /* 1. Init worklist. */
3732 for (i = 0; i < nbbs; i++)
3734 bb = bbs[i];
3735 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3737 if (vect_print_dump_info (REPORT_DETAILS))
3739 fprintf (vect_dump, "init: phi relevant? ");
3740 print_generic_expr (vect_dump, phi, TDF_SLIM);
3743 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3744 vect_mark_relevant (&worklist, phi, relevant, live_p);
3746 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3748 stmt = bsi_stmt (si);
3749 if (vect_print_dump_info (REPORT_DETAILS))
3751 fprintf (vect_dump, "init: stmt relevant? ");
3752 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3755 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3756 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3760 /* 2. Process worklist. */
3761 while (VEC_length (tree, worklist) > 0)
3763 use_operand_p use_p;
3764 ssa_op_iter iter;
3766 stmt = VEC_pop (tree, worklist);
3767 if (vect_print_dump_info (REPORT_DETAILS))
3769 fprintf (vect_dump, "worklist: examine stmt: ");
3770 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3773 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3774 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3775 liveness and relevance properties of STMT. */
3776 ann = stmt_ann (stmt);
3777 stmt_vinfo = vinfo_for_stmt (stmt);
3778 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3779 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3781 /* Generally, the liveness and relevance properties of STMT are
3782 propagated as is to the DEF_STMTs of its USEs:
3783 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3784 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3786 One exception is when STMT has been identified as defining a reduction
3787 variable; in this case we set the liveness/relevance as follows:
3788 live_p = false
3789 relevant = vect_used_by_reduction
3790 This is because we distinguish between two kinds of relevant stmts -
3791 those that are used by a reduction computation, and those that are
3792 (also) used by a regular computation. This allows us later on to
3793 identify stmts that are used solely by a reduction, in which case the
3794 order of the results that they produce does not have to be kept.
3796 Reduction phis are expected to be used by a reduction stmt, or by
3797 a stmt in an outer loop; other reduction stmts are expected to be
3798 in the loop, and possibly used by a stmt in an outer loop.
3799 Here are the expected values of "relevant" for reduction phis/stmts:
3801 relevance:                          phi     stmt
3802 vect_unused_in_loop                         ok
3803 vect_used_in_outer_by_reduction     ok      ok
3804 vect_used_in_outer                  ok      ok
3805 vect_used_by_reduction              ok
3806 vect_used_in_loop                                   */
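/* For illustration (a hypothetical reduction, not part of this file):
   in

       sum = 0;
       for (i = 0; i < n; i++)
         sum += a[i];

   the stmt "sum += a[i]" is used only by the reduction cycle, so it is
   re-marked vect_used_by_reduction and live_p is cleared below: the
   vectorized partial sums may be produced in any order, since only the
   final combined result is needed. */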
3808 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3810 enum vect_relevant tmp_relevant = relevant;
3811 switch (tmp_relevant)
3813 case vect_unused_in_loop:
3814 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3815 relevant = vect_used_by_reduction;
3816 break;
3818 case vect_used_in_outer_by_reduction:
3819 case vect_used_in_outer:
3820 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3821 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3822 break;
3824 case vect_used_by_reduction:
3825 if (TREE_CODE (stmt) == PHI_NODE)
3826 break;
3827 /* fall through */
3828 case vect_used_in_loop:
3829 default:
3830 if (vect_print_dump_info (REPORT_DETAILS))
3831 fprintf (vect_dump, "unsupported use of reduction.");
3832 VEC_free (tree, heap, worklist);
3833 return false;
3835 live_p = false;
3838 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3840 tree op = USE_FROM_PTR (use_p);
3841 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3843 VEC_free (tree, heap, worklist);
3844 return false;
3847 } /* while worklist */
3849 VEC_free (tree, heap, worklist);
3850 return true;
3854 /* Function vect_can_advance_ivs_p
3856 In case the number of iterations of LOOP is unknown at compile
3857 time, an epilog loop will be generated, and the loop induction variables
3858 (IVs) will be "advanced" to the value they are supposed to take just before
3859 the epilog loop. Here we check that the access function of the loop IVs
3860 and the expression that represents the loop bound are simple enough.
3861 These restrictions will be relaxed in the future. */
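/* For illustration (hypothetical access functions, not part of this
   file): a phi with the simple affine evolution {0, +, 1}_1 can be
   advanced to the value it should have on entry to the epilog loop,
   e.g. "niters - (niters % VF)", whereas a phi whose evolution part is
   itself a chrec, e.g. {0, +, {1, +, 1}_1}_1 (a degree-2 polynomial),
   is rejected by the tree_is_chrec check below. */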
3863 static bool
3864 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3866 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3867 basic_block bb = loop->header;
3868 tree phi;
3870 /* Analyze phi functions of the loop header. */
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3875 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3877 tree access_fn = NULL;
3878 tree evolution_part;
3880 if (vect_print_dump_info (REPORT_DETAILS))
3882 fprintf (vect_dump, "Analyze phi: ");
3883 print_generic_expr (vect_dump, phi, TDF_SLIM);
3886 /* Skip virtual phis. The data dependences that are associated with
3887 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3889 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3891 if (vect_print_dump_info (REPORT_DETAILS))
3892 fprintf (vect_dump, "virtual phi. skip.");
3893 continue;
3896 /* Skip reduction phis. */
3898 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "reduc phi. skip.");
3902 continue;
3905 /* Analyze the evolution function. */
3907 access_fn = instantiate_parameters
3908 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3910 if (!access_fn)
3912 if (vect_print_dump_info (REPORT_DETAILS))
3913 fprintf (vect_dump, "No Access function.");
3914 return false;
3917 if (vect_print_dump_info (REPORT_DETAILS))
3919 fprintf (vect_dump, "Access function of PHI: ");
3920 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3923 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3925 if (evolution_part == NULL_TREE)
3927 if (vect_print_dump_info (REPORT_DETAILS))
3928 fprintf (vect_dump, "No evolution.");
3929 return false;
3932 /* FORNOW: We do not transform initial conditions of IVs
3933 whose evolution function is a polynomial of degree >= 2. */
3935 if (tree_is_chrec (evolution_part))
3936 return false;
3939 return true;
3943 /* Function vect_get_loop_niters.
3945 Determine how many times the loop is executed.
3946 If an expression that represents the number of iterations
3947 can be constructed, place it in NUMBER_OF_ITERATIONS.
3948 Return the loop exit condition. */
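/* For illustration (a hypothetical loop, not part of this file): for a
   loop of the form

       for (i = 0; i < n; i++)
         ...

   that executes n times (n > 0), number_of_exit_cond_executions is
   expected to yield the expression 'n', and the returned exit
   condition is the COND_EXPR testing "i < n" at the end of the
   header. */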
3950 static tree
3951 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3953 tree niters;
3955 if (vect_print_dump_info (REPORT_DETAILS))
3956 fprintf (vect_dump, "=== get_loop_niters ===");
3958 niters = number_of_exit_cond_executions (loop);
3960 if (niters != NULL_TREE
3961 && niters != chrec_dont_know)
3963 *number_of_iterations = niters;
3965 if (vect_print_dump_info (REPORT_DETAILS))
3967 fprintf (vect_dump, "==> get_loop_niters:");
3968 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3972 return get_loop_exit_condition (loop);
3976 /* Function vect_analyze_loop_1.
3978 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3979 for it. The different analyses will record information in the
3980 loop_vec_info struct. This is a subset of the analyses applied in
3981 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3982 that is now considered for (outer-loop) vectorization. */
3984 static loop_vec_info
3985 vect_analyze_loop_1 (struct loop *loop)
3987 loop_vec_info loop_vinfo;
3989 if (vect_print_dump_info (REPORT_DETAILS))
3990 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3992 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3994 loop_vinfo = vect_analyze_loop_form (loop);
3995 if (!loop_vinfo)
3997 if (vect_print_dump_info (REPORT_DETAILS))
3998 fprintf (vect_dump, "bad inner-loop form.");
3999 return NULL;
4002 return loop_vinfo;
4006 /* Function vect_analyze_loop_form.
4008 Verify that certain CFG restrictions hold, including:
4009 - the loop has a pre-header
4010 - the loop has a single entry and exit
4011 - the loop exit condition is simple enough, and the number of iterations
4012 can be analyzed (a countable loop). */
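/* For illustration (a hypothetical candidate, not part of this file):
   a countable loop in the expected form corresponds to

       if (n > 0)            <-- guard
         {
           i = 0;
           do
             {
               a[i] = b[i];
               i = i + 1;
             }
           while (i < n);    <-- exit test, last stmt of the header
         }

   i.e. a do-while whose header holds all executable stmts and whose
   latch is empty, as also required further below. */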
4014 loop_vec_info
4015 vect_analyze_loop_form (struct loop *loop)
4017 loop_vec_info loop_vinfo;
4018 tree loop_cond;
4019 tree number_of_iterations = NULL;
4020 loop_vec_info inner_loop_vinfo = NULL;
4022 if (vect_print_dump_info (REPORT_DETAILS))
4023 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4025 /* Different restrictions apply when we are considering an inner-most loop,
4026 vs. an outer (nested) loop.
4027 (FORNOW: may want to relax some of these restrictions in the future.) */
4029 if (!loop->inner)
4031 /* Inner-most loop. We currently require that the number of BBs is
4032 exactly 2 (the header and latch). Vectorizable inner-most loops
4033 look like this:
4035 (pre-header)
4037 header <--------+
4038 | | |
4039 | +--> latch --+
4041 (exit-bb) */
4043 if (loop->num_nodes != 2)
4045 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4046 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4047 return NULL;
4050 if (empty_block_p (loop->header))
4052 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4053 fprintf (vect_dump, "not vectorized: empty loop.");
4054 return NULL;
4057 else
4059 struct loop *innerloop = loop->inner;
4060 edge backedge, entryedge;
4062 /* Nested loop. We currently require that the loop is doubly-nested,
4063 contains a single inner loop, and the number of BBs is exactly 5.
4064 Vectorizable outer-loops look like this:
4066 (pre-header)
4068 header <---+
4070 inner-loop |
4072 tail ------+
4074 (exit-bb)
4076 The inner-loop has the properties expected of inner-most loops
4077 as described above. */
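/* For illustration (a hypothetical nest, not part of this file):

       for (i = 0; i < n; i++)      <-- the outer loop being analyzed
         for (j = 0; j < m; j++)    <-- its single inner loop
           a[i][j] = b[i][j];

   where 'm' must be invariant in the outer loop, as checked via
   expr_invariant_in_loop_p below. */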
4079 if ((loop->inner)->inner || (loop->inner)->next)
4081 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4082 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4083 return NULL;
4086 /* Analyze the inner-loop. */
4087 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4088 if (!inner_loop_vinfo)
4090 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4091 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4092 return NULL;
4095 if (!expr_invariant_in_loop_p (loop,
4096 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4098 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4099 fprintf (vect_dump,
4100 "not vectorized: inner-loop count not invariant.");
4101 destroy_loop_vec_info (inner_loop_vinfo, true);
4102 return NULL;
4105 if (loop->num_nodes != 5)
4107 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4108 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4109 destroy_loop_vec_info (inner_loop_vinfo, true);
4110 return NULL;
4113 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4114 backedge = EDGE_PRED (innerloop->header, 1);
4115 entryedge = EDGE_PRED (innerloop->header, 0);
4116 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4118 backedge = EDGE_PRED (innerloop->header, 0);
4119 entryedge = EDGE_PRED (innerloop->header, 1);
4122 if (entryedge->src != loop->header
4123 || !single_exit (innerloop)
4124 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4126 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4127 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4128 destroy_loop_vec_info (inner_loop_vinfo, true);
4129 return NULL;
4132 if (vect_print_dump_info (REPORT_DETAILS))
4133 fprintf (vect_dump, "Considering outer-loop vectorization.");
4136 if (!single_exit (loop)
4137 || EDGE_COUNT (loop->header->preds) != 2)
4139 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4141 if (!single_exit (loop))
4142 fprintf (vect_dump, "not vectorized: multiple exits.");
4143 else if (EDGE_COUNT (loop->header->preds) != 2)
4144 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4146 if (inner_loop_vinfo)
4147 destroy_loop_vec_info (inner_loop_vinfo, true);
4148 return NULL;
4151 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4152 that the loop is represented as a do-while (with a proper if-guard
4153 before the loop if needed), where the loop header contains all the
4154 executable statements, and the latch is empty. */
4155 if (!empty_block_p (loop->latch)
4156 || phi_nodes (loop->latch))
4158 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4159 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4160 if (inner_loop_vinfo)
4161 destroy_loop_vec_info (inner_loop_vinfo, true);
4162 return NULL;
4165 /* Make sure there exists a single-predecessor exit bb: */
4166 if (!single_pred_p (single_exit (loop)->dest))
4168 edge e = single_exit (loop);
4169 if (!(e->flags & EDGE_ABNORMAL))
4171 split_loop_exit_edge (e);
4172 if (vect_print_dump_info (REPORT_DETAILS))
4173 fprintf (vect_dump, "split exit edge.");
4175 else
4177 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4178 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4179 if (inner_loop_vinfo)
4180 destroy_loop_vec_info (inner_loop_vinfo, true);
4181 return NULL;
4185 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4186 if (!loop_cond)
4188 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4189 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4190 if (inner_loop_vinfo)
4191 destroy_loop_vec_info (inner_loop_vinfo, true);
4192 return NULL;
4195 if (!number_of_iterations)
4197 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4198 fprintf (vect_dump,
4199 "not vectorized: number of iterations cannot be computed.");
4200 if (inner_loop_vinfo)
4201 destroy_loop_vec_info (inner_loop_vinfo, true);
4202 return NULL;
4205 if (chrec_contains_undetermined (number_of_iterations))
4207 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4208 fprintf (vect_dump, "Infinite number of iterations.");
4209 if (inner_loop_vinfo)
4210 destroy_loop_vec_info (inner_loop_vinfo, true);
4211 return NULL;
4214 if (!NITERS_KNOWN_P (number_of_iterations))
4216 if (vect_print_dump_info (REPORT_DETAILS))
4218 fprintf (vect_dump, "Symbolic number of iterations is ");
4219 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4222 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4224 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4225 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4226 if (inner_loop_vinfo)
4227 destroy_loop_vec_info (inner_loop_vinfo, false);
4228 return NULL;
4231 loop_vinfo = new_loop_vec_info (loop);
4232 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4233 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4235 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4237 /* CHECKME: May want to keep it around in the future. */
4238 if (inner_loop_vinfo)
4239 destroy_loop_vec_info (inner_loop_vinfo, false);
4241 gcc_assert (!loop->aux);
4242 loop->aux = loop_vinfo;
4243 return loop_vinfo;
4247 /* Function vect_analyze_loop.
4249 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4250 for it. The different analyses will record information in the
4251 loop_vec_info struct. */
4252 loop_vec_info
4253 vect_analyze_loop (struct loop *loop)
4255 bool ok;
4256 loop_vec_info loop_vinfo;
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "===== analyze_loop_nest =====");
4261 if (loop_outer (loop)
4262 && loop_vec_info_for_loop (loop_outer (loop))
4263 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4265 if (vect_print_dump_info (REPORT_DETAILS))
4266 fprintf (vect_dump, "outer-loop already vectorized.");
4267 return NULL;
4270 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
4272 loop_vinfo = vect_analyze_loop_form (loop);
4273 if (!loop_vinfo)
4275 if (vect_print_dump_info (REPORT_DETAILS))
4276 fprintf (vect_dump, "bad loop form.");
4277 return NULL;
4280 /* Find all data references in the loop (which correspond to vdefs/vuses)
4281 and analyze their evolution in the loop.
4283 FORNOW: Handle only simple array references whose
4284 alignment can be forced, and aligned pointer references. */
4286 ok = vect_analyze_data_refs (loop_vinfo);
4287 if (!ok)
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "bad data references.");
4291 destroy_loop_vec_info (loop_vinfo, true);
4292 return NULL;
4295 /* Classify all cross-iteration scalar data-flow cycles.
4296 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4298 vect_analyze_scalar_cycles (loop_vinfo);
4300 vect_pattern_recog (loop_vinfo);
4302 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4304 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4305 if (!ok)
4307 if (vect_print_dump_info (REPORT_DETAILS))
4308 fprintf (vect_dump, "unexpected pattern.");
4309 destroy_loop_vec_info (loop_vinfo, true);
4310 return NULL;
4313 /* Analyze the alignment of the data-refs in the loop.
4314 Fail if a data reference is found that cannot be vectorized. */
4316 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4317 if (!ok)
4319 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "bad data alignment.");
4321 destroy_loop_vec_info (loop_vinfo, true);
4322 return NULL;
4325 ok = vect_determine_vectorization_factor (loop_vinfo);
4326 if (!ok)
4328 if (vect_print_dump_info (REPORT_DETAILS))
4329 fprintf (vect_dump, "can't determine vectorization factor.");
4330 destroy_loop_vec_info (loop_vinfo, true);
4331 return NULL;
4334 /* Analyze data dependences between the data-refs in the loop.
4335 FORNOW: fail at the first data dependence that we encounter. */
4337 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4338 if (!ok)
4340 if (vect_print_dump_info (REPORT_DETAILS))
4341 fprintf (vect_dump, "bad data dependence.");
4342 destroy_loop_vec_info (loop_vinfo, true);
4343 return NULL;
4346 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4347 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4349 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4350 if (!ok)
4352 if (vect_print_dump_info (REPORT_DETAILS))
4353 fprintf (vect_dump, "bad data access.");
4354 destroy_loop_vec_info (loop_vinfo, true);
4355 return NULL;
4358 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4359 It is important to call pruning after vect_analyze_data_ref_accesses,
4360 since we use grouping information gathered by interleaving analysis. */
4361 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4362 if (!ok)
4364 if (vect_print_dump_info (REPORT_DETAILS))
4365 fprintf (vect_dump, "too long list of versioning for alias "
4366 "run-time tests.");
4367 destroy_loop_vec_info (loop_vinfo, true);
4368 return NULL;
4371 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4372 ok = vect_analyze_slp (loop_vinfo);
4373 if (ok)
4375 /* Decide which possible SLP instances to SLP. */
4376 vect_make_slp_decision (loop_vinfo);
4378 /* Find stmts that need to be both vectorized and SLPed. */
4379 vect_detect_hybrid_slp (loop_vinfo);
4382 /* This pass will decide on using loop versioning and/or loop peeling in
4383 order to enhance the alignment of data references in the loop. */
4385 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4386 if (!ok)
4388 if (vect_print_dump_info (REPORT_DETAILS))
4389 fprintf (vect_dump, "bad data alignment.");
4390 destroy_loop_vec_info (loop_vinfo, true);
4391 return NULL;
4394 /* Scan all the operations in the loop and make sure they are
4395 vectorizable. */
4397 ok = vect_analyze_operations (loop_vinfo);
4398 if (!ok)
4400 if (vect_print_dump_info (REPORT_DETAILS))
4401 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4402 destroy_loop_vec_info (loop_vinfo, true);
4403 return NULL;
4406 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4408 return loop_vinfo;