2007-03-01 Paul Brook <paul@codesourcery.com>
[official-gcc.git] / gcc / tree-vect-analyze.c
blob7b3f4c8fd6fe86a33088c060cd23c5f67d365a4a
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
78 in the loop.
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
81 original loop:
82 for (i=0; i<N; i++){
83 a[i] = b[i] + c[i];
86 vectorized loop:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
92 static bool
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
100 tree scalar_type;
101 tree phi;
102 tree vectype;
103 unsigned int nunits;
104 stmt_vec_info stmt_info;
105 int i;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 /* Two cases of "relevant" phis: those that define an
126 induction that is used in the loop, and those that
127 define a reduction. */
128 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
129 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
130 || (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
131 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
133 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
134 scalar_type = TREE_TYPE (PHI_RESULT (phi));
136 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "get vectype for scalar type: ");
139 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
142 vectype = get_vectype_for_scalar_type (scalar_type);
143 if (!vectype)
145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
147 fprintf (vect_dump,
148 "not vectorized: unsupported data-type ");
149 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
151 return false;
153 STMT_VINFO_VECTYPE (stmt_info) = vectype;
155 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "vectype: ");
158 print_generic_expr (vect_dump, vectype, TDF_SLIM);
161 nunits = TYPE_VECTOR_SUBPARTS (vectype);
162 if (vect_print_dump_info (REPORT_DETAILS))
163 fprintf (vect_dump, "nunits = %d", nunits);
165 if (!vectorization_factor
166 || (nunits > vectorization_factor))
167 vectorization_factor = nunits;
171 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
173 tree stmt = bsi_stmt (si);
174 stmt_info = vinfo_for_stmt (stmt);
176 if (vect_print_dump_info (REPORT_DETAILS))
178 fprintf (vect_dump, "==> examining statement: ");
179 print_generic_expr (vect_dump, stmt, TDF_SLIM);
182 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
183 continue;
185 gcc_assert (stmt_info);
187 /* skip stmts which do not need to be vectorized. */
188 if (!STMT_VINFO_RELEVANT_P (stmt_info)
189 && !STMT_VINFO_LIVE_P (stmt_info))
191 if (vect_print_dump_info (REPORT_DETAILS))
192 fprintf (vect_dump, "skip.");
193 continue;
196 if (!GIMPLE_STMT_P (stmt)
197 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
199 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
201 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
202 print_generic_expr (vect_dump, stmt, TDF_SLIM);
204 return false;
207 if (STMT_VINFO_VECTYPE (stmt_info))
209 /* The only case when a vectype had been already set is for stmts
210 that contain a dataref, or for "pattern-stmts" (stmts generated
211 by the vectorizer to represent/replace a certain idiom). */
212 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
213 || is_pattern_stmt_p (stmt_info));
214 vectype = STMT_VINFO_VECTYPE (stmt_info);
216 else
218 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
219 && !is_pattern_stmt_p (stmt_info));
221 /* We set the vectype according to the type of the result (lhs).
222 For stmts whose result-type is different than the type of the
223 arguments (e.g. demotion, promotion), vectype will be reset
224 appropriately (later). Note that we have to visit the smallest
225 datatype in this function, because that determines the VF.
226 If the smallest datatype in the loop is present only as the
227 rhs of a promotion operation - we'd miss it here.
228 However, in such a case, that a variable of this datatype
229 does not appear in the lhs anywhere in the loop, it shouldn't
230 affect the vectorization factor. */
231 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
233 if (vect_print_dump_info (REPORT_DETAILS))
235 fprintf (vect_dump, "get vectype for scalar type: ");
236 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
239 vectype = get_vectype_for_scalar_type (scalar_type);
240 if (!vectype)
242 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
244 fprintf (vect_dump,
245 "not vectorized: unsupported data-type ");
246 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
248 return false;
250 STMT_VINFO_VECTYPE (stmt_info) = vectype;
253 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "vectype: ");
256 print_generic_expr (vect_dump, vectype, TDF_SLIM);
259 nunits = TYPE_VECTOR_SUBPARTS (vectype);
260 if (vect_print_dump_info (REPORT_DETAILS))
261 fprintf (vect_dump, "nunits = %d", nunits);
263 if (!vectorization_factor
264 || (nunits > vectorization_factor))
265 vectorization_factor = nunits;
270 /* TODO: Analyze cost. Decide if worth while to vectorize. */
272 if (vectorization_factor <= 1)
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported data-type");
276 return false;
278 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
280 return true;
284 /* Function vect_analyze_operations.
286 Scan the loop stmts and make sure they are all vectorizable. */
288 static bool
289 vect_analyze_operations (loop_vec_info loop_vinfo)
291 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
292 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
293 int nbbs = loop->num_nodes;
294 block_stmt_iterator si;
295 unsigned int vectorization_factor = 0;
296 int i;
297 bool ok;
298 tree phi;
299 stmt_vec_info stmt_info;
300 bool need_to_vectorize = false;
302 if (vect_print_dump_info (REPORT_DETAILS))
303 fprintf (vect_dump, "=== vect_analyze_operations ===");
305 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
306 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
308 for (i = 0; i < nbbs; i++)
310 basic_block bb = bbs[i];
312 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
314 stmt_info = vinfo_for_stmt (phi);
315 if (vect_print_dump_info (REPORT_DETAILS))
317 fprintf (vect_dump, "examining phi: ");
318 print_generic_expr (vect_dump, phi, TDF_SLIM);
321 gcc_assert (stmt_info);
323 if (STMT_VINFO_LIVE_P (stmt_info))
325 /* FORNOW: not yet supported. */
326 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
327 fprintf (vect_dump, "not vectorized: value used after loop.");
328 return false;
331 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
332 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
334 /* Most likely a reduction-like computation that is used
335 in the loop. */
336 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
337 fprintf (vect_dump, "not vectorized: unsupported pattern.");
338 return false;
342 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
344 tree stmt = bsi_stmt (si);
345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
347 if (vect_print_dump_info (REPORT_DETAILS))
349 fprintf (vect_dump, "==> examining statement: ");
350 print_generic_expr (vect_dump, stmt, TDF_SLIM);
353 gcc_assert (stmt_info);
355 /* skip stmts which do not need to be vectorized.
356 this is expected to include:
357 - the COND_EXPR which is the loop exit condition
358 - any LABEL_EXPRs in the loop
359 - computations that are used only for array indexing or loop
360 control */
362 if (!STMT_VINFO_RELEVANT_P (stmt_info)
363 && !STMT_VINFO_LIVE_P (stmt_info))
365 if (vect_print_dump_info (REPORT_DETAILS))
366 fprintf (vect_dump, "irrelevant.");
367 continue;
370 if (STMT_VINFO_RELEVANT_P (stmt_info))
372 gcc_assert (GIMPLE_STMT_P (stmt)
373 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
374 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
376 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
377 || vectorizable_type_demotion (stmt, NULL, NULL)
378 || vectorizable_conversion (stmt, NULL, NULL)
379 || vectorizable_operation (stmt, NULL, NULL)
380 || vectorizable_assignment (stmt, NULL, NULL)
381 || vectorizable_load (stmt, NULL, NULL)
382 || vectorizable_call (stmt, NULL, NULL)
383 || vectorizable_store (stmt, NULL, NULL)
384 || vectorizable_condition (stmt, NULL, NULL));
386 if (!ok)
388 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
390 fprintf (vect_dump,
391 "not vectorized: relevant stmt not supported: ");
392 print_generic_expr (vect_dump, stmt, TDF_SLIM);
394 return false;
396 need_to_vectorize = true;
399 if (STMT_VINFO_LIVE_P (stmt_info))
401 ok = vectorizable_reduction (stmt, NULL, NULL);
403 if (ok)
404 need_to_vectorize = true;
405 else
406 ok = vectorizable_live_operation (stmt, NULL, NULL);
408 if (!ok)
410 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
412 fprintf (vect_dump,
413 "not vectorized: live stmt not supported: ");
414 print_generic_expr (vect_dump, stmt, TDF_SLIM);
416 return false;
419 } /* stmts in bb */
420 } /* bbs */
422 /* TODO: Analyze cost. Decide if worth while to vectorize. */
424 /* All operations in the loop are either irrelevant (deal with loop
425 control, or dead), or only used outside the loop and can be moved
426 out of the loop (e.g. invariants, inductions). The loop can be
427 optimized away by scalar optimizations. We're better off not
428 touching this loop. */
429 if (!need_to_vectorize)
431 if (vect_print_dump_info (REPORT_DETAILS))
432 fprintf (vect_dump,
433 "All the computation can be taken out of the loop.");
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
435 fprintf (vect_dump,
436 "not vectorized: redundant loop. no profit to vectorize.");
437 return false;
440 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
441 && vect_print_dump_info (REPORT_DETAILS))
442 fprintf (vect_dump,
443 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
444 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
446 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
447 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
448 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
449 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
450 * vectorization_factor))))
452 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
453 fprintf (vect_dump, "not vectorized: iteration count too small.");
454 return false;
457 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
458 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
459 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
461 if (vect_print_dump_info (REPORT_DETAILS))
462 fprintf (vect_dump, "epilog loop required.");
463 if (!vect_can_advance_ivs_p (loop_vinfo))
465 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
466 fprintf (vect_dump,
467 "not vectorized: can't create epilog loop 1.");
468 return false;
470 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
472 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
473 fprintf (vect_dump,
474 "not vectorized: can't create epilog loop 2.");
475 return false;
479 return true;
483 /* Function exist_non_indexing_operands_for_use_p
485 USE is one of the uses attached to STMT. Check if USE is
486 used in STMT for anything other than indexing an array. */
488 static bool
489 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
491 tree operand;
492 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
494 /* USE corresponds to some operand in STMT. If there is no data
495 reference in STMT, then any operand that corresponds to USE
496 is not indexing an array. */
497 if (!STMT_VINFO_DATA_REF (stmt_info))
498 return true;
500 /* STMT has a data_ref. FORNOW this means that its of one of
501 the following forms:
502 -1- ARRAY_REF = var
503 -2- var = ARRAY_REF
504 (This should have been verified in analyze_data_refs).
506 'var' in the second case corresponds to a def, not a use,
507 so USE cannot correspond to any operands that are not used
508 for array indexing.
510 Therefore, all we need to check is if STMT falls into the
511 first case, and whether var corresponds to USE. */
513 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
514 return false;
516 operand = GIMPLE_STMT_OPERAND (stmt, 1);
518 if (TREE_CODE (operand) != SSA_NAME)
519 return false;
521 if (operand == use)
522 return true;
524 return false;
528 /* Function vect_analyze_scalar_cycles.
530 Examine the cross iteration def-use cycles of scalar variables, by
531 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
532 following: invariant, induction, reduction, unknown.
534 Some forms of scalar cycles are not yet supported.
536 Example1: reduction: (unsupported yet)
538 loop1:
539 for (i=0; i<N; i++)
540 sum += a[i];
542 Example2: induction: (unsupported yet)
544 loop2:
545 for (i=0; i<N; i++)
546 a[i] = i;
548 Note: the following loop *is* vectorizable:
550 loop3:
551 for (i=0; i<N; i++)
552 a[i] = b[i];
554 even though it has a def-use cycle caused by the induction variable i:
556 loop: i_2 = PHI (i_0, i_1)
557 a[i_2] = ...;
558 i_1 = i_2 + 1;
559 GOTO loop;
561 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
562 it does not need to be vectorized because it is only used for array
563 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
564 loop2 on the other hand is relevant (it is being written to memory).
567 static void
568 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
570 tree phi;
571 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
572 basic_block bb = loop->header;
573 tree dumy;
574 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
576 if (vect_print_dump_info (REPORT_DETAILS))
577 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
579 /* First - identify all inductions. */
580 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
582 tree access_fn = NULL;
583 tree def = PHI_RESULT (phi);
584 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
586 if (vect_print_dump_info (REPORT_DETAILS))
588 fprintf (vect_dump, "Analyze phi: ");
589 print_generic_expr (vect_dump, phi, TDF_SLIM);
592 /* Skip virtual phi's. The data dependences that are associated with
593 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
594 if (!is_gimple_reg (SSA_NAME_VAR (def)))
595 continue;
597 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
599 /* Analyze the evolution function. */
600 access_fn = analyze_scalar_evolution (loop, def);
601 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
603 fprintf (vect_dump, "Access function of PHI: ");
604 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
607 if (!access_fn
608 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
610 VEC_safe_push (tree, heap, worklist, phi);
611 continue;
614 if (vect_print_dump_info (REPORT_DETAILS))
615 fprintf (vect_dump, "Detected induction.");
616 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
620 /* Second - identify all reductions. */
621 while (VEC_length (tree, worklist) > 0)
623 tree phi = VEC_pop (tree, worklist);
624 tree def = PHI_RESULT (phi);
625 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
626 tree reduc_stmt;
628 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "Analyze phi: ");
631 print_generic_expr (vect_dump, phi, TDF_SLIM);
634 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
635 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
637 reduc_stmt = vect_is_simple_reduction (loop, phi);
638 if (reduc_stmt)
640 if (vect_print_dump_info (REPORT_DETAILS))
641 fprintf (vect_dump, "Detected reduction.");
642 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
643 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
644 vect_reduction_def;
646 else
647 if (vect_print_dump_info (REPORT_DETAILS))
648 fprintf (vect_dump, "Unknown def-use cycle pattern.");
651 VEC_free (tree, heap, worklist);
652 return;
656 /* Function vect_insert_into_interleaving_chain.
658 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
660 static void
661 vect_insert_into_interleaving_chain (struct data_reference *dra,
662 struct data_reference *drb)
664 tree prev, next, next_init;
665 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
666 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
668 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
669 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
670 while (next)
672 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
673 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
675 /* Insert here. */
676 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
677 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
678 return;
680 prev = next;
681 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
684 /* We got to the end of the list. Insert here. */
685 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
686 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
690 /* Function vect_update_interleaving_chain.
692 For two data-refs DRA and DRB that are a part of a chain interleaved data
693 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
695 There are four possible cases:
696 1. New stmts - both DRA and DRB are not a part of any chain:
697 FIRST_DR = DRB
698 NEXT_DR (DRB) = DRA
699 2. DRB is a part of a chain and DRA is not:
700 no need to update FIRST_DR
701 no need to insert DRB
702 insert DRA according to init
703 3. DRA is a part of a chain and DRB is not:
704 if (init of FIRST_DR > init of DRB)
705 FIRST_DR = DRB
706 NEXT(FIRST_DR) = previous FIRST_DR
707 else
708 insert DRB according to its init
709 4. both DRA and DRB are in some interleaving chains:
710 choose the chain with the smallest init of FIRST_DR
711 insert the nodes of the second chain into the first one. */
713 static void
714 vect_update_interleaving_chain (struct data_reference *drb,
715 struct data_reference *dra)
717 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
718 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
719 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
720 tree node, prev, next, node_init, first_stmt;
722 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
723 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
725 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
726 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
727 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
728 return;
731 /* 2. DRB is a part of a chain and DRA is not. */
732 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
734 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
735 /* Insert DRA into the chain of DRB. */
736 vect_insert_into_interleaving_chain (dra, drb);
737 return;
740 /* 3. DRA is a part of a chain and DRB is not. */
741 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
743 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
744 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
745 old_first_stmt)));
746 tree tmp;
748 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
750 /* DRB's init is smaller than the init of the stmt previously marked
751 as the first stmt of the interleaving chain of DRA. Therefore, we
752 update FIRST_STMT and put DRB in the head of the list. */
753 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
754 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
756 /* Update all the stmts in the list to point to the new FIRST_STMT. */
757 tmp = old_first_stmt;
758 while (tmp)
760 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
761 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
764 else
766 /* Insert DRB in the list of DRA. */
767 vect_insert_into_interleaving_chain (drb, dra);
768 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
770 return;
773 /* 4. both DRA and DRB are in some interleaving chains. */
774 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
775 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
776 if (first_a == first_b)
777 return;
778 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
779 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
781 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
783 /* Insert the nodes of DRA chain into the DRB chain.
784 After inserting a node, continue from this node of the DRB chain (don't
785 start from the beginning. */
786 node = DR_GROUP_FIRST_DR (stmtinfo_a);
787 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
788 first_stmt = first_b;
790 else
792 /* Insert the nodes of DRB chain into the DRA chain.
793 After inserting a node, continue from this node of the DRA chain (don't
794 start from the beginning. */
795 node = DR_GROUP_FIRST_DR (stmtinfo_b);
796 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
797 first_stmt = first_a;
800 while (node)
802 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
803 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
804 while (next)
806 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
807 if (tree_int_cst_compare (next_init, node_init) > 0)
809 /* Insert here. */
810 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
811 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
812 prev = node;
813 break;
815 prev = next;
816 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
818 if (!next)
820 /* We got to the end of the list. Insert here. */
821 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
822 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
823 prev = node;
825 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
826 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
831 /* Function vect_equal_offsets.
833 Check if OFFSET1 and OFFSET2 are identical expressions. */
835 static bool
836 vect_equal_offsets (tree offset1, tree offset2)
838 bool res0, res1;
840 STRIP_NOPS (offset1);
841 STRIP_NOPS (offset2);
843 if (offset1 == offset2)
844 return true;
846 if (TREE_CODE (offset1) != TREE_CODE (offset2)
847 || !BINARY_CLASS_P (offset1)
848 || !BINARY_CLASS_P (offset2))
849 return false;
851 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
852 TREE_OPERAND (offset2, 0));
853 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
854 TREE_OPERAND (offset2, 1));
856 return (res0 && res1);
860 /* Function vect_check_interleaving.
862 Check if DRA and DRB are a part of interleaving. In case they are, insert
863 DRA and DRB in an interleaving chain. */
865 static void
866 vect_check_interleaving (struct data_reference *dra,
867 struct data_reference *drb)
869 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
871 /* Check that the data-refs have same first location (except init) and they
872 are both either store or load (not load and store). */
873 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
874 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
875 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
876 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
877 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
878 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
879 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
880 || DR_IS_READ (dra) != DR_IS_READ (drb))
881 return;
883 /* Check:
884 1. data-refs are of the same type
885 2. their steps are equal
886 3. the step is greater than the difference between data-refs' inits */
887 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
888 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
890 if (type_size_a != type_size_b
891 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
892 return;
894 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
895 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
896 step = TREE_INT_CST_LOW (DR_STEP (dra));
898 if (init_a > init_b)
900 /* If init_a == init_b + the size of the type * k, we have an interleaving,
901 and DRB is accessed before DRA. */
902 diff_mod_size = (init_a - init_b) % type_size_a;
904 if ((init_a - init_b) > step)
905 return;
907 if (diff_mod_size == 0)
909 vect_update_interleaving_chain (drb, dra);
910 if (vect_print_dump_info (REPORT_DR_DETAILS))
912 fprintf (vect_dump, "Detected interleaving ");
913 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
914 fprintf (vect_dump, " and ");
915 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
917 return;
920 else
922 /* If init_b == init_a + the size of the type * k, we have an
923 interleaving, and DRA is accessed before DRB. */
924 diff_mod_size = (init_b - init_a) % type_size_a;
926 if ((init_b - init_a) > step)
927 return;
929 if (diff_mod_size == 0)
931 vect_update_interleaving_chain (dra, drb);
932 if (vect_print_dump_info (REPORT_DR_DETAILS))
934 fprintf (vect_dump, "Detected interleaving ");
935 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
936 fprintf (vect_dump, " and ");
937 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
939 return;
945 /* Function vect_analyze_data_ref_dependence.
947 Return TRUE if there (might) exist a dependence between a memory-reference
948 DRA and a memory-reference DRB. */
950 static bool
951 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
952 loop_vec_info loop_vinfo)
954 unsigned int i;
955 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
956 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
957 struct data_reference *dra = DDR_A (ddr);
958 struct data_reference *drb = DDR_B (ddr);
959 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
960 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
961 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
962 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
963 lambda_vector dist_v;
964 unsigned int loop_depth;
966 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
968 /* Independent data accesses. */
969 vect_check_interleaving (dra, drb);
970 return false;
973 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
974 return false;
976 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
978 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
980 fprintf (vect_dump,
981 "not vectorized: can't determine dependence between ");
982 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
983 fprintf (vect_dump, " and ");
984 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
986 return true;
989 if (DDR_NUM_DIST_VECTS (ddr) == 0)
991 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
993 fprintf (vect_dump, "not vectorized: bad dist vector for ");
994 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
995 fprintf (vect_dump, " and ");
996 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
998 return true;
1001 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1002 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1004 int dist = dist_v[loop_depth];
1006 if (vect_print_dump_info (REPORT_DR_DETAILS))
1007 fprintf (vect_dump, "dependence distance = %d.", dist);
1009 /* Same loop iteration. */
1010 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1012 /* Two references with distance zero have the same alignment. */
1013 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1014 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1015 if (vect_print_dump_info (REPORT_ALIGNMENT))
1016 fprintf (vect_dump, "accesses have the same alignment.");
1017 if (vect_print_dump_info (REPORT_DR_DETAILS))
1019 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1020 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1021 fprintf (vect_dump, " and ");
1022 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1025 /* For interleaving, mark that there is a read-write dependency if
1026 necessary. We check before that one of the data-refs is store. */
1027 if (DR_IS_READ (dra))
1028 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1029 else
1031 if (DR_IS_READ (drb))
1032 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1035 continue;
1038 if (abs (dist) >= vectorization_factor)
1040 /* Dependence distance does not create dependence, as far as vectorization
1041 is concerned, in this case. */
1042 if (vect_print_dump_info (REPORT_DR_DETAILS))
1043 fprintf (vect_dump, "dependence distance >= VF.");
1044 continue;
1047 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1049 fprintf (vect_dump,
1050 "not vectorized: possible dependence between data-refs ");
1051 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1052 fprintf (vect_dump, " and ");
1053 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1056 return true;
1059 return false;
1063 /* Function vect_analyze_data_ref_dependences.
1065 Examine all the data references in the loop, and make sure there do not
1066 exist any data dependences between them. */
1068 static bool
1069 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1071 unsigned int i;
1072 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1073 struct data_dependence_relation *ddr;
1075 if (vect_print_dump_info (REPORT_DETAILS))
1076 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1078 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1079 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1080 return false;
1082 return true;
1086 /* Function vect_compute_data_ref_alignment
1088 Compute the misalignment of the data reference DR.
1090 Output:
1091 1. If during the misalignment computation it is found that the data reference
1092 cannot be vectorized then false is returned.
1093 2. DR_MISALIGNMENT (DR) is defined.
1095 FOR NOW: No analysis is actually performed. Misalignment is calculated
1096 only for trivial cases. TODO. */
1098 static bool
1099 vect_compute_data_ref_alignment (struct data_reference *dr)
1101 tree stmt = DR_STMT (dr);
1102 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1103 tree ref = DR_REF (dr);
1104 tree vectype;
1105 tree base, base_addr;
1106 bool base_aligned;
1107 tree misalign;
1108 tree aligned_to, alignment;
1110 if (vect_print_dump_info (REPORT_DETAILS))
1111 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1113 /* Initialize misalignment to unknown. */
1114 DR_MISALIGNMENT (dr) = -1;
1116 misalign = DR_OFFSET_MISALIGNMENT (dr);
1117 aligned_to = DR_ALIGNED_TO (dr);
1118 base_addr = DR_BASE_ADDRESS (dr);
1119 base = build_fold_indirect_ref (base_addr);
1120 vectype = STMT_VINFO_VECTYPE (stmt_info);
1121 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1123 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1124 || !misalign)
1126 if (vect_print_dump_info (REPORT_DETAILS))
1128 fprintf (vect_dump, "Unknown alignment for access: ");
1129 print_generic_expr (vect_dump, base, TDF_SLIM);
1131 return true;
1134 if ((DECL_P (base)
1135 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1136 alignment) >= 0)
1137 || (TREE_CODE (base_addr) == SSA_NAME
1138 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1139 TREE_TYPE (base_addr)))),
1140 alignment) >= 0))
1141 base_aligned = true;
1142 else
1143 base_aligned = false;
1145 if (!base_aligned)
1147 /* Do not change the alignment of global variables if
1148 flag_section_anchors is enabled. */
1149 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1150 || (TREE_STATIC (base) && flag_section_anchors))
1152 if (vect_print_dump_info (REPORT_DETAILS))
1154 fprintf (vect_dump, "can't force alignment of ref: ");
1155 print_generic_expr (vect_dump, ref, TDF_SLIM);
1157 return true;
1160 /* Force the alignment of the decl.
1161 NOTE: This is the only change to the code we make during
1162 the analysis phase, before deciding to vectorize the loop. */
1163 if (vect_print_dump_info (REPORT_DETAILS))
1164 fprintf (vect_dump, "force alignment");
1165 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1166 DECL_USER_ALIGN (base) = 1;
1169 /* At this point we assume that the base is aligned. */
1170 gcc_assert (base_aligned
1171 || (TREE_CODE (base) == VAR_DECL
1172 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1174 /* Modulo alignment. */
1175 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1177 if (!host_integerp (misalign, 1))
1179 /* Negative or overflowed misalignment value. */
1180 if (vect_print_dump_info (REPORT_DETAILS))
1181 fprintf (vect_dump, "unexpected misalign value");
1182 return false;
1185 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1187 if (vect_print_dump_info (REPORT_DETAILS))
1189 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1190 print_generic_expr (vect_dump, ref, TDF_SLIM);
1193 return true;
1197 /* Function vect_compute_data_refs_alignment
1199 Compute the misalignment of data references in the loop.
1200 Return FALSE if a data reference is found that cannot be vectorized. */
1202 static bool
1203 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1205 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1206 struct data_reference *dr;
1207 unsigned int i;
1209 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1210 if (!vect_compute_data_ref_alignment (dr))
1211 return false;
1213 return true;
1217 /* Function vect_update_misalignment_for_peel
1219 DR - the data reference whose misalignment is to be adjusted.
1220 DR_PEEL - the data reference whose misalignment is being made
1221 zero in the vector loop by the peel.
1222 NPEEL - the number of iterations in the peel loop if the misalignment
1223 of DR_PEEL is known at compile time. */
1225 static void
1226 vect_update_misalignment_for_peel (struct data_reference *dr,
1227 struct data_reference *dr_peel, int npeel)
1229 unsigned int i;
1230 VEC(dr_p,heap) *same_align_drs;
1231 struct data_reference *current_dr;
1232 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1233 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1234 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1235 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1237 /* For interleaved data accesses the step in the loop must be multiplied by
1238 the size of the interleaving group. */
1239 if (DR_GROUP_FIRST_DR (stmt_info))
1240 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1241 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1242 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1244 if (known_alignment_for_access_p (dr)
1245 && known_alignment_for_access_p (dr_peel)
1246 && (DR_MISALIGNMENT (dr) / dr_size ==
1247 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1249 DR_MISALIGNMENT (dr) = 0;
1250 return;
1253 /* It can be assumed that the data refs with the same alignment as dr_peel
1254 are aligned in the vector loop. */
1255 same_align_drs
1256 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1257 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1259 if (current_dr != dr)
1260 continue;
1261 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1262 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1263 DR_MISALIGNMENT (dr) = 0;
1264 return;
1267 if (known_alignment_for_access_p (dr)
1268 && known_alignment_for_access_p (dr_peel))
1270 DR_MISALIGNMENT (dr) += npeel * dr_size;
1271 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1272 return;
1275 if (vect_print_dump_info (REPORT_DETAILS))
1276 fprintf (vect_dump, "Setting misalignment to -1.");
1277 DR_MISALIGNMENT (dr) = -1;
1281 /* Function vect_verify_datarefs_alignment
1283 Return TRUE if all data references in the loop can be
1284 handled with respect to alignment. */
1286 static bool
1287 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1289 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1290 struct data_reference *dr;
1291 enum dr_alignment_support supportable_dr_alignment;
1292 unsigned int i;
1294 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1296 tree stmt = DR_STMT (dr);
1297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1299 /* For interleaving, only the alignment of the first access matters. */
1300 if (DR_GROUP_FIRST_DR (stmt_info)
1301 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1302 continue;
1304 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1305 if (!supportable_dr_alignment)
1307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1309 if (DR_IS_READ (dr))
1310 fprintf (vect_dump,
1311 "not vectorized: unsupported unaligned load.");
1312 else
1313 fprintf (vect_dump,
1314 "not vectorized: unsupported unaligned store.");
1316 return false;
1318 if (supportable_dr_alignment != dr_aligned
1319 && vect_print_dump_info (REPORT_ALIGNMENT))
1320 fprintf (vect_dump, "Vectorizing an unaligned access.");
1322 return true;
1326 /* Function vect_enhance_data_refs_alignment
1328 This pass will use loop versioning and loop peeling in order to enhance
1329 the alignment of data references in the loop.
1331 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1332 original loop is to be vectorized; Any other loops that are created by
1333 the transformations performed in this pass - are not supposed to be
1334 vectorized. This restriction will be relaxed.
1336 This pass will require a cost model to guide it whether to apply peeling
1337 or versioning or a combination of the two. For example, the scheme that
1338 intel uses when given a loop with several memory accesses, is as follows:
1339 choose one memory access ('p') which alignment you want to force by doing
1340 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1341 other accesses are not necessarily aligned, or (2) use loop versioning to
1342 generate one loop in which all accesses are aligned, and another loop in
1343 which only 'p' is necessarily aligned.
1345 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1346 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1347 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1349 Devising a cost model is the most critical aspect of this work. It will
1350 guide us on which access to peel for, whether to use loop versioning, how
1351 many versions to create, etc. The cost model will probably consist of
1352 generic considerations as well as target specific considerations (on
1353 powerpc for example, misaligned stores are more painful than misaligned
1354 loads).
1356 Here are the general steps involved in alignment enhancements:
1358 -- original loop, before alignment analysis:
1359 for (i=0; i<N; i++){
1360 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1361 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1364 -- After vect_compute_data_refs_alignment:
1365 for (i=0; i<N; i++){
1366 x = q[i]; # DR_MISALIGNMENT(q) = 3
1367 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1370 -- Possibility 1: we do loop versioning:
1371 if (p is aligned) {
1372 for (i=0; i<N; i++){ # loop 1A
1373 x = q[i]; # DR_MISALIGNMENT(q) = 3
1374 p[i] = y; # DR_MISALIGNMENT(p) = 0
1377 else {
1378 for (i=0; i<N; i++){ # loop 1B
1379 x = q[i]; # DR_MISALIGNMENT(q) = 3
1380 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1384 -- Possibility 2: we do loop peeling:
1385 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1386 x = q[i];
1387 p[i] = y;
1389 for (i = 3; i < N; i++){ # loop 2A
1390 x = q[i]; # DR_MISALIGNMENT(q) = 0
1391 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1394 -- Possibility 3: combination of loop peeling and versioning:
1395 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1396 x = q[i];
1397 p[i] = y;
1399 if (p is aligned) {
1400 for (i = 3; i<N; i++){ # loop 3A
1401 x = q[i]; # DR_MISALIGNMENT(q) = 0
1402 p[i] = y; # DR_MISALIGNMENT(p) = 0
1405 else {
1406 for (i = 3; i<N; i++){ # loop 3B
1407 x = q[i]; # DR_MISALIGNMENT(q) = 0
1408 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1412 These loops are later passed to loop_transform to be vectorized. The
1413 vectorizer will use the alignment information to guide the transformation
1414 (whether to generate regular loads/stores, or with special handling for
1415 misalignment). */
1417 static bool
1418 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1420 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1421 enum dr_alignment_support supportable_dr_alignment;
1422 struct data_reference *dr0 = NULL;
1423 struct data_reference *dr;
1424 unsigned int i;
1425 bool do_peeling = false;
1426 bool do_versioning = false;
1427 bool stat;
1428 tree stmt;
1429 stmt_vec_info stmt_info;
1431 if (vect_print_dump_info (REPORT_DETAILS))
1432 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1434 /* While cost model enhancements are expected in the future, the high level
1435 view of the code at this time is as follows:
1437 A) If there is a misaligned write then see if peeling to align this write
1438 can make all data references satisfy vect_supportable_dr_alignment.
1439 If so, update data structures as needed and return true. Note that
1440 at this time vect_supportable_dr_alignment is known to return false
1441 for a misaligned write.
1443 B) If peeling wasn't possible and there is a data reference with an
1444 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1445 then see if loop versioning checks can be used to make all data
1446 references satisfy vect_supportable_dr_alignment. If so, update
1447 data structures as needed and return true.
1449 C) If neither peeling nor versioning were successful then return false if
1450 any data reference does not satisfy vect_supportable_dr_alignment.
1452 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1454 Note, Possibility 3 above (which is peeling and versioning together) is not
1455 being done at this time. */
1457 /* (1) Peeling to force alignment. */
1459 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1460 Considerations:
1461 + How many accesses will become aligned due to the peeling
1462 - How many accesses will become unaligned due to the peeling,
1463 and the cost of misaligned accesses.
1464 - The cost of peeling (the extra runtime checks, the increase
1465 in code size).
1467 The scheme we use FORNOW: peel to force the alignment of the first
1468 misaligned store in the loop.
1469 Rationale: misaligned stores are not yet supported.
1471 TODO: Use a cost model. */
1473 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1475 stmt = DR_STMT (dr);
1476 stmt_info = vinfo_for_stmt (stmt);
1478 /* For interleaving, only the alignment of the first access
1479 matters. */
1480 if (DR_GROUP_FIRST_DR (stmt_info)
1481 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1482 continue;
1484 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1486 if (DR_GROUP_FIRST_DR (stmt_info))
1488 /* For interleaved access we peel only if number of iterations in
1489 the prolog loop ({VF - misalignment}), is a multiple of the
1490 number of the interleaved accesses. */
1491 int elem_size, mis_in_elements;
1492 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1494 /* FORNOW: handle only known alignment. */
1495 if (!known_alignment_for_access_p (dr))
1497 do_peeling = false;
1498 break;
1501 elem_size = UNITS_PER_SIMD_WORD / vf;
1502 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1504 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1506 do_peeling = false;
1507 break;
1510 dr0 = dr;
1511 do_peeling = true;
1512 break;
1516 /* Often peeling for alignment will require peeling for loop-bound, which in
1517 turn requires that we know how to adjust the loop ivs after the loop. */
1518 if (!vect_can_advance_ivs_p (loop_vinfo))
1519 do_peeling = false;
1521 if (do_peeling)
1523 int mis;
1524 int npeel = 0;
1526 if (known_alignment_for_access_p (dr0))
1528 /* Since it's known at compile time, compute the number of iterations
1529 in the peeled loop (the peeling factor) for use in updating
1530 DR_MISALIGNMENT values. The peeling factor is the vectorization
1531 factor minus the misalignment as an element count. */
1532 mis = DR_MISALIGNMENT (dr0);
1533 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1534 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1536 /* For interleaved data access every iteration accesses all the
1537 members of the group, therefore we divide the number of iterations
1538 by the group size. */
1539 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1540 if (DR_GROUP_FIRST_DR (stmt_info))
1541 npeel /= DR_GROUP_SIZE (stmt_info);
1543 if (vect_print_dump_info (REPORT_DETAILS))
1544 fprintf (vect_dump, "Try peeling by %d", npeel);
1547 /* Ensure that all data refs can be vectorized after the peel. */
1548 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1550 int save_misalignment;
1552 if (dr == dr0)
1553 continue;
1555 stmt = DR_STMT (dr);
1556 stmt_info = vinfo_for_stmt (stmt);
1557 /* For interleaving, only the alignment of the first access
1558 matters. */
1559 if (DR_GROUP_FIRST_DR (stmt_info)
1560 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1561 continue;
1563 save_misalignment = DR_MISALIGNMENT (dr);
1564 vect_update_misalignment_for_peel (dr, dr0, npeel);
1565 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1566 DR_MISALIGNMENT (dr) = save_misalignment;
1568 if (!supportable_dr_alignment)
1570 do_peeling = false;
1571 break;
1575 if (do_peeling)
1577 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1578 If the misalignment of DR_i is identical to that of dr0 then set
1579 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1580 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1581 by the peeling factor times the element size of DR_i (MOD the
1582 vectorization factor times the size). Otherwise, the
1583 misalignment of DR_i must be set to unknown. */
1584 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1585 if (dr != dr0)
1586 vect_update_misalignment_for_peel (dr, dr0, npeel);
1588 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1589 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1590 DR_MISALIGNMENT (dr0) = 0;
1591 if (vect_print_dump_info (REPORT_ALIGNMENT))
1592 fprintf (vect_dump, "Alignment of access forced using peeling.");
1594 if (vect_print_dump_info (REPORT_DETAILS))
1595 fprintf (vect_dump, "Peeling for alignment will be applied.");
1597 stat = vect_verify_datarefs_alignment (loop_vinfo);
1598 gcc_assert (stat);
1599 return stat;
1604 /* (2) Versioning to force alignment. */
1606 /* Try versioning if:
1607 1) flag_tree_vect_loop_version is TRUE
1608 2) optimize_size is FALSE
1609 3) there is at least one unsupported misaligned data ref with an unknown
1610 misalignment, and
1611 4) all misaligned data refs with a known misalignment are supported, and
1612 5) the number of runtime alignment checks is within reason. */
1614 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1616 if (do_versioning)
1618 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1620 stmt = DR_STMT (dr);
1621 stmt_info = vinfo_for_stmt (stmt);
1623 /* For interleaving, only the alignment of the first access
1624 matters. */
1625 if (aligned_access_p (dr)
1626 || (DR_GROUP_FIRST_DR (stmt_info)
1627 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1628 continue;
1630 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1632 if (!supportable_dr_alignment)
1634 tree stmt;
1635 int mask;
1636 tree vectype;
1638 if (known_alignment_for_access_p (dr)
1639 || VEC_length (tree,
1640 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1641 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1643 do_versioning = false;
1644 break;
1647 stmt = DR_STMT (dr);
1648 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1649 gcc_assert (vectype);
1651 /* The rightmost bits of an aligned address must be zeros.
1652 Construct the mask needed for this test. For example,
1653 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1654 mask must be 15 = 0xf. */
1655 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1657 /* FORNOW: use the same mask to test all potentially unaligned
1658 references in the loop. The vectorizer currently supports
1659 a single vector size, see the reference to
1660 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1661 vectorization factor is computed. */
1662 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1663 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1664 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1665 VEC_safe_push (tree, heap,
1666 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1667 DR_STMT (dr));
1671 /* Versioning requires at least one misaligned data reference. */
1672 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1673 do_versioning = false;
1674 else if (!do_versioning)
1675 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1678 if (do_versioning)
1680 VEC(tree,heap) *may_misalign_stmts
1681 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1682 tree stmt;
1684 /* It can now be assumed that the data references in the statements
1685 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1686 of the loop being vectorized. */
1687 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1690 dr = STMT_VINFO_DATA_REF (stmt_info);
1691 DR_MISALIGNMENT (dr) = 0;
1692 if (vect_print_dump_info (REPORT_ALIGNMENT))
1693 fprintf (vect_dump, "Alignment of access forced using versioning.");
1696 if (vect_print_dump_info (REPORT_DETAILS))
1697 fprintf (vect_dump, "Versioning for alignment will be applied.");
1699 /* Peeling and versioning can't be done together at this time. */
1700 gcc_assert (! (do_peeling && do_versioning));
1702 stat = vect_verify_datarefs_alignment (loop_vinfo);
1703 gcc_assert (stat);
1704 return stat;
1707 /* This point is reached if neither peeling nor versioning is being done. */
1708 gcc_assert (! (do_peeling || do_versioning));
1710 stat = vect_verify_datarefs_alignment (loop_vinfo);
1711 return stat;
1715 /* Function vect_analyze_data_refs_alignment
1717 Analyze the alignment of the data-references in the loop.
1718 Return FALSE if a data reference is found that cannot be vectorized. */
1720 static bool
1721 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1723 if (vect_print_dump_info (REPORT_DETAILS))
1724 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1726 if (!vect_compute_data_refs_alignment (loop_vinfo))
1728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1729 fprintf (vect_dump,
1730 "not vectorized: can't calculate alignment for data ref.");
1731 return false;
1734 return true;
1738 /* Function vect_analyze_data_ref_access.
1740 Analyze the access pattern of the data-reference DR. For now, a data access
1741 has to be consecutive to be considered vectorizable. */
1743 static bool
1744 vect_analyze_data_ref_access (struct data_reference *dr)
1746 tree step = DR_STEP (dr);
1747 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1748 tree scalar_type = TREE_TYPE (DR_REF (dr));
1749 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1750 tree stmt = DR_STMT (dr);
1751 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1752 interleaving group (including gaps). */
1753 HOST_WIDE_INT stride = dr_step / type_size;
1755 if (!step)
1757 if (vect_print_dump_info (REPORT_DETAILS))
1758 fprintf (vect_dump, "bad data-ref access");
1759 return false;
1762 /* Consecutive? */
1763 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1765 /* Mark that it is not interleaving. */
1766 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1767 return true;
1770 /* Not consecutive access is possible only if it is a part of interleaving. */
1771 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1773 /* Check if it this DR is a part of interleaving, and is a single
1774 element of the group that is accessed in the loop. */
1776 /* Gaps are supported only for loads. STEP must be a multiple of the type
1777 size. The size of the group must be a power of 2. */
1778 if (DR_IS_READ (dr)
1779 && (dr_step % type_size) == 0
1780 && stride > 0
1781 && exact_log2 (stride) != -1)
1783 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1784 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1785 if (vect_print_dump_info (REPORT_DR_DETAILS))
1787 fprintf (vect_dump, "Detected single element interleaving %d ",
1788 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1789 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1790 fprintf (vect_dump, " step ");
1791 print_generic_expr (vect_dump, step, TDF_SLIM);
1793 return true;
1795 if (vect_print_dump_info (REPORT_DETAILS))
1796 fprintf (vect_dump, "not consecutive access");
1797 return false;
1800 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1802 /* First stmt in the interleaving chain. Check the chain. */
1803 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1804 struct data_reference *data_ref = dr;
1805 unsigned int count = 1;
1806 tree next_step;
1807 tree prev_init = DR_INIT (data_ref);
1808 tree prev = stmt;
1809 HOST_WIDE_INT diff, count_in_bytes;
1811 while (next)
1813 /* Skip same data-refs. In case that two or more stmts share data-ref
1814 (supported only for loads), we vectorize only the first stmt, and
1815 the rest get their vectorized loads from the first one. */
1816 if (!tree_int_cst_compare (DR_INIT (data_ref),
1817 DR_INIT (STMT_VINFO_DATA_REF (
1818 vinfo_for_stmt (next)))))
1820 if (!DR_IS_READ (data_ref))
1822 if (vect_print_dump_info (REPORT_DETAILS))
1823 fprintf (vect_dump, "Two store stmts share the same dr.");
1824 return false;
1827 /* Check that there is no load-store dependencies for this loads
1828 to prevent a case of load-store-load to the same location. */
1829 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1830 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1832 if (vect_print_dump_info (REPORT_DETAILS))
1833 fprintf (vect_dump,
1834 "READ_WRITE dependence in interleaving.");
1835 return false;
1838 /* For load use the same data-ref load. */
1839 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1841 prev = next;
1842 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1843 continue;
1845 prev = next;
1847 /* Check that all the accesses have the same STEP. */
1848 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1849 if (tree_int_cst_compare (step, next_step))
1851 if (vect_print_dump_info (REPORT_DETAILS))
1852 fprintf (vect_dump, "not consecutive access in interleaving");
1853 return false;
1856 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1857 /* Check that the distance between two accesses is equal to the type
1858 size. Otherwise, we have gaps. */
1859 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1860 - TREE_INT_CST_LOW (prev_init)) / type_size;
1861 if (!DR_IS_READ (data_ref) && diff != 1)
1863 if (vect_print_dump_info (REPORT_DETAILS))
1864 fprintf (vect_dump, "interleaved store with gaps");
1865 return false;
1867 /* Store the gap from the previous member of the group. If there is no
1868 gap in the access, DR_GROUP_GAP is always 1. */
1869 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1871 prev_init = DR_INIT (data_ref);
1872 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1873 /* Count the number of data-refs in the chain. */
1874 count++;
1877 /* COUNT is the number of accesses found, we multiply it by the size of
1878 the type to get COUNT_IN_BYTES. */
1879 count_in_bytes = type_size * count;
1881 /* Check that the size of the interleaving is not greater than STEP. */
1882 if (dr_step < count_in_bytes)
1884 if (vect_print_dump_info (REPORT_DETAILS))
1886 fprintf (vect_dump, "interleaving size is greater than step for ");
1887 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1889 return false;
1892 /* Check that the size of the interleaving is equal to STEP for stores,
1893 i.e., that there are no gaps. */
1894 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1896 if (vect_print_dump_info (REPORT_DETAILS))
1897 fprintf (vect_dump, "interleaved store with gaps");
1898 return false;
1901 /* Check that STEP is a multiple of type size. */
1902 if ((dr_step % type_size) != 0)
1904 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "step is not a multiple of type size: step ");
1907 print_generic_expr (vect_dump, step, TDF_SLIM);
1908 fprintf (vect_dump, " size ");
1909 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1910 TDF_SLIM);
1912 return false;
1915 /* FORNOW: we handle only interleaving that is a power of 2. */
1916 if (exact_log2 (stride) == -1)
1918 if (vect_print_dump_info (REPORT_DETAILS))
1919 fprintf (vect_dump, "interleaving is not a power of 2");
1920 return false;
1922 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1924 return true;
1928 /* Function vect_analyze_data_ref_accesses.
1930 Analyze the access pattern of all the data references in the loop.
1932 FORNOW: the only access pattern that is considered vectorizable is a
1933 simple step 1 (consecutive) access.
1935 FORNOW: handle only arrays and pointer accesses. */
1937 static bool
1938 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1940 unsigned int i;
1941 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1942 struct data_reference *dr;
1944 if (vect_print_dump_info (REPORT_DETAILS))
1945 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1947 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1948 if (!vect_analyze_data_ref_access (dr))
1950 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1951 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1952 return false;
1955 return true;
1959 /* Function vect_analyze_data_refs.
1961 Find all the data references in the loop.
1963 The general structure of the analysis of data refs in the vectorizer is as
1964 follows:
1965 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1966 find and analyze all data-refs in the loop and their dependences.
1967 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1968 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1969 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
/* Function vect_analyze_data_refs.

   Find all the data references in LOOP_VINFO's loop, verify that the
   data-ref analysis succeeded for each one, and record in each
   containing statement's stmt_vec_info (a) the data reference and
   (b) the vector type derived from the reference's scalar type.
   Returns false (loop not vectorizable) at the first data-ref that
   fails any check.

   NOTE(review): brace-only lines appear to have been lost from this
   extract; the statement grouping implied by the original file should
   be confirmed against upstream GCC sources.  */
1973 static bool
1974 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1976 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1977 unsigned int i;
1978 VEC (data_reference_p, heap) *datarefs;
1979 struct data_reference *dr;
1980 tree scalar_type;
1982 if (vect_print_dump_info (REPORT_DETAILS))
1983 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
/* Collect all data references in the loop and the dependence
   relations between them, recording both in the loop_vec_info.  */
1985 compute_data_dependences_for_loop (loop, true,
1986 &LOOP_VINFO_DATAREFS (loop_vinfo),
1987 &LOOP_VINFO_DDRS (loop_vinfo));
1989 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1990 from stmt_vec_info struct to DR and vectype. */
1991 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1993 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1995 tree stmt;
1996 stmt_vec_info stmt_info;
/* A null DR or DR_REF means the data-ref analyzer gave up on this
   reference.  */
1998 if (!dr || !DR_REF (dr))
2000 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2001 fprintf (vect_dump, "not vectorized: unhandled data-ref ")
2002 return false;
2005 /* Update DR field in stmt_vec_info struct. */
2006 stmt = DR_STMT (dr);
2007 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW: only a single data reference per statement is supported;
   a second one is a vectorization failure.  */
2009 if (STMT_VINFO_DATA_REF (stmt_info))
2011 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2013 fprintf (vect_dump,
2014 "not vectorized: more than one data ref in stmt: ");
2015 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2017 return false;
2019 STMT_VINFO_DATA_REF (stmt_info) = dr;
2021 /* Check that analysis of the data-ref succeeded. */
2022 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2023 || !DR_STEP (dr))
2025 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2027 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2028 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2030 return false;
/* Without a memory tag the aliasing of the (future) vectorized
   reference cannot be represented -- give up.  */
2032 if (!DR_MEMTAG (dr))
2034 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2036 fprintf (vect_dump, "not vectorized: no memory tag for ");
2037 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2039 return false;
2042 /* Set vectype for STMT. */
2043 scalar_type = TREE_TYPE (DR_REF (dr));
2044 STMT_VINFO_VECTYPE (stmt_info) =
2045 get_vectype_for_scalar_type (scalar_type);
/* The target provides no vector mode for this scalar type.  */
2046 if (!STMT_VINFO_VECTYPE (stmt_info))
2048 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2050 fprintf (vect_dump,
2051 "not vectorized: no vectype for stmt: ");
2052 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2053 fprintf (vect_dump, " scalar_type: ");
2054 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2056 return false;
2060 return true;
2064 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2066 /* Function vect_mark_relevant.
2068 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* Mark STMT as relevant (to degree RELEVANT) and live (if LIVE_P) for
   vectorization, and push it onto WORKLIST so its operand definitions
   get visited -- unless the marking changed nothing, or STMT is a phi.
   When STMT is the last stmt of a recognized pattern, the marking is
   redirected to the pattern stmt that replaces it.  */
2070 static void
2071 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2072 enum vect_relevant relevant, bool live_p)
2074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Snapshot the current marking so we can detect below whether this
   call actually changed anything.  */
2075 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2076 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2078 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2081 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2083 tree pattern_stmt;
2085 /* This is the last stmt in a sequence that was detected as a
2086 pattern that can potentially be vectorized. Don't mark the stmt
2087 as relevant/live because it's not going to vectorized.
2088 Instead mark the pattern-stmt that replaces it. */
2089 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2091 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2092 stmt_info = vinfo_for_stmt (pattern_stmt);
/* The pattern stmt must point back at the original stmt.  */
2093 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2094 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2095 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2096 stmt = pattern_stmt;
/* Liveness is sticky, and relevance is only ever raised (note the
   '>' comparison), so repeated markings monotonically accumulate.  */
2099 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2100 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2101 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2103 if (TREE_CODE (stmt) == PHI_NODE)
2104 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2105 or live will fail vectorization later on. */
2106 return;
/* Nothing changed: the stmt's uses were already processed with at
   least this relevance/liveness, so don't re-queue it.  */
2108 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2109 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2111 if (vect_print_dump_info (REPORT_DETAILS))
2112 fprintf (vect_dump, "already marked relevant/live.");
2113 return;
2116 VEC_safe_push (tree, heap, *worklist, stmt);
2120 /* Function vect_stmt_relevant_p.
2122 Return true if STMT in loop that is represented by LOOP_VINFO is
2123 "relevant for vectorization".
2125 A stmt is considered "relevant for vectorization" if:
2126 - it has uses outside the loop.
2127 - it has vdefs (it alters memory).
2128 - control stmts in the loop (except for the exit condition).
2130 CHECKME: what other side effects would the vectorizer allow? */
/* Compute *RELEVANT and *LIVE_P for STMT and return true if either is
   set, i.e. if STMT must be considered by the vectorizer:
   - *RELEVANT = vect_used_in_loop for control stmts (other than the
     loop exit condition) and for stmts with virtual defs;
   - *LIVE_P = true when a def of STMT is used outside the loop.  */
2132 static bool
2133 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2134 enum vect_relevant *relevant, bool *live_p)
2136 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2137 ssa_op_iter op_iter;
2138 imm_use_iterator imm_iter;
2139 use_operand_p use_p;
2140 def_operand_p def_p;
2142 *relevant = vect_unused_in_loop;
2143 *live_p = false;
2145 /* cond stmt other than loop exit cond. */
2146 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2147 *relevant = vect_used_in_loop;
2149 /* changing memory. */
/* Phis carry no virtual-def operands worth inspecting here.  */
2150 if (TREE_CODE (stmt) != PHI_NODE)
2151 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2155 *relevant = vect_used_in_loop;
2158 /* uses outside the loop. */
2159 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2161 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2163 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2164 if (!flow_bb_inside_loop_p (loop, bb))
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2169 /* We expect all such uses to be in the loop exit phis
2170 (because of loop closed form) */
2171 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2172 gcc_assert (bb == single_exit (loop)->dest);
2174 *live_p = true;
/* Nonzero iff STMT is live and/or relevant in any degree.  */
2179 return (*live_p || *relevant);
2183 /* Function vect_mark_stmts_to_be_vectorized.
2185 Not all stmts in the loop need to be vectorized. For example:
2187 for i...
2188 for j...
2189 1. T0 = i + j
2190 2. T1 = a[T0]
2192 3. j = j + 1
2194 Stmt 1 and 3 do not need to be vectorized, because loop control and
2195 addressing of vectorized data-refs are handled differently.
2197 This pass detects such stmts. */
/* Worklist-based backward data-flow: seed the worklist with the stmts
   that are inherently relevant/live (per vect_stmt_relevant_p), then
   propagate relevance/liveness from each worklist stmt to the defining
   stmts of its uses.  Returns false if an unsupported use is found.  */
2199 static bool
2200 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2202 VEC(tree,heap) *worklist;
2203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2204 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2205 unsigned int nbbs = loop->num_nodes;
2206 block_stmt_iterator si;
2207 tree stmt, use;
2208 stmt_ann_t ann;
2209 ssa_op_iter iter;
2210 unsigned int i;
2211 stmt_vec_info stmt_vinfo;
2212 basic_block bb;
2213 tree phi;
2214 bool live_p;
2215 enum vect_relevant relevant;
2216 tree def, def_stmt;
2217 enum vect_def_type dt;
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2222 worklist = VEC_alloc (tree, heap, 64);
2224 /* 1. Init worklist. */
/* Seed from the loop-header phis...  */
2226 bb = loop->header;
2227 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2229 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "init: phi relevant? ");
2232 print_generic_expr (vect_dump, phi, TDF_SLIM);
2235 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2236 vect_mark_relevant (&worklist, phi, relevant, live_p);
/* ...and from every ordinary stmt in every loop bb.  */
2239 for (i = 0; i < nbbs; i++)
2241 bb = bbs[i];
2242 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2244 stmt = bsi_stmt (si);
2246 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "init: stmt relevant? ");
2249 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2252 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2253 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2258 /* 2. Process_worklist */
2260 while (VEC_length (tree, worklist) > 0)
2262 stmt = VEC_pop (tree, worklist);
2264 if (vect_print_dump_info (REPORT_DETAILS))
2266 fprintf (vect_dump, "worklist: examine stmt: ");
2267 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2270 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2271 in the loop, mark the stmt that defines it (DEF_STMT) as
2272 relevant/irrelevant and live/dead according to the liveness and
2273 relevance properties of STMT.
/* Phi nodes are never pushed onto the worklist (see
   vect_mark_relevant), so only ordinary stmts arrive here.  */
2276 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2278 ann = stmt_ann (stmt);
2279 stmt_vinfo = vinfo_for_stmt (stmt);
2281 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2282 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2284 /* Generally, the liveness and relevance properties of STMT are
2285 propagated to the DEF_STMTs of its USEs:
2286 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2287 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2289 Exceptions:
2291 (case 1)
2292 If USE is used only for address computations (e.g. array indexing),
2293 which does not need to be directly vectorized, then the
2294 liveness/relevance of the respective DEF_STMT is left unchanged.
2296 (case 2)
2297 If STMT has been identified as defining a reduction variable, then
2298 we want to set liveness/relevance as follows:
2299 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2300 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2301 because even though STMT is classified as live (since it defines a
2302 value that is used across loop iterations) and irrelevant (since it
2303 is not used inside the loop), it will be vectorized, and therefore
2304 the corresponding DEF_STMTs need to marked as relevant.
2305 We distinguish between two kinds of relevant stmts - those that are
2306 used by a reduction computation, and those that are (also) used by
2307 a regular computation. This allows us later on to identify stmts
2308 that are used solely by a reduction, and therefore the order of
2309 the results that they produce does not have to be kept.
2312 /* case 2.2: */
2313 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2315 gcc_assert (relevant == vect_unused_in_loop && live_p);
2316 relevant = vect_used_by_reduction;
2317 live_p = false;
2320 i = 0;
2321 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2323 if (vect_print_dump_info (REPORT_DETAILS))
2325 fprintf (vect_dump, "worklist: examine use %d: ", i++);
2326 print_generic_expr (vect_dump, use, TDF_SLIM);
2329 /* case 1: we are only interested in uses that need to be vectorized.
2330 Uses that are used for address computation are not considered
2331 relevant.
2333 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2334 continue;
/* A use the vectorizer cannot classify aborts the whole analysis;
   release the worklist before bailing out.  */
2336 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2338 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2339 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2340 VEC_free (tree, heap, worklist);
2341 return false;
/* Uses with no defining stmt (e.g. invariants/parameters) and defs
   outside the loop need no marking.  */
2344 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2345 continue;
2347 bb = bb_for_stmt (def_stmt);
2348 if (!flow_bb_inside_loop_p (loop, bb))
2349 continue;
2350 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2352 } /* while worklist */
2354 VEC_free (tree, heap, worklist);
2355 return true;
2359 /* Function vect_can_advance_ivs_p
2361 In case the number of iterations that LOOP iterates is unknown at compile
2362 time, an epilog loop will be generated, and the loop induction variables
2363 (IVs) will be "advanced" to the value they are supposed to take just before
2364 the epilog loop. Here we check that the access function of the loop IVs
2365 and the expression that represents the loop bound are simple enough.
2366 These restrictions will be relaxed in the future. */
/* Check every non-virtual, non-reduction phi in the loop header:
   its scalar evolution must be computable and its evolution part in
   this loop must be a non-chrec expression (degree < 2 polynomial),
   so that the IV can be "advanced" for the epilog loop.  Returns
   false at the first phi that fails.  */
2368 static bool
2369 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2371 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2372 basic_block bb = loop->header;
2373 tree phi;
2375 /* Analyze phi functions of the loop header. */
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2380 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2382 tree access_fn = NULL;
2383 tree evolution_part;
2385 if (vect_print_dump_info (REPORT_DETAILS))
2387 fprintf (vect_dump, "Analyze phi: ");
2388 print_generic_expr (vect_dump, phi, TDF_SLIM);
2391 /* Skip virtual phi's. The data dependences that are associated with
2392 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2394 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2396 if (vect_print_dump_info (REPORT_DETAILS))
2397 fprintf (vect_dump, "virtual phi. skip.");
2398 continue;
2401 /* Skip reduction phis. */
/* Reductions are handled by the reduction-epilog code, not by
   IV advancement.  */
2403 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "reduc phi. skip.");
2407 continue;
2410 /* Analyze the evolution function. */
2412 access_fn = instantiate_parameters
2413 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2415 if (!access_fn)
2417 if (vect_print_dump_info (REPORT_DETAILS))
2418 fprintf (vect_dump, "No Access function.");
2419 return false;
2422 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "Access function of PHI: ");
2425 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2428 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2430 if (evolution_part == NULL_TREE)
2432 if (vect_print_dump_info (REPORT_DETAILS))
2433 fprintf (vect_dump, "No evolution.");
2434 return false;
2437 /* FORNOW: We do not transform initial conditions of IVs
2438 which evolution functions are a polynomial of degree >= 2. */
2440 if (tree_is_chrec (evolution_part))
2441 return false;
2444 return true;
2448 /* Function vect_get_loop_niters.
2450 Determine how many iterations the loop is executed.
2451 If an expression that represents the number of iterations
2452 can be constructed, place it in NUMBER_OF_ITERATIONS.
2453 Return the loop exit condition. */
2455 static tree
2456 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2458 tree niters;
2460 if (vect_print_dump_info (REPORT_DETAILS))
2461 fprintf (vect_dump, "=== get_loop_niters ===");
2463 niters = number_of_exit_cond_executions (loop);
2465 if (niters != NULL_TREE
2466 && niters != chrec_dont_know)
2468 *number_of_iterations = niters;
2470 if (vect_print_dump_info (REPORT_DETAILS))
2472 fprintf (vect_dump, "==> get_loop_niters:" );
2473 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2477 return get_loop_exit_condition (loop);
2481 /* Function vect_analyze_loop_form.
2483 Verify the following restrictions (some may be relaxed in the future):
2484 - it's an inner-most loop
2485 - number of BBs = 2 (which are the loop header and the latch)
2486 - the loop has a pre-header
2487 - the loop has a single entry and exit
2488 - the loop exit condition is simple enough, and the number of iterations
2489 can be analyzed (a countable loop). */
2491 static loop_vec_info
2492 vect_analyze_loop_form (struct loop *loop)
2494 loop_vec_info loop_vinfo;
2495 tree loop_cond;
2496 tree number_of_iterations = NULL;
2498 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2501 if (loop->inner)
2503 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2504 fprintf (vect_dump, "not vectorized: nested loop.");
2505 return NULL;
2508 if (!single_exit (loop)
2509 || loop->num_nodes != 2
2510 || EDGE_COUNT (loop->header->preds) != 2)
2512 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2514 if (!single_exit (loop))
2515 fprintf (vect_dump, "not vectorized: multiple exits.");
2516 else if (loop->num_nodes != 2)
2517 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2518 else if (EDGE_COUNT (loop->header->preds) != 2)
2519 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2522 return NULL;
2525 /* We assume that the loop exit condition is at the end of the loop. i.e,
2526 that the loop is represented as a do-while (with a proper if-guard
2527 before the loop if needed), where the loop header contains all the
2528 executable statements, and the latch is empty. */
2529 if (!empty_block_p (loop->latch)
2530 || phi_nodes (loop->latch))
2532 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2533 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2534 return NULL;
2537 /* Make sure there exists a single-predecessor exit bb: */
2538 if (!single_pred_p (single_exit (loop)->dest))
2540 edge e = single_exit (loop);
2541 if (!(e->flags & EDGE_ABNORMAL))
2543 split_loop_exit_edge (e);
2544 if (vect_print_dump_info (REPORT_DETAILS))
2545 fprintf (vect_dump, "split exit edge.");
2547 else
2549 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2550 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2551 return NULL;
2555 if (empty_block_p (loop->header))
2557 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2558 fprintf (vect_dump, "not vectorized: empty loop.");
2559 return NULL;
2562 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2563 if (!loop_cond)
2565 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2566 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2567 return NULL;
2570 if (!number_of_iterations)
2572 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2573 fprintf (vect_dump,
2574 "not vectorized: number of iterations cannot be computed.");
2575 return NULL;
2578 if (chrec_contains_undetermined (number_of_iterations))
2580 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2581 fprintf (vect_dump, "Infinite number of iterations.");
2582 return false;
2585 loop_vinfo = new_loop_vec_info (loop);
2586 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2588 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2590 if (vect_print_dump_info (REPORT_DETAILS))
2592 fprintf (vect_dump, "Symbolic number of iterations is ");
2593 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2596 else
2597 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2599 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2600 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2601 return NULL;
2604 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2606 return loop_vinfo;
2610 /* Function vect_analyze_loop.
2612 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2613 for it. The different analyses will record information in the
2614 loop_vec_info struct. */
/* Driver: run the vectorizer's analysis phases on LOOP in order
   (loop form, data-refs, scalar cycles, pattern recog, relevance
   marking, alignment, vectorization factor, dependences, access
   patterns, alignment enhancement, operations).  Any phase failure
   destroys the loop_vec_info and returns NULL; success sets
   LOOP_VINFO_VECTORIZABLE_P and returns the filled-in info.  */
2615 loop_vec_info
2616 vect_analyze_loop (struct loop *loop)
2618 bool ok;
2619 loop_vec_info loop_vinfo;
2621 if (vect_print_dump_info (REPORT_DETAILS))
2622 fprintf (vect_dump, "===== analyze_loop_nest =====");
2624 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2626 loop_vinfo = vect_analyze_loop_form (loop);
2627 if (!loop_vinfo)
2629 if (vect_print_dump_info (REPORT_DETAILS))
2630 fprintf (vect_dump, "bad loop form.");
/* No loop_vinfo was created, so nothing to destroy here.  */
2631 return NULL;
2634 /* Find all data references in the loop (which correspond to vdefs/vuses)
2635 and analyze their evolution in the loop.
2637 FORNOW: Handle only simple, array references, which
2638 alignment can be forced, and aligned pointer-references. */
2640 ok = vect_analyze_data_refs (loop_vinfo);
2641 if (!ok)
2643 if (vect_print_dump_info (REPORT_DETAILS))
2644 fprintf (vect_dump, "bad data references.");
2645 destroy_loop_vec_info (loop_vinfo);
2646 return NULL;
2649 /* Classify all cross-iteration scalar data-flow cycles.
2650 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2652 vect_analyze_scalar_cycles (loop_vinfo);
/* Pattern recognition must run before relevance marking so that
   pattern stmts can be marked instead of the stmts they replace.  */
2654 vect_pattern_recog (loop_vinfo);
2656 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2658 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2659 if (!ok)
2661 if (vect_print_dump_info (REPORT_DETAILS))
2662 fprintf (vect_dump, "unexpected pattern.");
2663 destroy_loop_vec_info (loop_vinfo);
2664 return NULL;
2667 /* Analyze the alignment of the data-refs in the loop.
2668 Fail if a data reference is found that cannot be vectorized. */
2670 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2671 if (!ok)
2673 if (vect_print_dump_info (REPORT_DETAILS))
2674 fprintf (vect_dump, "bad data alignment.");
2675 destroy_loop_vec_info (loop_vinfo);
2676 return NULL;
2679 ok = vect_determine_vectorization_factor (loop_vinfo);
2680 if (!ok)
2682 if (vect_print_dump_info (REPORT_DETAILS))
2683 fprintf (vect_dump, "can't determine vectorization factor.");
2684 destroy_loop_vec_info (loop_vinfo);
2685 return NULL;
2688 /* Analyze data dependences between the data-refs in the loop.
2689 FORNOW: fail at the first data dependence that we encounter. */
2691 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2692 if (!ok)
2694 if (vect_print_dump_info (REPORT_DETAILS))
2695 fprintf (vect_dump, "bad data dependence.");
2696 destroy_loop_vec_info (loop_vinfo);
2697 return NULL;
2700 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2701 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2703 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2704 if (!ok)
2706 if (vect_print_dump_info (REPORT_DETAILS))
2707 fprintf (vect_dump, "bad data access.");
2708 destroy_loop_vec_info (loop_vinfo);
2709 return NULL;
2712 /* This pass will decide on using loop versioning and/or loop peeling in
2713 order to enhance the alignment of data references in the loop. */
2715 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2716 if (!ok)
2718 if (vect_print_dump_info (REPORT_DETAILS))
2719 fprintf (vect_dump, "bad data alignment.");
2720 destroy_loop_vec_info (loop_vinfo);
2721 return NULL;
2724 /* Scan all the operations in the loop and make sure they are
2725 vectorizable. */
2727 ok = vect_analyze_operations (loop_vinfo);
2728 if (!ok)
2730 if (vect_print_dump_info (REPORT_DETAILS))
2731 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2732 destroy_loop_vec_info (loop_vinfo);
2733 return NULL;
2736 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2738 return loop_vinfo;