/* Source: gcc/tree-vect-analyze.c from official-gcc.git
   (blob 88c937c3fa3bf5f23246b2a6e2cd7e9a2f1e675d).  */
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
78 in the loop.
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
81 original loop:
82 for (i=0; i<N; i++){
83 a[i] = b[i] + c[i];
86 vectorized loop:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
92 static bool
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
100 int i;
101 tree scalar_type;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
113 unsigned int nunits;
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
115 tree vectype;
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
124 continue;
126 gcc_assert (stmt_info);
128 /* skip stmts which do not need to be vectorized. */
129 if (!STMT_VINFO_RELEVANT_P (stmt_info)
130 && !STMT_VINFO_LIVE_P (stmt_info))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "skip.");
134 continue;
137 if (!GIMPLE_STMT_P (stmt)
138 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
143 print_generic_expr (vect_dump, stmt, TDF_SLIM);
145 return false;
148 if (STMT_VINFO_VECTYPE (stmt_info))
150 /* The only case when a vectype had been already set is for stmts
151 that contain a dataref, or for "pattern-stmts" (stmts generated
152 by the vectorizer to represent/replace a certain idiom). */
153 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
154 || is_pattern_stmt_p (stmt_info));
155 vectype = STMT_VINFO_VECTYPE (stmt_info);
157 else
159 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
160 && !is_pattern_stmt_p (stmt_info));
162 /* We set the vectype according to the type of the result (lhs).
163 For stmts whose result-type is different than the type of the
164 arguments (e.g. demotion, promotion), vectype will be reset
165 appropriately (later). Note that we have to visit the smallest
166 datatype in this function, because that determines the VF.
167 If the smallest datatype in the loop is present only as the
168 rhs of a promotion operation - we'd miss it here.
169 However, in such a case, that a variable of this datatype
170 does not appear in the lhs anywhere in the loop, it shouldn't
171 affect the vectorization factor. */
172 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
174 if (vect_print_dump_info (REPORT_DETAILS))
176 fprintf (vect_dump, "get vectype for scalar type: ");
177 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
180 vectype = get_vectype_for_scalar_type (scalar_type);
181 if (!vectype)
183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
185 fprintf (vect_dump,
186 "not vectorized: unsupported data-type ");
187 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
189 return false;
191 STMT_VINFO_VECTYPE (stmt_info) = vectype;
194 if (vect_print_dump_info (REPORT_DETAILS))
196 fprintf (vect_dump, "vectype: ");
197 print_generic_expr (vect_dump, vectype, TDF_SLIM);
200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
201 if (vect_print_dump_info (REPORT_DETAILS))
202 fprintf (vect_dump, "nunits = %d", nunits);
204 if (!vectorization_factor
205 || (nunits > vectorization_factor))
206 vectorization_factor = nunits;
211 /* TODO: Analyze cost. Decide if worth while to vectorize. */
213 if (vectorization_factor <= 1)
215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
216 fprintf (vect_dump, "not vectorized: unsupported data-type");
217 return false;
219 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
221 return true;
225 /* Function vect_analyze_operations.
227 Scan the loop stmts and make sure they are all vectorizable. */
229 static bool
230 vect_analyze_operations (loop_vec_info loop_vinfo)
232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
233 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
234 int nbbs = loop->num_nodes;
235 block_stmt_iterator si;
236 unsigned int vectorization_factor = 0;
237 int i;
238 bool ok;
239 tree phi;
240 stmt_vec_info stmt_info;
241 bool need_to_vectorize = false;
243 if (vect_print_dump_info (REPORT_DETAILS))
244 fprintf (vect_dump, "=== vect_analyze_operations ===");
246 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
247 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
249 for (i = 0; i < nbbs; i++)
251 basic_block bb = bbs[i];
253 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
255 stmt_info = vinfo_for_stmt (phi);
256 if (vect_print_dump_info (REPORT_DETAILS))
258 fprintf (vect_dump, "examining phi: ");
259 print_generic_expr (vect_dump, phi, TDF_SLIM);
262 gcc_assert (stmt_info);
264 if (STMT_VINFO_LIVE_P (stmt_info))
266 /* FORNOW: not yet supported. */
267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
268 fprintf (vect_dump, "not vectorized: value used after loop.");
269 return false;
272 if (STMT_VINFO_RELEVANT_P (stmt_info))
274 /* Most likely a reduction-like computation that is used
275 in the loop. */
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported pattern.");
278 return false;
282 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
284 tree stmt = bsi_stmt (si);
285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
287 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "==> examining statement: ");
290 print_generic_expr (vect_dump, stmt, TDF_SLIM);
293 gcc_assert (stmt_info);
295 /* skip stmts which do not need to be vectorized.
296 this is expected to include:
297 - the COND_EXPR which is the loop exit condition
298 - any LABEL_EXPRs in the loop
299 - computations that are used only for array indexing or loop
300 control */
302 if (!STMT_VINFO_RELEVANT_P (stmt_info)
303 && !STMT_VINFO_LIVE_P (stmt_info))
305 if (vect_print_dump_info (REPORT_DETAILS))
306 fprintf (vect_dump, "irrelevant.");
307 continue;
310 if (STMT_VINFO_RELEVANT_P (stmt_info))
312 gcc_assert (GIMPLE_STMT_P (stmt)
313 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
314 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
316 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
317 || vectorizable_type_demotion (stmt, NULL, NULL)
318 || vectorizable_operation (stmt, NULL, NULL)
319 || vectorizable_assignment (stmt, NULL, NULL)
320 || vectorizable_load (stmt, NULL, NULL)
321 || vectorizable_call (stmt, NULL, NULL)
322 || vectorizable_store (stmt, NULL, NULL)
323 || vectorizable_condition (stmt, NULL, NULL));
325 if (!ok)
327 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
329 fprintf (vect_dump,
330 "not vectorized: relevant stmt not supported: ");
331 print_generic_expr (vect_dump, stmt, TDF_SLIM);
333 return false;
335 need_to_vectorize = true;
338 if (STMT_VINFO_LIVE_P (stmt_info))
340 ok = vectorizable_reduction (stmt, NULL, NULL);
342 if (ok)
343 need_to_vectorize = true;
344 else
345 ok = vectorizable_live_operation (stmt, NULL, NULL);
347 if (!ok)
349 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
351 fprintf (vect_dump,
352 "not vectorized: live stmt not supported: ");
353 print_generic_expr (vect_dump, stmt, TDF_SLIM);
355 return false;
358 } /* stmts in bb */
359 } /* bbs */
361 /* TODO: Analyze cost. Decide if worth while to vectorize. */
363 /* All operations in the loop are either irrelevant (deal with loop
364 control, or dead), or only used outside the loop and can be moved
365 out of the loop (e.g. invariants, inductions). The loop can be
366 optimized away by scalar optimizations. We're better off not
367 touching this loop. */
368 if (!need_to_vectorize)
370 if (vect_print_dump_info (REPORT_DETAILS))
371 fprintf (vect_dump,
372 "All the computation can be taken out of the loop.");
373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
374 fprintf (vect_dump,
375 "not vectorized: redundant loop. no profit to vectorize.");
376 return false;
379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
380 && vect_print_dump_info (REPORT_DETAILS))
381 fprintf (vect_dump,
382 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
383 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
385 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
386 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
387 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
388 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
389 * vectorization_factor))))
391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
392 fprintf (vect_dump, "not vectorized: iteration count too small.");
393 return false;
396 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
397 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
398 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
400 if (vect_print_dump_info (REPORT_DETAILS))
401 fprintf (vect_dump, "epilog loop required.");
402 if (!vect_can_advance_ivs_p (loop_vinfo))
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
405 fprintf (vect_dump,
406 "not vectorized: can't create epilog loop 1.");
407 return false;
409 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
411 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
412 fprintf (vect_dump,
413 "not vectorized: can't create epilog loop 2.");
414 return false;
418 return true;
422 /* Function exist_non_indexing_operands_for_use_p
424 USE is one of the uses attached to STMT. Check if USE is
425 used in STMT for anything other than indexing an array. */
427 static bool
428 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
430 tree operand;
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
433 /* USE corresponds to some operand in STMT. If there is no data
434 reference in STMT, then any operand that corresponds to USE
435 is not indexing an array. */
436 if (!STMT_VINFO_DATA_REF (stmt_info))
437 return true;
439 /* STMT has a data_ref. FORNOW this means that its of one of
440 the following forms:
441 -1- ARRAY_REF = var
442 -2- var = ARRAY_REF
443 (This should have been verified in analyze_data_refs).
445 'var' in the second case corresponds to a def, not a use,
446 so USE cannot correspond to any operands that are not used
447 for array indexing.
449 Therefore, all we need to check is if STMT falls into the
450 first case, and whether var corresponds to USE. */
452 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
453 return false;
455 operand = GIMPLE_STMT_OPERAND (stmt, 1);
457 if (TREE_CODE (operand) != SSA_NAME)
458 return false;
460 if (operand == use)
461 return true;
463 return false;
467 /* Function vect_analyze_scalar_cycles.
469 Examine the cross iteration def-use cycles of scalar variables, by
470 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
471 following: invariant, induction, reduction, unknown.
473 Some forms of scalar cycles are not yet supported.
475 Example1: reduction: (unsupported yet)
477 loop1:
478 for (i=0; i<N; i++)
479 sum += a[i];
481 Example2: induction: (unsupported yet)
483 loop2:
484 for (i=0; i<N; i++)
485 a[i] = i;
487 Note: the following loop *is* vectorizable:
489 loop3:
490 for (i=0; i<N; i++)
491 a[i] = b[i];
493 even though it has a def-use cycle caused by the induction variable i:
495 loop: i_2 = PHI (i_0, i_1)
496 a[i_2] = ...;
497 i_1 = i_2 + 1;
498 GOTO loop;
500 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
501 it does not need to be vectorized because it is only used for array
502 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
503 loop2 on the other hand is relevant (it is being written to memory).
506 static void
507 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
509 tree phi;
510 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
511 basic_block bb = loop->header;
512 tree dummy;
514 if (vect_print_dump_info (REPORT_DETAILS))
515 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
517 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
519 tree access_fn = NULL;
520 tree def = PHI_RESULT (phi);
521 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
522 tree reduc_stmt;
524 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "Analyze phi: ");
527 print_generic_expr (vect_dump, phi, TDF_SLIM);
530 /* Skip virtual phi's. The data dependences that are associated with
531 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
533 if (!is_gimple_reg (SSA_NAME_VAR (def)))
535 if (vect_print_dump_info (REPORT_DETAILS))
536 fprintf (vect_dump, "virtual phi. skip.");
537 continue;
540 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
542 /* Analyze the evolution function. */
544 access_fn = analyze_scalar_evolution (loop, def);
546 if (!access_fn)
547 continue;
549 if (vect_print_dump_info (REPORT_DETAILS))
551 fprintf (vect_dump, "Access function of PHI: ");
552 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
555 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
557 if (vect_print_dump_info (REPORT_DETAILS))
558 fprintf (vect_dump, "Detected induction.");
559 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
560 continue;
563 /* TODO: handle invariant phis */
565 reduc_stmt = vect_is_simple_reduction (loop, phi);
566 if (reduc_stmt)
568 if (vect_print_dump_info (REPORT_DETAILS))
569 fprintf (vect_dump, "Detected reduction.");
570 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
571 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
572 vect_reduction_def;
574 else
575 if (vect_print_dump_info (REPORT_DETAILS))
576 fprintf (vect_dump, "Unknown def-use cycle pattern.");
580 return;
584 /* Function vect_insert_into_interleaving_chain.
586 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
588 static void
589 vect_insert_into_interleaving_chain (struct data_reference *dra,
590 struct data_reference *drb)
592 tree prev, next, next_init;
593 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
594 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
596 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
597 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
598 while (next)
600 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
601 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
603 /* Insert here. */
604 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
605 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
606 return;
608 prev = next;
609 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
612 /* We got to the end of the list. Insert here. */
613 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
614 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
618 /* Function vect_update_interleaving_chain.
620 For two data-refs DRA and DRB that are a part of a chain interleaved data
621 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
623 There are four possible cases:
624 1. New stmts - both DRA and DRB are not a part of any chain:
625 FIRST_DR = DRB
626 NEXT_DR (DRB) = DRA
627 2. DRB is a part of a chain and DRA is not:
628 no need to update FIRST_DR
629 no need to insert DRB
630 insert DRA according to init
631 3. DRA is a part of a chain and DRB is not:
632 if (init of FIRST_DR > init of DRB)
633 FIRST_DR = DRB
634 NEXT(FIRST_DR) = previous FIRST_DR
635 else
636 insert DRB according to its init
637 4. both DRA and DRB are in some interleaving chains:
638 choose the chain with the smallest init of FIRST_DR
639 insert the nodes of the second chain into the first one. */
641 static void
642 vect_update_interleaving_chain (struct data_reference *drb,
643 struct data_reference *dra)
645 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
646 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
647 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
648 tree node, prev, next, node_init, first_stmt;
650 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
651 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
653 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
654 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
655 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
656 return;
659 /* 2. DRB is a part of a chain and DRA is not. */
660 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
662 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
663 /* Insert DRA into the chain of DRB. */
664 vect_insert_into_interleaving_chain (dra, drb);
665 return;
668 /* 3. DRA is a part of a chain and DRB is not. */
669 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
671 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
672 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
673 old_first_stmt)));
674 tree tmp;
676 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
678 /* DRB's init is smaller than the init of the stmt previously marked
679 as the first stmt of the interleaving chain of DRA. Therefore, we
680 update FIRST_STMT and put DRB in the head of the list. */
681 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
682 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
684 /* Update all the stmts in the list to point to the new FIRST_STMT. */
685 tmp = old_first_stmt;
686 while (tmp)
688 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
689 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
692 else
694 /* Insert DRB in the list of DRA. */
695 vect_insert_into_interleaving_chain (drb, dra);
696 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
698 return;
701 /* 4. both DRA and DRB are in some interleaving chains. */
702 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
703 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
704 if (first_a == first_b)
705 return;
706 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
707 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
709 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
711 /* Insert the nodes of DRA chain into the DRB chain.
712 After inserting a node, continue from this node of the DRB chain (don't
713 start from the beginning. */
714 node = DR_GROUP_FIRST_DR (stmtinfo_a);
715 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
716 first_stmt = first_b;
718 else
720 /* Insert the nodes of DRB chain into the DRA chain.
721 After inserting a node, continue from this node of the DRA chain (don't
722 start from the beginning. */
723 node = DR_GROUP_FIRST_DR (stmtinfo_b);
724 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
725 first_stmt = first_a;
728 while (node)
730 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
731 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
732 while (next)
734 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
735 if (tree_int_cst_compare (next_init, node_init) > 0)
737 /* Insert here. */
738 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
739 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
740 prev = node;
741 break;
743 prev = next;
744 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
746 if (!next)
748 /* We got to the end of the list. Insert here. */
749 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
750 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
751 prev = node;
753 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
754 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
759 /* Function vect_equal_offsets.
761 Check if OFFSET1 and OFFSET2 are identical expressions. */
763 static bool
764 vect_equal_offsets (tree offset1, tree offset2)
766 bool res0, res1;
768 STRIP_NOPS (offset1);
769 STRIP_NOPS (offset2);
771 if (offset1 == offset2)
772 return true;
774 if (TREE_CODE (offset1) != TREE_CODE (offset2)
775 || !BINARY_CLASS_P (offset1)
776 || !BINARY_CLASS_P (offset2))
777 return false;
779 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
780 TREE_OPERAND (offset2, 0));
781 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
782 TREE_OPERAND (offset2, 1));
784 return (res0 && res1);
788 /* Function vect_check_interleaving.
790 Check if DRA and DRB are a part of interleaving. In case they are, insert
791 DRA and DRB in an interleaving chain. */
793 static void
794 vect_check_interleaving (struct data_reference *dra,
795 struct data_reference *drb)
797 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
799 /* Check that the data-refs have same first location (except init) and they
800 are both either store or load (not load and store). */
801 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
802 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
803 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
804 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
805 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
806 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
807 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
808 || DR_IS_READ (dra) != DR_IS_READ (drb))
809 return;
811 /* Check:
812 1. data-refs are of the same type
813 2. their steps are equal
814 3. the step is greater than the difference between data-refs' inits */
815 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
816 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
818 if (type_size_a != type_size_b
819 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
820 return;
822 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
823 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
824 step = TREE_INT_CST_LOW (DR_STEP (dra));
826 if (init_a > init_b)
828 /* If init_a == init_b + the size of the type * k, we have an interleaving,
829 and DRB is accessed before DRA. */
830 diff_mod_size = (init_a - init_b) % type_size_a;
832 if ((init_a - init_b) > step)
833 return;
835 if (diff_mod_size == 0)
837 vect_update_interleaving_chain (drb, dra);
838 if (vect_print_dump_info (REPORT_DR_DETAILS))
840 fprintf (vect_dump, "Detected interleaving ");
841 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
842 fprintf (vect_dump, " and ");
843 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
845 return;
848 else
850 /* If init_b == init_a + the size of the type * k, we have an
851 interleaving, and DRA is accessed before DRB. */
852 diff_mod_size = (init_b - init_a) % type_size_a;
854 if ((init_b - init_a) > step)
855 return;
857 if (diff_mod_size == 0)
859 vect_update_interleaving_chain (dra, drb);
860 if (vect_print_dump_info (REPORT_DR_DETAILS))
862 fprintf (vect_dump, "Detected interleaving ");
863 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
864 fprintf (vect_dump, " and ");
865 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
867 return;
873 /* Function vect_analyze_data_ref_dependence.
875 Return TRUE if there (might) exist a dependence between a memory-reference
876 DRA and a memory-reference DRB. */
878 static bool
879 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
880 loop_vec_info loop_vinfo)
882 unsigned int i;
883 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
884 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
885 struct data_reference *dra = DDR_A (ddr);
886 struct data_reference *drb = DDR_B (ddr);
887 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
888 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
889 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
890 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
891 lambda_vector dist_v;
892 unsigned int loop_depth;
894 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
896 /* Independent data accesses. */
897 vect_check_interleaving (dra, drb);
898 return false;
901 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
902 return false;
904 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
906 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
908 fprintf (vect_dump,
909 "not vectorized: can't determine dependence between ");
910 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
911 fprintf (vect_dump, " and ");
912 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
914 return true;
917 if (DDR_NUM_DIST_VECTS (ddr) == 0)
919 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
921 fprintf (vect_dump, "not vectorized: bad dist vector for ");
922 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
923 fprintf (vect_dump, " and ");
924 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
926 return true;
929 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
930 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
932 int dist = dist_v[loop_depth];
934 if (vect_print_dump_info (REPORT_DR_DETAILS))
935 fprintf (vect_dump, "dependence distance = %d.", dist);
937 /* Same loop iteration. */
938 if (dist % vectorization_factor == 0 && dra_size == drb_size)
940 /* Two references with distance zero have the same alignment. */
941 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
942 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
943 if (vect_print_dump_info (REPORT_ALIGNMENT))
944 fprintf (vect_dump, "accesses have the same alignment.");
945 if (vect_print_dump_info (REPORT_DR_DETAILS))
947 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
948 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
949 fprintf (vect_dump, " and ");
950 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
953 /* For interleaving, mark that there is a read-write dependency if
954 necessary. We check before that one of the data-refs is store. */
955 if (DR_IS_READ (dra))
956 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
957 else
959 if (DR_IS_READ (drb))
960 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
963 continue;
966 if (abs (dist) >= vectorization_factor)
968 /* Dependence distance does not create dependence, as far as vectorization
969 is concerned, in this case. */
970 if (vect_print_dump_info (REPORT_DR_DETAILS))
971 fprintf (vect_dump, "dependence distance >= VF.");
972 continue;
975 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
977 fprintf (vect_dump,
978 "not vectorized: possible dependence between data-refs ");
979 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
980 fprintf (vect_dump, " and ");
981 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
984 return true;
987 return false;
991 /* Function vect_analyze_data_ref_dependences.
993 Examine all the data references in the loop, and make sure there do not
994 exist any data dependences between them. */
996 static bool
997 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
999 unsigned int i;
1000 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1001 struct data_dependence_relation *ddr;
1003 if (vect_print_dump_info (REPORT_DETAILS))
1004 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1006 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1007 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1008 return false;
1010 return true;
1014 /* Function vect_compute_data_ref_alignment
1016 Compute the misalignment of the data reference DR.
1018 Output:
1019 1. If during the misalignment computation it is found that the data reference
1020 cannot be vectorized then false is returned.
1021 2. DR_MISALIGNMENT (DR) is defined.
1023 FOR NOW: No analysis is actually performed. Misalignment is calculated
1024 only for trivial cases. TODO. */
1026 static bool
1027 vect_compute_data_ref_alignment (struct data_reference *dr)
1029 tree stmt = DR_STMT (dr);
1030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1031 tree ref = DR_REF (dr);
1032 tree vectype;
1033 tree base, base_addr;
1034 bool base_aligned;
1035 tree misalign;
1036 tree aligned_to, alignment;
1038 if (vect_print_dump_info (REPORT_DETAILS))
1039 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1041 /* Initialize misalignment to unknown. */
1042 DR_MISALIGNMENT (dr) = -1;
1044 misalign = DR_OFFSET_MISALIGNMENT (dr);
1045 aligned_to = DR_ALIGNED_TO (dr);
1046 base_addr = DR_BASE_ADDRESS (dr);
1047 base = build_fold_indirect_ref (base_addr);
1048 vectype = STMT_VINFO_VECTYPE (stmt_info);
1049 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1051 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1052 || !misalign)
1054 if (vect_print_dump_info (REPORT_DETAILS))
1056 fprintf (vect_dump, "Unknown alignment for access: ");
1057 print_generic_expr (vect_dump, base, TDF_SLIM);
1059 return true;
1062 if ((DECL_P (base)
1063 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1064 alignment) >= 0)
1065 || (TREE_CODE (base_addr) == SSA_NAME
1066 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1067 TREE_TYPE (base_addr)))),
1068 alignment) >= 0))
1069 base_aligned = true;
1070 else
1071 base_aligned = false;
1073 if (!base_aligned)
1075 /* Do not change the alignment of global variables if
1076 flag_section_anchors is enabled. */
1077 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1078 || (TREE_STATIC (base) && flag_section_anchors))
1080 if (vect_print_dump_info (REPORT_DETAILS))
1082 fprintf (vect_dump, "can't force alignment of ref: ");
1083 print_generic_expr (vect_dump, ref, TDF_SLIM);
1085 return true;
1088 /* Force the alignment of the decl.
1089 NOTE: This is the only change to the code we make during
1090 the analysis phase, before deciding to vectorize the loop. */
1091 if (vect_print_dump_info (REPORT_DETAILS))
1092 fprintf (vect_dump, "force alignment");
1093 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1094 DECL_USER_ALIGN (base) = 1;
1097 /* At this point we assume that the base is aligned. */
1098 gcc_assert (base_aligned
1099 || (TREE_CODE (base) == VAR_DECL
1100 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1102 /* Modulo alignment. */
1103 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1105 if (!host_integerp (misalign, 1))
1107 /* Negative or overflowed misalignment value. */
1108 if (vect_print_dump_info (REPORT_DETAILS))
1109 fprintf (vect_dump, "unexpected misalign value");
1110 return false;
1113 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1115 if (vect_print_dump_info (REPORT_DETAILS))
1117 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1118 print_generic_expr (vect_dump, ref, TDF_SLIM);
1121 return true;
1125 /* Function vect_compute_data_refs_alignment
1127 Compute the misalignment of data references in the loop.
1128 Return FALSE if a data reference is found that cannot be vectorized. */
1130 static bool
1131 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1133 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1134 struct data_reference *dr;
1135 unsigned int i;
1137 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1138 if (!vect_compute_data_ref_alignment (dr))
1139 return false;
1141 return true;
1145 /* Function vect_update_misalignment_for_peel
1147 DR - the data reference whose misalignment is to be adjusted.
1148 DR_PEEL - the data reference whose misalignment is being made
1149 zero in the vector loop by the peel.
1150 NPEEL - the number of iterations in the peel loop if the misalignment
1151 of DR_PEEL is known at compile time. */
1153 static void
1154 vect_update_misalignment_for_peel (struct data_reference *dr,
1155 struct data_reference *dr_peel, int npeel)
1157 unsigned int i;
1158 VEC(dr_p,heap) *same_align_drs;
1159 struct data_reference *current_dr;
1160 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1161 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1162 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1163 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1165 /* For interleaved data accesses the step in the loop must be multiplied by
1166 the size of the interleaving group. */
1167 if (DR_GROUP_FIRST_DR (stmt_info))
1168 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1169 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1170 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1172 if (known_alignment_for_access_p (dr)
1173 && known_alignment_for_access_p (dr_peel)
1174 && (DR_MISALIGNMENT (dr) / dr_size ==
1175 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1177 DR_MISALIGNMENT (dr) = 0;
1178 return;
1181 /* It can be assumed that the data refs with the same alignment as dr_peel
1182 are aligned in the vector loop. */
1183 same_align_drs
1184 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1185 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1187 if (current_dr != dr)
1188 continue;
1189 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1190 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1191 DR_MISALIGNMENT (dr) = 0;
1192 return;
1195 if (known_alignment_for_access_p (dr)
1196 && known_alignment_for_access_p (dr_peel))
1198 DR_MISALIGNMENT (dr) += npeel * dr_size;
1199 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1200 return;
1203 if (vect_print_dump_info (REPORT_DETAILS))
1204 fprintf (vect_dump, "Setting misalignment to -1.");
1205 DR_MISALIGNMENT (dr) = -1;
1209 /* Function vect_verify_datarefs_alignment
1211 Return TRUE if all data references in the loop can be
1212 handled with respect to alignment. */
1214 static bool
1215 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1217 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1218 struct data_reference *dr;
1219 enum dr_alignment_support supportable_dr_alignment;
1220 unsigned int i;
1222 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1224 tree stmt = DR_STMT (dr);
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1227 /* For interleaving, only the alignment of the first access matters. */
1228 if (DR_GROUP_FIRST_DR (stmt_info)
1229 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1230 continue;
1232 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1233 if (!supportable_dr_alignment)
1235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1237 if (DR_IS_READ (dr))
1238 fprintf (vect_dump,
1239 "not vectorized: unsupported unaligned load.");
1240 else
1241 fprintf (vect_dump,
1242 "not vectorized: unsupported unaligned store.");
1244 return false;
1246 if (supportable_dr_alignment != dr_aligned
1247 && vect_print_dump_info (REPORT_ALIGNMENT))
1248 fprintf (vect_dump, "Vectorizing an unaligned access.");
1250 return true;
1254 /* Function vect_enhance_data_refs_alignment
1256 This pass will use loop versioning and loop peeling in order to enhance
1257 the alignment of data references in the loop.
1259 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1260 original loop is to be vectorized; Any other loops that are created by
1261 the transformations performed in this pass - are not supposed to be
1262 vectorized. This restriction will be relaxed.
1264 This pass will require a cost model to guide it whether to apply peeling
1265 or versioning or a combination of the two. For example, the scheme that
1266 intel uses when given a loop with several memory accesses, is as follows:
1267 choose one memory access ('p') which alignment you want to force by doing
1268 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1269 other accesses are not necessarily aligned, or (2) use loop versioning to
1270 generate one loop in which all accesses are aligned, and another loop in
1271 which only 'p' is necessarily aligned.
1273 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1274 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1275 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1277 Devising a cost model is the most critical aspect of this work. It will
1278 guide us on which access to peel for, whether to use loop versioning, how
1279 many versions to create, etc. The cost model will probably consist of
1280 generic considerations as well as target specific considerations (on
1281 powerpc for example, misaligned stores are more painful than misaligned
1282 loads).
1284 Here are the general steps involved in alignment enhancements:
1286 -- original loop, before alignment analysis:
1287 for (i=0; i<N; i++){
1288 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1289 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1292 -- After vect_compute_data_refs_alignment:
1293 for (i=0; i<N; i++){
1294 x = q[i]; # DR_MISALIGNMENT(q) = 3
1295 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1298 -- Possibility 1: we do loop versioning:
1299 if (p is aligned) {
1300 for (i=0; i<N; i++){ # loop 1A
1301 x = q[i]; # DR_MISALIGNMENT(q) = 3
1302 p[i] = y; # DR_MISALIGNMENT(p) = 0
1305 else {
1306 for (i=0; i<N; i++){ # loop 1B
1307 x = q[i]; # DR_MISALIGNMENT(q) = 3
1308 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1312 -- Possibility 2: we do loop peeling:
1313 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1314 x = q[i];
1315 p[i] = y;
1317 for (i = 3; i < N; i++){ # loop 2A
1318 x = q[i]; # DR_MISALIGNMENT(q) = 0
1319 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1322 -- Possibility 3: combination of loop peeling and versioning:
1323 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1324 x = q[i];
1325 p[i] = y;
1327 if (p is aligned) {
1328 for (i = 3; i<N; i++){ # loop 3A
1329 x = q[i]; # DR_MISALIGNMENT(q) = 0
1330 p[i] = y; # DR_MISALIGNMENT(p) = 0
1333 else {
1334 for (i = 3; i<N; i++){ # loop 3B
1335 x = q[i]; # DR_MISALIGNMENT(q) = 0
1336 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1340 These loops are later passed to loop_transform to be vectorized. The
1341 vectorizer will use the alignment information to guide the transformation
1342 (whether to generate regular loads/stores, or with special handling for
1343 misalignment). */
1345 static bool
1346 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1348 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1349 enum dr_alignment_support supportable_dr_alignment;
1350 struct data_reference *dr0 = NULL;
1351 struct data_reference *dr;
1352 unsigned int i;
1353 bool do_peeling = false;
1354 bool do_versioning = false;
1355 bool stat;
1356 tree stmt;
1357 stmt_vec_info stmt_info;
1359 if (vect_print_dump_info (REPORT_DETAILS))
1360 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1362 /* While cost model enhancements are expected in the future, the high level
1363 view of the code at this time is as follows:
1365 A) If there is a misaligned write then see if peeling to align this write
1366 can make all data references satisfy vect_supportable_dr_alignment.
1367 If so, update data structures as needed and return true. Note that
1368 at this time vect_supportable_dr_alignment is known to return false
1369 for a a misaligned write.
1371 B) If peeling wasn't possible and there is a data reference with an
1372 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1373 then see if loop versioning checks can be used to make all data
1374 references satisfy vect_supportable_dr_alignment. If so, update
1375 data structures as needed and return true.
1377 C) If neither peeling nor versioning were successful then return false if
1378 any data reference does not satisfy vect_supportable_dr_alignment.
1380 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1382 Note, Possibility 3 above (which is peeling and versioning together) is not
1383 being done at this time. */
1385 /* (1) Peeling to force alignment. */
1387 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1388 Considerations:
1389 + How many accesses will become aligned due to the peeling
1390 - How many accesses will become unaligned due to the peeling,
1391 and the cost of misaligned accesses.
1392 - The cost of peeling (the extra runtime checks, the increase
1393 in code size).
1395 The scheme we use FORNOW: peel to force the alignment of the first
1396 misaligned store in the loop.
1397 Rationale: misaligned stores are not yet supported.
1399 TODO: Use a cost model. */
1401 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1403 stmt = DR_STMT (dr);
1404 stmt_info = vinfo_for_stmt (stmt);
1406 /* For interleaving, only the alignment of the first access
1407 matters. */
1408 if (DR_GROUP_FIRST_DR (stmt_info)
1409 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1410 continue;
1412 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1414 if (DR_GROUP_FIRST_DR (stmt_info))
1416 /* For interleaved access we peel only if number of iterations in
1417 the prolog loop ({VF - misalignment}), is a multiple of the
1418 number of the interleaved accesses. */
1419 int elem_size, mis_in_elements;
1420 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1422 /* FORNOW: handle only known alignment. */
1423 if (!known_alignment_for_access_p (dr))
1425 do_peeling = false;
1426 break;
1429 elem_size = UNITS_PER_SIMD_WORD / vf;
1430 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1432 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1434 do_peeling = false;
1435 break;
1438 dr0 = dr;
1439 do_peeling = true;
1440 break;
1444 /* Often peeling for alignment will require peeling for loop-bound, which in
1445 turn requires that we know how to adjust the loop ivs after the loop. */
1446 if (!vect_can_advance_ivs_p (loop_vinfo))
1447 do_peeling = false;
1449 if (do_peeling)
1451 int mis;
1452 int npeel = 0;
1454 if (known_alignment_for_access_p (dr0))
1456 /* Since it's known at compile time, compute the number of iterations
1457 in the peeled loop (the peeling factor) for use in updating
1458 DR_MISALIGNMENT values. The peeling factor is the vectorization
1459 factor minus the misalignment as an element count. */
1460 mis = DR_MISALIGNMENT (dr0);
1461 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1462 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1464 /* For interleaved data access every iteration accesses all the
1465 members of the group, therefore we divide the number of iterations
1466 by the group size. */
1467 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1468 if (DR_GROUP_FIRST_DR (stmt_info))
1469 npeel /= DR_GROUP_SIZE (stmt_info);
1471 if (vect_print_dump_info (REPORT_DETAILS))
1472 fprintf (vect_dump, "Try peeling by %d", npeel);
1475 /* Ensure that all data refs can be vectorized after the peel. */
1476 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1478 int save_misalignment;
1480 if (dr == dr0)
1481 continue;
1483 stmt = DR_STMT (dr);
1484 stmt_info = vinfo_for_stmt (stmt);
1485 /* For interleaving, only the alignment of the first access
1486 matters. */
1487 if (DR_GROUP_FIRST_DR (stmt_info)
1488 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1489 continue;
1491 save_misalignment = DR_MISALIGNMENT (dr);
1492 vect_update_misalignment_for_peel (dr, dr0, npeel);
1493 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1494 DR_MISALIGNMENT (dr) = save_misalignment;
1496 if (!supportable_dr_alignment)
1498 do_peeling = false;
1499 break;
1503 if (do_peeling)
1505 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1506 If the misalignment of DR_i is identical to that of dr0 then set
1507 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1508 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1509 by the peeling factor times the element size of DR_i (MOD the
1510 vectorization factor times the size). Otherwise, the
1511 misalignment of DR_i must be set to unknown. */
1512 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1513 if (dr != dr0)
1514 vect_update_misalignment_for_peel (dr, dr0, npeel);
1516 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1517 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1518 DR_MISALIGNMENT (dr0) = 0;
1519 if (vect_print_dump_info (REPORT_ALIGNMENT))
1520 fprintf (vect_dump, "Alignment of access forced using peeling.");
1522 if (vect_print_dump_info (REPORT_DETAILS))
1523 fprintf (vect_dump, "Peeling for alignment will be applied.");
1525 stat = vect_verify_datarefs_alignment (loop_vinfo);
1526 gcc_assert (stat);
1527 return stat;
1532 /* (2) Versioning to force alignment. */
1534 /* Try versioning if:
1535 1) flag_tree_vect_loop_version is TRUE
1536 2) optimize_size is FALSE
1537 3) there is at least one unsupported misaligned data ref with an unknown
1538 misalignment, and
1539 4) all misaligned data refs with a known misalignment are supported, and
1540 5) the number of runtime alignment checks is within reason. */
1542 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1544 if (do_versioning)
1546 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1548 stmt = DR_STMT (dr);
1549 stmt_info = vinfo_for_stmt (stmt);
1551 /* For interleaving, only the alignment of the first access
1552 matters. */
1553 if (aligned_access_p (dr)
1554 || (DR_GROUP_FIRST_DR (stmt_info)
1555 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1556 continue;
1558 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1560 if (!supportable_dr_alignment)
1562 tree stmt;
1563 int mask;
1564 tree vectype;
1566 if (known_alignment_for_access_p (dr)
1567 || VEC_length (tree,
1568 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1569 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1571 do_versioning = false;
1572 break;
1575 stmt = DR_STMT (dr);
1576 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1577 gcc_assert (vectype);
1579 /* The rightmost bits of an aligned address must be zeros.
1580 Construct the mask needed for this test. For example,
1581 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1582 mask must be 15 = 0xf. */
1583 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1585 /* FORNOW: use the same mask to test all potentially unaligned
1586 references in the loop. The vectorizer currently supports
1587 a single vector size, see the reference to
1588 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1589 vectorization factor is computed. */
1590 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1591 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1592 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1593 VEC_safe_push (tree, heap,
1594 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1595 DR_STMT (dr));
1599 /* Versioning requires at least one misaligned data reference. */
1600 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1601 do_versioning = false;
1602 else if (!do_versioning)
1603 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1606 if (do_versioning)
1608 VEC(tree,heap) *may_misalign_stmts
1609 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1610 tree stmt;
1612 /* It can now be assumed that the data references in the statements
1613 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1614 of the loop being vectorized. */
1615 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1617 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1618 dr = STMT_VINFO_DATA_REF (stmt_info);
1619 DR_MISALIGNMENT (dr) = 0;
1620 if (vect_print_dump_info (REPORT_ALIGNMENT))
1621 fprintf (vect_dump, "Alignment of access forced using versioning.");
1624 if (vect_print_dump_info (REPORT_DETAILS))
1625 fprintf (vect_dump, "Versioning for alignment will be applied.");
1627 /* Peeling and versioning can't be done together at this time. */
1628 gcc_assert (! (do_peeling && do_versioning));
1630 stat = vect_verify_datarefs_alignment (loop_vinfo);
1631 gcc_assert (stat);
1632 return stat;
1635 /* This point is reached if neither peeling nor versioning is being done. */
1636 gcc_assert (! (do_peeling || do_versioning));
1638 stat = vect_verify_datarefs_alignment (loop_vinfo);
1639 return stat;
1643 /* Function vect_analyze_data_refs_alignment
1645 Analyze the alignment of the data-references in the loop.
1646 Return FALSE if a data reference is found that cannot be vectorized. */
1648 static bool
1649 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1651 if (vect_print_dump_info (REPORT_DETAILS))
1652 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1654 if (!vect_compute_data_refs_alignment (loop_vinfo))
1656 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1657 fprintf (vect_dump,
1658 "not vectorized: can't calculate alignment for data ref.");
1659 return false;
1662 return true;
1666 /* Function vect_analyze_data_ref_access.
1668 Analyze the access pattern of the data-reference DR. For now, a data access
1669 has to be consecutive to be considered vectorizable. */
1671 static bool
1672 vect_analyze_data_ref_access (struct data_reference *dr)
1674 tree step = DR_STEP (dr);
1675 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1676 tree scalar_type = TREE_TYPE (DR_REF (dr));
1677 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1678 tree stmt = DR_STMT (dr);
1679 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1680 interleaving group (including gaps). */
1681 HOST_WIDE_INT stride = dr_step / type_size;
1683 if (!step)
1685 if (vect_print_dump_info (REPORT_DETAILS))
1686 fprintf (vect_dump, "bad data-ref access");
1687 return false;
1690 /* Consecutive? */
1691 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1693 /* Mark that it is not interleaving. */
1694 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1695 return true;
1698 /* Not consecutive access is possible only if it is a part of interleaving. */
1699 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1701 /* Check if it this DR is a part of interleaving, and is a single
1702 element of the group that is accessed in the loop. */
1704 /* Gaps are supported only for loads. STEP must be a multiple of the type
1705 size. The size of the group must be a power of 2. */
1706 if (DR_IS_READ (dr)
1707 && (dr_step % type_size) == 0
1708 && stride > 0
1709 && exact_log2 (stride) != -1)
1711 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1712 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1713 if (vect_print_dump_info (REPORT_DR_DETAILS))
1715 fprintf (vect_dump, "Detected single element interleaving %d ",
1716 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1717 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1718 fprintf (vect_dump, " step ");
1719 print_generic_expr (vect_dump, step, TDF_SLIM);
1721 return true;
1723 if (vect_print_dump_info (REPORT_DETAILS))
1724 fprintf (vect_dump, "not consecutive access");
1725 return false;
1728 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1730 /* First stmt in the interleaving chain. Check the chain. */
1731 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1732 struct data_reference *data_ref = dr;
1733 unsigned int count = 1;
1734 tree next_step;
1735 tree prev_init = DR_INIT (data_ref);
1736 tree prev = stmt;
1737 HOST_WIDE_INT diff, count_in_bytes;
1739 while (next)
1741 /* Skip same data-refs. In case that two or more stmts share data-ref
1742 (supported only for loads), we vectorize only the first stmt, and
1743 the rest get their vectorized loads from the the first one. */
1744 if (!tree_int_cst_compare (DR_INIT (data_ref),
1745 DR_INIT (STMT_VINFO_DATA_REF (
1746 vinfo_for_stmt (next)))))
1748 if (!DR_IS_READ (data_ref))
1750 if (vect_print_dump_info (REPORT_DETAILS))
1751 fprintf (vect_dump, "Two store stmts share the same dr.");
1752 return false;
1755 /* Check that there is no load-store dependencies for this loads
1756 to prevent a case of load-store-load to the same location. */
1757 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1758 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1760 if (vect_print_dump_info (REPORT_DETAILS))
1761 fprintf (vect_dump,
1762 "READ_WRITE dependence in interleaving.");
1763 return false;
1766 /* For load use the same data-ref load. */
1767 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1769 prev = next;
1770 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1771 continue;
1773 prev = next;
1775 /* Check that all the accesses have the same STEP. */
1776 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1777 if (tree_int_cst_compare (step, next_step))
1779 if (vect_print_dump_info (REPORT_DETAILS))
1780 fprintf (vect_dump, "not consecutive access in interleaving");
1781 return false;
1784 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1785 /* Check that the distance between two accesses is equal to the type
1786 size. Otherwise, we have gaps. */
1787 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1788 - TREE_INT_CST_LOW (prev_init)) / type_size;
1789 if (!DR_IS_READ (data_ref) && diff != 1)
1791 if (vect_print_dump_info (REPORT_DETAILS))
1792 fprintf (vect_dump, "interleaved store with gaps");
1793 return false;
1795 /* Store the gap from the previous member of the group. If there is no
1796 gap in the access, DR_GROUP_GAP is always 1. */
1797 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1799 prev_init = DR_INIT (data_ref);
1800 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1801 /* Count the number of data-refs in the chain. */
1802 count++;
1805 /* COUNT is the number of accesses found, we multiply it by the size of
1806 the type to get COUNT_IN_BYTES. */
1807 count_in_bytes = type_size * count;
1809 /* Check that the size of the interleaving is not greater than STEP. */
1810 if (dr_step < count_in_bytes)
1812 if (vect_print_dump_info (REPORT_DETAILS))
1814 fprintf (vect_dump, "interleaving size is greater than step for ");
1815 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1817 return false;
1820 /* Check that the size of the interleaving is equal to STEP for stores,
1821 i.e., that there are no gaps. */
1822 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1824 if (vect_print_dump_info (REPORT_DETAILS))
1825 fprintf (vect_dump, "interleaved store with gaps");
1826 return false;
1829 /* Check that STEP is a multiple of type size. */
1830 if ((dr_step % type_size) != 0)
1832 if (vect_print_dump_info (REPORT_DETAILS))
1834 fprintf (vect_dump, "step is not a multiple of type size: step ");
1835 print_generic_expr (vect_dump, step, TDF_SLIM);
1836 fprintf (vect_dump, " size ");
1837 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1838 TDF_SLIM);
1840 return false;
1843 /* FORNOW: we handle only interleaving that is a power of 2. */
1844 if (exact_log2 (stride) == -1)
1846 if (vect_print_dump_info (REPORT_DETAILS))
1847 fprintf (vect_dump, "interleaving is not a power of 2");
1848 return false;
1850 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1852 return true;
1856 /* Function vect_analyze_data_ref_accesses.
1858 Analyze the access pattern of all the data references in the loop.
1860 FORNOW: the only access pattern that is considered vectorizable is a
1861 simple step 1 (consecutive) access.
1863 FORNOW: handle only arrays and pointer accesses. */
1865 static bool
1866 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1868 unsigned int i;
1869 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1870 struct data_reference *dr;
1872 if (vect_print_dump_info (REPORT_DETAILS))
1873 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1875 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1876 if (!vect_analyze_data_ref_access (dr))
1878 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1879 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1880 return false;
1883 return true;
1887 /* Function vect_analyze_data_refs.
1889 Find all the data references in the loop.
1891 The general structure of the analysis of data refs in the vectorizer is as
1892 follows:
1893 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1894 find and analyze all data-refs in the loop and their dependences.
1895 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1896 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1897 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1901 static bool
1902 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1904 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1905 unsigned int i;
1906 VEC (data_reference_p, heap) *datarefs;
1907 struct data_reference *dr;
1908 tree scalar_type;
1910 if (vect_print_dump_info (REPORT_DETAILS))
1911 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1913 compute_data_dependences_for_loop (loop, true,
1914 &LOOP_VINFO_DATAREFS (loop_vinfo),
1915 &LOOP_VINFO_DDRS (loop_vinfo));
1917 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1918 from stmt_vec_info struct to DR and vectype. */
1919 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1921 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1923 tree stmt;
1924 stmt_vec_info stmt_info;
1926 if (!dr || !DR_REF (dr))
1928 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1929 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1930 return false;
1933 /* Update DR field in stmt_vec_info struct. */
1934 stmt = DR_STMT (dr);
1935 stmt_info = vinfo_for_stmt (stmt);
1937 if (STMT_VINFO_DATA_REF (stmt_info))
1939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1941 fprintf (vect_dump,
1942 "not vectorized: more than one data ref in stmt: ");
1943 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1945 return false;
1947 STMT_VINFO_DATA_REF (stmt_info) = dr;
1949 /* Check that analysis of the data-ref succeeded. */
1950 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1951 || !DR_STEP (dr))
1953 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1955 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1956 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1958 return false;
1960 if (!DR_MEMTAG (dr))
1962 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1964 fprintf (vect_dump, "not vectorized: no memory tag for ");
1965 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1967 return false;
1970 /* Set vectype for STMT. */
1971 scalar_type = TREE_TYPE (DR_REF (dr));
1972 STMT_VINFO_VECTYPE (stmt_info) =
1973 get_vectype_for_scalar_type (scalar_type);
1974 if (!STMT_VINFO_VECTYPE (stmt_info))
1976 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1978 fprintf (vect_dump,
1979 "not vectorized: no vectype for stmt: ");
1980 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1981 fprintf (vect_dump, " scalar_type: ");
1982 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1984 return false;
1988 return true;
1992 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1994 /* Function vect_mark_relevant.
1996 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1998 static void
1999 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2000 enum vect_relevant relevant, bool live_p)
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2004 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2006 if (vect_print_dump_info (REPORT_DETAILS))
2007 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2009 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2011 tree pattern_stmt;
2013 /* This is the last stmt in a sequence that was detected as a
2014 pattern that can potentially be vectorized. Don't mark the stmt
2015 as relevant/live because it's not going to vectorized.
2016 Instead mark the pattern-stmt that replaces it. */
2017 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2019 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2020 stmt_info = vinfo_for_stmt (pattern_stmt);
2021 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2022 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2023 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2024 stmt = pattern_stmt;
2027 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2028 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2029 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2031 if (TREE_CODE (stmt) == PHI_NODE)
2032 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2033 or live will fail vectorization later on. */
2034 return;
2036 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2037 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2039 if (vect_print_dump_info (REPORT_DETAILS))
2040 fprintf (vect_dump, "already marked relevant/live.");
2041 return;
2044 VEC_safe_push (tree, heap, *worklist, stmt);
2048 /* Function vect_stmt_relevant_p.
2050 Return true if STMT in loop that is represented by LOOP_VINFO is
2051 "relevant for vectorization".
2053 A stmt is considered "relevant for vectorization" if:
2054 - it has uses outside the loop.
2055 - it has vdefs (it alters memory).
2056 - control stmts in the loop (except for the exit condition).
2058 CHECKME: what other side effects would the vectorizer allow? */
2060 static bool
2061 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2062 enum vect_relevant *relevant, bool *live_p)
2064 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2065 ssa_op_iter op_iter;
2066 imm_use_iterator imm_iter;
2067 use_operand_p use_p;
2068 def_operand_p def_p;
2070 *relevant = vect_unused_in_loop;
2071 *live_p = false;
2073 /* cond stmt other than loop exit cond. */
2074 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2075 *relevant = vect_used_in_loop;
2077 /* changing memory. */
2078 if (TREE_CODE (stmt) != PHI_NODE)
2079 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2081 if (vect_print_dump_info (REPORT_DETAILS))
2082 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2083 *relevant = vect_used_in_loop;
2086 /* uses outside the loop. */
2087 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2089 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2091 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2092 if (!flow_bb_inside_loop_p (loop, bb))
2094 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2097 /* We expect all such uses to be in the loop exit phis
2098 (because of loop closed form) */
2099 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2100 gcc_assert (bb == single_exit (loop)->dest);
2102 *live_p = true;
2107 return (*live_p || *relevant);
2111 /* Function vect_mark_stmts_to_be_vectorized.
2113 Not all stmts in the loop need to be vectorized. For example:
2115 for i...
2116 for j...
2117 1. T0 = i + j
2118 2. T1 = a[T0]
2120 3. j = j + 1
2122 Stmt 1 and 3 do not need to be vectorized, because loop control and
2123 addressing of vectorized data-refs are handled differently.
2125 This pass detects such stmts. */
2127 static bool
2128 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2130 VEC(tree,heap) *worklist;
2131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2132 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2133 unsigned int nbbs = loop->num_nodes;
2134 block_stmt_iterator si;
2135 tree stmt, use;
2136 stmt_ann_t ann;
2137 ssa_op_iter iter;
2138 unsigned int i;
2139 stmt_vec_info stmt_vinfo;
2140 basic_block bb;
2141 tree phi;
2142 bool live_p;
2143 enum vect_relevant relevant;
2144 tree def, def_stmt;
2145 enum vect_def_type dt;
2147 if (vect_print_dump_info (REPORT_DETAILS))
2148 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2150 worklist = VEC_alloc (tree, heap, 64);
2152 /* 1. Init worklist. */
2154 bb = loop->header;
2155 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2157 if (vect_print_dump_info (REPORT_DETAILS))
2159 fprintf (vect_dump, "init: phi relevant? ");
2160 print_generic_expr (vect_dump, phi, TDF_SLIM);
2163 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2164 vect_mark_relevant (&worklist, phi, relevant, live_p);
2167 for (i = 0; i < nbbs; i++)
2169 bb = bbs[i];
2170 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2172 stmt = bsi_stmt (si);
2174 if (vect_print_dump_info (REPORT_DETAILS))
2176 fprintf (vect_dump, "init: stmt relevant? ");
2177 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2180 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2181 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2186 /* 2. Process_worklist */
2188 while (VEC_length (tree, worklist) > 0)
2190 stmt = VEC_pop (tree, worklist);
2192 if (vect_print_dump_info (REPORT_DETAILS))
2194 fprintf (vect_dump, "worklist: examine stmt: ");
2195 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2198 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2199 in the loop, mark the stmt that defines it (DEF_STMT) as
2200 relevant/irrelevant and live/dead according to the liveness and
2201 relevance properties of STMT.
2204 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2206 ann = stmt_ann (stmt);
2207 stmt_vinfo = vinfo_for_stmt (stmt);
2209 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2210 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2212 /* Generally, the liveness and relevance properties of STMT are
2213 propagated to the DEF_STMTs of its USEs:
2214 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2215 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2217 Exceptions:
2219 (case 1)
2220 If USE is used only for address computations (e.g. array indexing),
2221 which does not need to be directly vectorized, then the
2222 liveness/relevance of the respective DEF_STMT is left unchanged.
2224 (case 2)
2225 If STMT has been identified as defining a reduction variable, then
2226 we have two cases:
2227 (case 2.1)
2228 The last use of STMT is the reduction-variable, which is defined
2229 by a loop-header-phi. We don't want to mark the phi as live or
2230 relevant (because it does not need to be vectorized, it is handled
2231 as part of the vectorization of the reduction), so in this case we
2232 skip the call to vect_mark_relevant.
2233 (case 2.2)
2234 The rest of the uses of STMT are defined in the loop body. For
2235 the def_stmt of these uses we want to set liveness/relevance
2236 as follows:
2237 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2238 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2239 because even though STMT is classified as live (since it defines a
2240 value that is used across loop iterations) and irrelevant (since it
2241 is not used inside the loop), it will be vectorized, and therefore
2242 the corresponding DEF_STMTs need to marked as relevant.
2243 We distinguish between two kinds of relevant stmts - those that are
2244 used by a reduction computation, and those that are (also) used by
2245 a regular computation. This allows us later on to identify stmts
2246 that are used solely by a reduction, and therefore the order of
2247 the results that they produce does not have to be kept.
2250 /* case 2.2: */
2251 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2253 gcc_assert (relevant == vect_unused_in_loop && live_p);
2254 relevant = vect_used_by_reduction;
2255 live_p = false;
2258 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2260 /* case 1: we are only interested in uses that need to be vectorized.
2261 Uses that are used for address computation are not considered
2262 relevant.
2264 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2265 continue;
2267 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2269 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2270 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2271 VEC_free (tree, heap, worklist);
2272 return false;
2275 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2276 continue;
2278 if (vect_print_dump_info (REPORT_DETAILS))
2280 fprintf (vect_dump, "worklist: examine use %d: ", i);
2281 print_generic_expr (vect_dump, use, TDF_SLIM);
2284 bb = bb_for_stmt (def_stmt);
2285 if (!flow_bb_inside_loop_p (loop, bb))
2286 continue;
2288 /* case 2.1: the reduction-use does not mark the defining-phi
2289 as relevant. */
2290 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2291 && TREE_CODE (def_stmt) == PHI_NODE)
2292 continue;
2294 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2296 } /* while worklist */
2298 VEC_free (tree, heap, worklist);
2299 return true;
2303 /* Function vect_can_advance_ivs_p
2305 In case the number of iterations that LOOP iterates is unknown at compile
2306 time, an epilog loop will be generated, and the loop induction variables
2307 (IVs) will be "advanced" to the value they are supposed to take just before
2308 the epilog loop. Here we check that the access function of the loop IVs
2309 and the expression that represents the loop bound are simple enough.
2310 These restrictions will be relaxed in the future. */
2312 static bool
2313 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2316 basic_block bb = loop->header;
2317 tree phi;
2319 /* Analyze phi functions of the loop header. */
2321 if (vect_print_dump_info (REPORT_DETAILS))
2322 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2324 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2326 tree access_fn = NULL;
2327 tree evolution_part;
2329 if (vect_print_dump_info (REPORT_DETAILS))
2331 fprintf (vect_dump, "Analyze phi: ");
2332 print_generic_expr (vect_dump, phi, TDF_SLIM);
2335 /* Skip virtual phi's. The data dependences that are associated with
2336 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2338 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2340 if (vect_print_dump_info (REPORT_DETAILS))
2341 fprintf (vect_dump, "virtual phi. skip.");
2342 continue;
2345 /* Skip reduction phis. */
2347 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2349 if (vect_print_dump_info (REPORT_DETAILS))
2350 fprintf (vect_dump, "reduc phi. skip.");
2351 continue;
2354 /* Analyze the evolution function. */
2356 access_fn = instantiate_parameters
2357 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2359 if (!access_fn)
2361 if (vect_print_dump_info (REPORT_DETAILS))
2362 fprintf (vect_dump, "No Access function.");
2363 return false;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2368 fprintf (vect_dump, "Access function of PHI: ");
2369 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2372 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2374 if (evolution_part == NULL_TREE)
2376 if (vect_print_dump_info (REPORT_DETAILS))
2377 fprintf (vect_dump, "No evolution.");
2378 return false;
2381 /* FORNOW: We do not transform initial conditions of IVs
2382 which evolution functions are a polynomial of degree >= 2. */
2384 if (tree_is_chrec (evolution_part))
2385 return false;
2388 return true;
2392 /* Function vect_get_loop_niters.
2394 Determine how many iterations the loop is executed.
2395 If an expression that represents the number of iterations
2396 can be constructed, place it in NUMBER_OF_ITERATIONS.
2397 Return the loop exit condition. */
2399 static tree
2400 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2402 tree niters;
2404 if (vect_print_dump_info (REPORT_DETAILS))
2405 fprintf (vect_dump, "=== get_loop_niters ===");
2407 niters = number_of_exit_cond_executions (loop);
2409 if (niters != NULL_TREE
2410 && niters != chrec_dont_know)
2412 *number_of_iterations = niters;
2414 if (vect_print_dump_info (REPORT_DETAILS))
2416 fprintf (vect_dump, "==> get_loop_niters:" );
2417 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2421 return get_loop_exit_condition (loop);
2425 /* Function vect_analyze_loop_form.
2427 Verify the following restrictions (some may be relaxed in the future):
2428 - it's an inner-most loop
2429 - number of BBs = 2 (which are the loop header and the latch)
2430 - the loop has a pre-header
2431 - the loop has a single entry and exit
2432 - the loop exit condition is simple enough, and the number of iterations
2433 can be analyzed (a countable loop). */
2435 static loop_vec_info
2436 vect_analyze_loop_form (struct loop *loop)
2438 loop_vec_info loop_vinfo;
2439 tree loop_cond;
2440 tree number_of_iterations = NULL;
2442 if (vect_print_dump_info (REPORT_DETAILS))
2443 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2445 if (loop->inner)
2447 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2448 fprintf (vect_dump, "not vectorized: nested loop.");
2449 return NULL;
2452 if (!single_exit (loop)
2453 || loop->num_nodes != 2
2454 || EDGE_COUNT (loop->header->preds) != 2)
2456 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2458 if (!single_exit (loop))
2459 fprintf (vect_dump, "not vectorized: multiple exits.");
2460 else if (loop->num_nodes != 2)
2461 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2462 else if (EDGE_COUNT (loop->header->preds) != 2)
2463 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2466 return NULL;
2469 /* We assume that the loop exit condition is at the end of the loop. i.e,
2470 that the loop is represented as a do-while (with a proper if-guard
2471 before the loop if needed), where the loop header contains all the
2472 executable statements, and the latch is empty. */
2473 if (!empty_block_p (loop->latch)
2474 || phi_nodes (loop->latch))
2476 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2477 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2478 return NULL;
2481 /* Make sure there exists a single-predecessor exit bb: */
2482 if (!single_pred_p (single_exit (loop)->dest))
2484 edge e = single_exit (loop);
2485 if (!(e->flags & EDGE_ABNORMAL))
2487 split_loop_exit_edge (e);
2488 if (vect_print_dump_info (REPORT_DETAILS))
2489 fprintf (vect_dump, "split exit edge.");
2491 else
2493 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2494 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2495 return NULL;
2499 if (empty_block_p (loop->header))
2501 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2502 fprintf (vect_dump, "not vectorized: empty loop.");
2503 return NULL;
2506 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2507 if (!loop_cond)
2509 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2510 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2511 return NULL;
2514 if (!number_of_iterations)
2516 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2517 fprintf (vect_dump,
2518 "not vectorized: number of iterations cannot be computed.");
2519 return NULL;
2522 if (chrec_contains_undetermined (number_of_iterations))
2524 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2525 fprintf (vect_dump, "Infinite number of iterations.");
2526 return false;
2529 loop_vinfo = new_loop_vec_info (loop);
2530 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2532 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2534 if (vect_print_dump_info (REPORT_DETAILS))
2536 fprintf (vect_dump, "Symbolic number of iterations is ");
2537 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2540 else
2541 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2543 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2544 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2545 return NULL;
2548 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2550 return loop_vinfo;
2554 /* Function vect_analyze_loop.
2556 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2557 for it. The different analyses will record information in the
2558 loop_vec_info struct. */
2559 loop_vec_info
2560 vect_analyze_loop (struct loop *loop)
2562 bool ok;
2563 loop_vec_info loop_vinfo;
2565 if (vect_print_dump_info (REPORT_DETAILS))
2566 fprintf (vect_dump, "===== analyze_loop_nest =====");
2568 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2570 loop_vinfo = vect_analyze_loop_form (loop);
2571 if (!loop_vinfo)
2573 if (vect_print_dump_info (REPORT_DETAILS))
2574 fprintf (vect_dump, "bad loop form.");
2575 return NULL;
2578 /* Find all data references in the loop (which correspond to vdefs/vuses)
2579 and analyze their evolution in the loop.
2581 FORNOW: Handle only simple, array references, which
2582 alignment can be forced, and aligned pointer-references. */
2584 ok = vect_analyze_data_refs (loop_vinfo);
2585 if (!ok)
2587 if (vect_print_dump_info (REPORT_DETAILS))
2588 fprintf (vect_dump, "bad data references.");
2589 destroy_loop_vec_info (loop_vinfo);
2590 return NULL;
2593 /* Classify all cross-iteration scalar data-flow cycles.
2594 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2596 vect_analyze_scalar_cycles (loop_vinfo);
2598 vect_pattern_recog (loop_vinfo);
2600 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2602 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2603 if (!ok)
2605 if (vect_print_dump_info (REPORT_DETAILS))
2606 fprintf (vect_dump, "unexpected pattern.");
2607 destroy_loop_vec_info (loop_vinfo);
2608 return NULL;
2611 /* Analyze the alignment of the data-refs in the loop.
2612 Fail if a data reference is found that cannot be vectorized. */
2614 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2615 if (!ok)
2617 if (vect_print_dump_info (REPORT_DETAILS))
2618 fprintf (vect_dump, "bad data alignment.");
2619 destroy_loop_vec_info (loop_vinfo);
2620 return NULL;
2623 ok = vect_determine_vectorization_factor (loop_vinfo);
2624 if (!ok)
2626 if (vect_print_dump_info (REPORT_DETAILS))
2627 fprintf (vect_dump, "can't determine vectorization factor.");
2628 destroy_loop_vec_info (loop_vinfo);
2629 return NULL;
2632 /* Analyze data dependences between the data-refs in the loop.
2633 FORNOW: fail at the first data dependence that we encounter. */
2635 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2636 if (!ok)
2638 if (vect_print_dump_info (REPORT_DETAILS))
2639 fprintf (vect_dump, "bad data dependence.");
2640 destroy_loop_vec_info (loop_vinfo);
2641 return NULL;
2644 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2645 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2647 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2648 if (!ok)
2650 if (vect_print_dump_info (REPORT_DETAILS))
2651 fprintf (vect_dump, "bad data access.");
2652 destroy_loop_vec_info (loop_vinfo);
2653 return NULL;
2656 /* This pass will decide on using loop versioning and/or loop peeling in
2657 order to enhance the alignment of data references in the loop. */
2659 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2660 if (!ok)
2662 if (vect_print_dump_info (REPORT_DETAILS))
2663 fprintf (vect_dump, "bad data alignment.");
2664 destroy_loop_vec_info (loop_vinfo);
2665 return NULL;
2668 /* Scan all the operations in the loop and make sure they are
2669 vectorizable. */
2671 ok = vect_analyze_operations (loop_vinfo);
2672 if (!ok)
2674 if (vect_print_dump_info (REPORT_DETAILS))
2675 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2676 destroy_loop_vec_info (loop_vinfo);
2677 return NULL;
2680 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2682 return loop_vinfo;