* config/i386/i386.md (paritydi2, paritysi2): New expanders.
[official-gcc.git] / gcc / tree-vect-analyze.c
blob7ee07035db0de2c66da2177efd82d74c6f0e1a0f
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
78 in the loop.
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
81 original loop:
82 for (i=0; i<N; i++){
83 a[i] = b[i] + c[i];
86 vectorized loop:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
92 static bool
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
100 int i;
101 tree scalar_type;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
113 unsigned int nunits;
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
115 tree vectype;
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
124 continue;
126 gcc_assert (stmt_info);
128 /* skip stmts which do not need to be vectorized. */
129 if (!STMT_VINFO_RELEVANT_P (stmt_info)
130 && !STMT_VINFO_LIVE_P (stmt_info))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "skip.");
134 continue;
137 if (!GIMPLE_STMT_P (stmt)
138 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
143 print_generic_expr (vect_dump, stmt, TDF_SLIM);
145 return false;
148 if (STMT_VINFO_VECTYPE (stmt_info))
150 /* The only case when a vectype had been already set is for stmts
151 that contain a dataref, or for "pattern-stmts" (stmts generated
152 by the vectorizer to represent/replace a certain idiom). */
153 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
154 || is_pattern_stmt_p (stmt_info));
155 vectype = STMT_VINFO_VECTYPE (stmt_info);
157 else
159 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
160 && !is_pattern_stmt_p (stmt_info));
162 /* We set the vectype according to the type of the result (lhs).
163 For stmts whose result-type is different than the type of the
164 arguments (e.g. demotion, promotion), vectype will be reset
165 appropriately (later). Note that we have to visit the smallest
166 datatype in this function, because that determines the VF.
167 If the smallest datatype in the loop is present only as the
168 rhs of a promotion operation - we'd miss it here.
169 However, in such a case, that a variable of this datatype
170 does not appear in the lhs anywhere in the loop, it shouldn't
171 affect the vectorization factor. */
172 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
174 if (vect_print_dump_info (REPORT_DETAILS))
176 fprintf (vect_dump, "get vectype for scalar type: ");
177 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
180 vectype = get_vectype_for_scalar_type (scalar_type);
181 if (!vectype)
183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
185 fprintf (vect_dump,
186 "not vectorized: unsupported data-type ");
187 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
189 return false;
191 STMT_VINFO_VECTYPE (stmt_info) = vectype;
194 if (vect_print_dump_info (REPORT_DETAILS))
196 fprintf (vect_dump, "vectype: ");
197 print_generic_expr (vect_dump, vectype, TDF_SLIM);
200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
201 if (vect_print_dump_info (REPORT_DETAILS))
202 fprintf (vect_dump, "nunits = %d", nunits);
204 if (!vectorization_factor
205 || (nunits > vectorization_factor))
206 vectorization_factor = nunits;
211 /* TODO: Analyze cost. Decide if worth while to vectorize. */
213 if (vectorization_factor <= 1)
215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
216 fprintf (vect_dump, "not vectorized: unsupported data-type");
217 return false;
219 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
221 return true;
225 /* Function vect_analyze_operations.
227 Scan the loop stmts and make sure they are all vectorizable. */
229 static bool
230 vect_analyze_operations (loop_vec_info loop_vinfo)
232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
233 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
234 int nbbs = loop->num_nodes;
235 block_stmt_iterator si;
236 unsigned int vectorization_factor = 0;
237 int i;
238 bool ok;
239 tree phi;
240 stmt_vec_info stmt_info;
241 bool need_to_vectorize = false;
243 if (vect_print_dump_info (REPORT_DETAILS))
244 fprintf (vect_dump, "=== vect_analyze_operations ===");
246 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
247 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
249 for (i = 0; i < nbbs; i++)
251 basic_block bb = bbs[i];
253 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
255 stmt_info = vinfo_for_stmt (phi);
256 if (vect_print_dump_info (REPORT_DETAILS))
258 fprintf (vect_dump, "examining phi: ");
259 print_generic_expr (vect_dump, phi, TDF_SLIM);
262 gcc_assert (stmt_info);
264 if (STMT_VINFO_LIVE_P (stmt_info))
266 /* FORNOW: not yet supported. */
267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
268 fprintf (vect_dump, "not vectorized: value used after loop.");
269 return false;
272 if (STMT_VINFO_RELEVANT_P (stmt_info))
274 /* Most likely a reduction-like computation that is used
275 in the loop. */
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported pattern.");
278 return false;
282 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
284 tree stmt = bsi_stmt (si);
285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
287 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "==> examining statement: ");
290 print_generic_expr (vect_dump, stmt, TDF_SLIM);
293 gcc_assert (stmt_info);
295 /* skip stmts which do not need to be vectorized.
296 this is expected to include:
297 - the COND_EXPR which is the loop exit condition
298 - any LABEL_EXPRs in the loop
299 - computations that are used only for array indexing or loop
300 control */
302 if (!STMT_VINFO_RELEVANT_P (stmt_info)
303 && !STMT_VINFO_LIVE_P (stmt_info))
305 if (vect_print_dump_info (REPORT_DETAILS))
306 fprintf (vect_dump, "irrelevant.");
307 continue;
310 if (STMT_VINFO_RELEVANT_P (stmt_info))
312 gcc_assert (GIMPLE_STMT_P (stmt)
313 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
314 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
316 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
317 || vectorizable_type_demotion (stmt, NULL, NULL)
318 || vectorizable_conversion (stmt, NULL, NULL)
319 || vectorizable_operation (stmt, NULL, NULL)
320 || vectorizable_assignment (stmt, NULL, NULL)
321 || vectorizable_load (stmt, NULL, NULL)
322 || vectorizable_call (stmt, NULL, NULL)
323 || vectorizable_store (stmt, NULL, NULL)
324 || vectorizable_condition (stmt, NULL, NULL));
326 if (!ok)
328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
330 fprintf (vect_dump,
331 "not vectorized: relevant stmt not supported: ");
332 print_generic_expr (vect_dump, stmt, TDF_SLIM);
334 return false;
336 need_to_vectorize = true;
339 if (STMT_VINFO_LIVE_P (stmt_info))
341 ok = vectorizable_reduction (stmt, NULL, NULL);
343 if (ok)
344 need_to_vectorize = true;
345 else
346 ok = vectorizable_live_operation (stmt, NULL, NULL);
348 if (!ok)
350 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
352 fprintf (vect_dump,
353 "not vectorized: live stmt not supported: ");
354 print_generic_expr (vect_dump, stmt, TDF_SLIM);
356 return false;
359 } /* stmts in bb */
360 } /* bbs */
362 /* TODO: Analyze cost. Decide if worth while to vectorize. */
364 /* All operations in the loop are either irrelevant (deal with loop
365 control, or dead), or only used outside the loop and can be moved
366 out of the loop (e.g. invariants, inductions). The loop can be
367 optimized away by scalar optimizations. We're better off not
368 touching this loop. */
369 if (!need_to_vectorize)
371 if (vect_print_dump_info (REPORT_DETAILS))
372 fprintf (vect_dump,
373 "All the computation can be taken out of the loop.");
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 fprintf (vect_dump,
376 "not vectorized: redundant loop. no profit to vectorize.");
377 return false;
380 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
381 && vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump,
383 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
384 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
386 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
387 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
388 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
389 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
390 * vectorization_factor))))
392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
393 fprintf (vect_dump, "not vectorized: iteration count too small.");
394 return false;
397 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
398 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
399 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
401 if (vect_print_dump_info (REPORT_DETAILS))
402 fprintf (vect_dump, "epilog loop required.");
403 if (!vect_can_advance_ivs_p (loop_vinfo))
405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
406 fprintf (vect_dump,
407 "not vectorized: can't create epilog loop 1.");
408 return false;
410 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
412 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
413 fprintf (vect_dump,
414 "not vectorized: can't create epilog loop 2.");
415 return false;
419 return true;
423 /* Function exist_non_indexing_operands_for_use_p
425 USE is one of the uses attached to STMT. Check if USE is
426 used in STMT for anything other than indexing an array. */
428 static bool
429 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
431 tree operand;
432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
434 /* USE corresponds to some operand in STMT. If there is no data
435 reference in STMT, then any operand that corresponds to USE
436 is not indexing an array. */
437 if (!STMT_VINFO_DATA_REF (stmt_info))
438 return true;
440 /* STMT has a data_ref. FORNOW this means that its of one of
441 the following forms:
442 -1- ARRAY_REF = var
443 -2- var = ARRAY_REF
444 (This should have been verified in analyze_data_refs).
446 'var' in the second case corresponds to a def, not a use,
447 so USE cannot correspond to any operands that are not used
448 for array indexing.
450 Therefore, all we need to check is if STMT falls into the
451 first case, and whether var corresponds to USE. */
453 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
454 return false;
456 operand = GIMPLE_STMT_OPERAND (stmt, 1);
458 if (TREE_CODE (operand) != SSA_NAME)
459 return false;
461 if (operand == use)
462 return true;
464 return false;
468 /* Function vect_analyze_scalar_cycles.
470 Examine the cross iteration def-use cycles of scalar variables, by
471 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
472 following: invariant, induction, reduction, unknown.
474 Some forms of scalar cycles are not yet supported.
476 Example1: reduction: (unsupported yet)
478 loop1:
479 for (i=0; i<N; i++)
480 sum += a[i];
482 Example2: induction: (unsupported yet)
484 loop2:
485 for (i=0; i<N; i++)
486 a[i] = i;
488 Note: the following loop *is* vectorizable:
490 loop3:
491 for (i=0; i<N; i++)
492 a[i] = b[i];
494 even though it has a def-use cycle caused by the induction variable i:
496 loop: i_2 = PHI (i_0, i_1)
497 a[i_2] = ...;
498 i_1 = i_2 + 1;
499 GOTO loop;
501 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
502 it does not need to be vectorized because it is only used for array
503 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
504 loop2 on the other hand is relevant (it is being written to memory).
507 static void
508 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
510 tree phi;
511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
512 basic_block bb = loop->header;
513 tree dumy;
514 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
516 if (vect_print_dump_info (REPORT_DETAILS))
517 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
519 /* First - identify all inductions. */
520 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
522 tree access_fn = NULL;
523 tree def = PHI_RESULT (phi);
524 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
526 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "Analyze phi: ");
529 print_generic_expr (vect_dump, phi, TDF_SLIM);
532 /* Skip virtual phi's. The data dependences that are associated with
533 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
534 if (!is_gimple_reg (SSA_NAME_VAR (def)))
535 continue;
537 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
539 /* Analyze the evolution function. */
540 access_fn = analyze_scalar_evolution (loop, def);
541 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
543 fprintf (vect_dump, "Access function of PHI: ");
544 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
547 if (!access_fn
548 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
550 VEC_safe_push (tree, heap, worklist, phi);
551 continue;
554 if (vect_print_dump_info (REPORT_DETAILS))
555 fprintf (vect_dump, "Detected induction.");
556 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
560 /* Second - identify all reductions. */
561 while (VEC_length (tree, worklist) > 0)
563 tree phi = VEC_pop (tree, worklist);
564 tree def = PHI_RESULT (phi);
565 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
566 tree reduc_stmt;
568 if (vect_print_dump_info (REPORT_DETAILS))
570 fprintf (vect_dump, "Analyze phi: ");
571 print_generic_expr (vect_dump, phi, TDF_SLIM);
574 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
575 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
577 reduc_stmt = vect_is_simple_reduction (loop, phi);
578 if (reduc_stmt)
580 if (vect_print_dump_info (REPORT_DETAILS))
581 fprintf (vect_dump, "Detected reduction.");
582 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
583 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
584 vect_reduction_def;
586 else
587 if (vect_print_dump_info (REPORT_DETAILS))
588 fprintf (vect_dump, "Unknown def-use cycle pattern.");
591 VEC_free (tree, heap, worklist);
592 return;
596 /* Function vect_insert_into_interleaving_chain.
598 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
600 static void
601 vect_insert_into_interleaving_chain (struct data_reference *dra,
602 struct data_reference *drb)
604 tree prev, next, next_init;
605 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
606 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
608 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
609 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
610 while (next)
612 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
613 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
615 /* Insert here. */
616 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
617 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
618 return;
620 prev = next;
621 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
624 /* We got to the end of the list. Insert here. */
625 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
626 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
630 /* Function vect_update_interleaving_chain.
632 For two data-refs DRA and DRB that are a part of a chain interleaved data
633 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
635 There are four possible cases:
636 1. New stmts - both DRA and DRB are not a part of any chain:
637 FIRST_DR = DRB
638 NEXT_DR (DRB) = DRA
639 2. DRB is a part of a chain and DRA is not:
640 no need to update FIRST_DR
641 no need to insert DRB
642 insert DRA according to init
643 3. DRA is a part of a chain and DRB is not:
644 if (init of FIRST_DR > init of DRB)
645 FIRST_DR = DRB
646 NEXT(FIRST_DR) = previous FIRST_DR
647 else
648 insert DRB according to its init
649 4. both DRA and DRB are in some interleaving chains:
650 choose the chain with the smallest init of FIRST_DR
651 insert the nodes of the second chain into the first one. */
653 static void
654 vect_update_interleaving_chain (struct data_reference *drb,
655 struct data_reference *dra)
657 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
658 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
659 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
660 tree node, prev, next, node_init, first_stmt;
662 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
663 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
665 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
666 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
667 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
668 return;
671 /* 2. DRB is a part of a chain and DRA is not. */
672 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
674 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
675 /* Insert DRA into the chain of DRB. */
676 vect_insert_into_interleaving_chain (dra, drb);
677 return;
680 /* 3. DRA is a part of a chain and DRB is not. */
681 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
683 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
684 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
685 old_first_stmt)));
686 tree tmp;
688 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
690 /* DRB's init is smaller than the init of the stmt previously marked
691 as the first stmt of the interleaving chain of DRA. Therefore, we
692 update FIRST_STMT and put DRB in the head of the list. */
693 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
694 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
696 /* Update all the stmts in the list to point to the new FIRST_STMT. */
697 tmp = old_first_stmt;
698 while (tmp)
700 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
701 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
704 else
706 /* Insert DRB in the list of DRA. */
707 vect_insert_into_interleaving_chain (drb, dra);
708 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
710 return;
713 /* 4. both DRA and DRB are in some interleaving chains. */
714 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
715 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
716 if (first_a == first_b)
717 return;
718 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
719 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
721 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
723 /* Insert the nodes of DRA chain into the DRB chain.
724 After inserting a node, continue from this node of the DRB chain (don't
725 start from the beginning. */
726 node = DR_GROUP_FIRST_DR (stmtinfo_a);
727 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
728 first_stmt = first_b;
730 else
732 /* Insert the nodes of DRB chain into the DRA chain.
733 After inserting a node, continue from this node of the DRA chain (don't
734 start from the beginning. */
735 node = DR_GROUP_FIRST_DR (stmtinfo_b);
736 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
737 first_stmt = first_a;
740 while (node)
742 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
743 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
744 while (next)
746 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
747 if (tree_int_cst_compare (next_init, node_init) > 0)
749 /* Insert here. */
750 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
751 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
752 prev = node;
753 break;
755 prev = next;
756 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
758 if (!next)
760 /* We got to the end of the list. Insert here. */
761 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
762 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
763 prev = node;
765 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
766 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
771 /* Function vect_equal_offsets.
773 Check if OFFSET1 and OFFSET2 are identical expressions. */
775 static bool
776 vect_equal_offsets (tree offset1, tree offset2)
778 bool res0, res1;
780 STRIP_NOPS (offset1);
781 STRIP_NOPS (offset2);
783 if (offset1 == offset2)
784 return true;
786 if (TREE_CODE (offset1) != TREE_CODE (offset2)
787 || !BINARY_CLASS_P (offset1)
788 || !BINARY_CLASS_P (offset2))
789 return false;
791 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
792 TREE_OPERAND (offset2, 0));
793 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
794 TREE_OPERAND (offset2, 1));
796 return (res0 && res1);
800 /* Function vect_check_interleaving.
802 Check if DRA and DRB are a part of interleaving. In case they are, insert
803 DRA and DRB in an interleaving chain. */
805 static void
806 vect_check_interleaving (struct data_reference *dra,
807 struct data_reference *drb)
809 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
811 /* Check that the data-refs have same first location (except init) and they
812 are both either store or load (not load and store). */
813 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
814 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
815 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
816 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
817 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
818 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
819 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
820 || DR_IS_READ (dra) != DR_IS_READ (drb))
821 return;
823 /* Check:
824 1. data-refs are of the same type
825 2. their steps are equal
826 3. the step is greater than the difference between data-refs' inits */
827 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
828 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
830 if (type_size_a != type_size_b
831 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
832 return;
834 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
835 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
836 step = TREE_INT_CST_LOW (DR_STEP (dra));
838 if (init_a > init_b)
840 /* If init_a == init_b + the size of the type * k, we have an interleaving,
841 and DRB is accessed before DRA. */
842 diff_mod_size = (init_a - init_b) % type_size_a;
844 if ((init_a - init_b) > step)
845 return;
847 if (diff_mod_size == 0)
849 vect_update_interleaving_chain (drb, dra);
850 if (vect_print_dump_info (REPORT_DR_DETAILS))
852 fprintf (vect_dump, "Detected interleaving ");
853 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
854 fprintf (vect_dump, " and ");
855 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
857 return;
860 else
862 /* If init_b == init_a + the size of the type * k, we have an
863 interleaving, and DRA is accessed before DRB. */
864 diff_mod_size = (init_b - init_a) % type_size_a;
866 if ((init_b - init_a) > step)
867 return;
869 if (diff_mod_size == 0)
871 vect_update_interleaving_chain (dra, drb);
872 if (vect_print_dump_info (REPORT_DR_DETAILS))
874 fprintf (vect_dump, "Detected interleaving ");
875 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
876 fprintf (vect_dump, " and ");
877 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
879 return;
885 /* Function vect_analyze_data_ref_dependence.
887 Return TRUE if there (might) exist a dependence between a memory-reference
888 DRA and a memory-reference DRB. */
890 static bool
891 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
892 loop_vec_info loop_vinfo)
894 unsigned int i;
895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
896 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
897 struct data_reference *dra = DDR_A (ddr);
898 struct data_reference *drb = DDR_B (ddr);
899 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
900 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
901 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
902 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
903 lambda_vector dist_v;
904 unsigned int loop_depth;
906 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
908 /* Independent data accesses. */
909 vect_check_interleaving (dra, drb);
910 return false;
913 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
914 return false;
916 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
918 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
920 fprintf (vect_dump,
921 "not vectorized: can't determine dependence between ");
922 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
923 fprintf (vect_dump, " and ");
924 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
926 return true;
929 if (DDR_NUM_DIST_VECTS (ddr) == 0)
931 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
933 fprintf (vect_dump, "not vectorized: bad dist vector for ");
934 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
935 fprintf (vect_dump, " and ");
936 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
938 return true;
941 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
942 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
944 int dist = dist_v[loop_depth];
946 if (vect_print_dump_info (REPORT_DR_DETAILS))
947 fprintf (vect_dump, "dependence distance = %d.", dist);
949 /* Same loop iteration. */
950 if (dist % vectorization_factor == 0 && dra_size == drb_size)
952 /* Two references with distance zero have the same alignment. */
953 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
954 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
955 if (vect_print_dump_info (REPORT_ALIGNMENT))
956 fprintf (vect_dump, "accesses have the same alignment.");
957 if (vect_print_dump_info (REPORT_DR_DETAILS))
959 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
960 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
961 fprintf (vect_dump, " and ");
962 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
965 /* For interleaving, mark that there is a read-write dependency if
966 necessary. We check before that one of the data-refs is store. */
967 if (DR_IS_READ (dra))
968 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
969 else
971 if (DR_IS_READ (drb))
972 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
975 continue;
978 if (abs (dist) >= vectorization_factor)
980 /* Dependence distance does not create dependence, as far as vectorization
981 is concerned, in this case. */
982 if (vect_print_dump_info (REPORT_DR_DETAILS))
983 fprintf (vect_dump, "dependence distance >= VF.");
984 continue;
987 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
989 fprintf (vect_dump,
990 "not vectorized: possible dependence between data-refs ");
991 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
992 fprintf (vect_dump, " and ");
993 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
996 return true;
999 return false;
1003 /* Function vect_analyze_data_ref_dependences.
1005 Examine all the data references in the loop, and make sure there do not
1006 exist any data dependences between them. */
1008 static bool
1009 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1011 unsigned int i;
1012 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1013 struct data_dependence_relation *ddr;
1015 if (vect_print_dump_info (REPORT_DETAILS))
1016 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1018 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1019 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1020 return false;
1022 return true;
1026 /* Function vect_compute_data_ref_alignment
1028 Compute the misalignment of the data reference DR.
1030 Output:
1031 1. If during the misalignment computation it is found that the data reference
1032 cannot be vectorized then false is returned.
1033 2. DR_MISALIGNMENT (DR) is defined.
1035 FOR NOW: No analysis is actually performed. Misalignment is calculated
1036 only for trivial cases. TODO. */
/* Compute DR_MISALIGNMENT (in bytes, relative to the vector type's
   alignment) for data reference DR.  Returns false only when the
   computed misalignment value is unusable (negative or overflowed);
   when the alignment simply cannot be determined, DR_MISALIGNMENT
   stays -1 ("unknown") and the function still returns true.  */
1038 static bool
1039 vect_compute_data_ref_alignment (struct data_reference *dr)
1041 tree stmt = DR_STMT (dr);
1042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1043 tree ref = DR_REF (dr);
1044 tree vectype;
1045 tree base, base_addr;
1046 bool base_aligned;
1047 tree misalign;
1048 tree aligned_to, alignment;
1050 if (vect_print_dump_info (REPORT_DETAILS))
1051 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1053 /* Initialize misalignment to unknown. */
1054 DR_MISALIGNMENT (dr) = -1;
1056 misalign = DR_OFFSET_MISALIGNMENT (dr);
1057 aligned_to = DR_ALIGNED_TO (dr);
1058 base_addr = DR_BASE_ADDRESS (dr);
1059 base = build_fold_indirect_ref (base_addr);
1060 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment, in bytes, of the vector type of this access.  */
1061 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* If the guaranteed alignment is weaker than what the vector type needs,
   or no misalignment expression was computed, report "unknown" (leave
   DR_MISALIGNMENT at -1) but do not fail.  */
1063 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1064 || !misalign)
1066 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Unknown alignment for access: ");
1069 print_generic_expr (vect_dump, base, TDF_SLIM);
1071 return true;
/* Decide whether the base itself is sufficiently aligned: either a decl
   with adequate DECL_ALIGN, or a pointer whose pointed-to type carries
   adequate alignment.  */
1074 if ((DECL_P (base)
1075 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1076 alignment) >= 0)
1077 || (TREE_CODE (base_addr) == SSA_NAME
1078 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1079 TREE_TYPE (base_addr)))),
1080 alignment) >= 0))
1081 base_aligned = true;
1082 else
1083 base_aligned = false;
1085 if (!base_aligned)
1087 /* Do not change the alignment of global variables if
1088 flag_section_anchors is enabled. */
1089 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1090 || (TREE_STATIC (base) && flag_section_anchors))
1092 if (vect_print_dump_info (REPORT_DETAILS))
1094 fprintf (vect_dump, "can't force alignment of ref: ");
1095 print_generic_expr (vect_dump, ref, TDF_SLIM);
/* Alignment cannot be forced: leave misalignment "unknown".  */
1097 return true;
1100 /* Force the alignment of the decl.
1101 NOTE: This is the only change to the code we make during
1102 the analysis phase, before deciding to vectorize the loop. */
1103 if (vect_print_dump_info (REPORT_DETAILS))
1104 fprintf (vect_dump, "force alignment");
1105 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1106 DECL_USER_ALIGN (base) = 1;
1109 /* At this point we assume that the base is aligned. */
1110 gcc_assert (base_aligned
1111 || (TREE_CODE (base) == VAR_DECL
1112 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1114 /* Modulo alignment. */
1115 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1117 if (!host_integerp (misalign, 1))
1119 /* Negative or overflowed misalignment value. */
1120 if (vect_print_dump_info (REPORT_DETAILS))
1121 fprintf (vect_dump, "unexpected misalign value")
1122 return false;
1125 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1127 if (vect_print_dump_info (REPORT_DETAILS))
1129 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1130 print_generic_expr (vect_dump, ref, TDF_SLIM);
1133 return true;
1137 /* Function vect_compute_data_refs_alignment
1139 Compute the misalignment of data references in the loop.
1140 Return FALSE if a data reference is found that cannot be vectorized. */
1142 static bool
1143 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1145 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1146 struct data_reference *dr;
1147 unsigned int i;
1149 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1150 if (!vect_compute_data_ref_alignment (dr))
1151 return false;
1153 return true;
1157 /* Function vect_update_misalignment_for_peel
1159 DR - the data reference whose misalignment is to be adjusted.
1160 DR_PEEL - the data reference whose misalignment is being made
1161 zero in the vector loop by the peel.
1162 NPEEL - the number of iterations in the peel loop if the misalignment
1163 of DR_PEEL is known at compile time. */
/* Adjust DR_MISALIGNMENT of DR to reflect peeling NPEEL iterations so
   that DR_PEEL becomes aligned.  Three outcomes, tried in order:
   (1) same known misalignment as DR_PEEL -> becomes 0;
   (2) DR is recorded as having the same alignment as DR_PEEL -> 0;
   (3) both misalignments known -> shift DR's by the peeled bytes,
       modulo the SIMD word size; otherwise mark unknown (-1).  */
1165 static void
1166 vect_update_misalignment_for_peel (struct data_reference *dr,
1167 struct data_reference *dr_peel, int npeel)
1169 unsigned int i;
1170 VEC(dr_p,heap) *same_align_drs;
1171 struct data_reference *current_dr;
1172 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1173 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1174 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1175 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1177 /* For interleaved data accesses the step in the loop must be multiplied by
1178 the size of the interleaving group. */
1179 if (DR_GROUP_FIRST_DR (stmt_info))
1180 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1181 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1182 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* Case (1): identical element-misalignment as the peel dr.  */
1184 if (known_alignment_for_access_p (dr)
1185 && known_alignment_for_access_p (dr_peel)
1186 && (DR_MISALIGNMENT (dr) / dr_size ==
1187 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1189 DR_MISALIGNMENT (dr) = 0;
1190 return;
1193 /* It can be assumed that the data refs with the same alignment as dr_peel
1194 are aligned in the vector loop. */
1195 same_align_drs
1196 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
/* Case (2): DR appears in DR_PEEL's same-alignment list.  */
1197 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1199 if (current_dr != dr)
1200 continue;
1201 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1202 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1203 DR_MISALIGNMENT (dr) = 0;
1204 return;
/* Case (3): both misalignments known at compile time; advance by the
   peeled bytes and wrap at the vector word size.  */
1207 if (known_alignment_for_access_p (dr)
1208 && known_alignment_for_access_p (dr_peel))
1210 DR_MISALIGNMENT (dr) += npeel * dr_size;
1211 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1212 return;
/* Fallback: the misalignment after peeling is unknown.  */
1215 if (vect_print_dump_info (REPORT_DETAILS))
1216 fprintf (vect_dump, "Setting misalignment to -1.");
1217 DR_MISALIGNMENT (dr) = -1;
1221 /* Function vect_verify_datarefs_alignment
1223 Return TRUE if all data references in the loop can be
1224 handled with respect to alignment. */
1226 static bool
1227 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1229 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1230 struct data_reference *dr;
1231 enum dr_alignment_support supportable_dr_alignment;
1232 unsigned int i;
1234 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1236 tree stmt = DR_STMT (dr);
1237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1239 /* For interleaving, only the alignment of the first access matters. */
1240 if (DR_GROUP_FIRST_DR (stmt_info)
1241 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1242 continue;
1244 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1245 if (!supportable_dr_alignment)
1247 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1249 if (DR_IS_READ (dr))
1250 fprintf (vect_dump,
1251 "not vectorized: unsupported unaligned load.");
1252 else
1253 fprintf (vect_dump,
1254 "not vectorized: unsupported unaligned store.");
1256 return false;
1258 if (supportable_dr_alignment != dr_aligned
1259 && vect_print_dump_info (REPORT_ALIGNMENT))
1260 fprintf (vect_dump, "Vectorizing an unaligned access.");
1262 return true;
1266 /* Function vect_enhance_data_refs_alignment
1268 This pass will use loop versioning and loop peeling in order to enhance
1269 the alignment of data references in the loop.
1271 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1272 original loop is to be vectorized; Any other loops that are created by
1273 the transformations performed in this pass - are not supposed to be
1274 vectorized. This restriction will be relaxed.
1276 This pass will require a cost model to guide it whether to apply peeling
1277 or versioning or a combination of the two. For example, the scheme that
1278 intel uses when given a loop with several memory accesses, is as follows:
1279 choose one memory access ('p') which alignment you want to force by doing
1280 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1281 other accesses are not necessarily aligned, or (2) use loop versioning to
1282 generate one loop in which all accesses are aligned, and another loop in
1283 which only 'p' is necessarily aligned.
1285 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1286 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1287 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1289 Devising a cost model is the most critical aspect of this work. It will
1290 guide us on which access to peel for, whether to use loop versioning, how
1291 many versions to create, etc. The cost model will probably consist of
1292 generic considerations as well as target specific considerations (on
1293 powerpc for example, misaligned stores are more painful than misaligned
1294 loads).
1296 Here are the general steps involved in alignment enhancements:
1298 -- original loop, before alignment analysis:
1299 for (i=0; i<N; i++){
1300 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1301 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1304 -- After vect_compute_data_refs_alignment:
1305 for (i=0; i<N; i++){
1306 x = q[i]; # DR_MISALIGNMENT(q) = 3
1307 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1310 -- Possibility 1: we do loop versioning:
1311 if (p is aligned) {
1312 for (i=0; i<N; i++){ # loop 1A
1313 x = q[i]; # DR_MISALIGNMENT(q) = 3
1314 p[i] = y; # DR_MISALIGNMENT(p) = 0
1317 else {
1318 for (i=0; i<N; i++){ # loop 1B
1319 x = q[i]; # DR_MISALIGNMENT(q) = 3
1320 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1324 -- Possibility 2: we do loop peeling:
1325 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1326 x = q[i];
1327 p[i] = y;
1329 for (i = 3; i < N; i++){ # loop 2A
1330 x = q[i]; # DR_MISALIGNMENT(q) = 0
1331 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1334 -- Possibility 3: combination of loop peeling and versioning:
1335 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1336 x = q[i];
1337 p[i] = y;
1339 if (p is aligned) {
1340 for (i = 3; i<N; i++){ # loop 3A
1341 x = q[i]; # DR_MISALIGNMENT(q) = 0
1342 p[i] = y; # DR_MISALIGNMENT(p) = 0
1345 else {
1346 for (i = 3; i<N; i++){ # loop 3B
1347 x = q[i]; # DR_MISALIGNMENT(q) = 0
1348 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1352 These loops are later passed to loop_transform to be vectorized. The
1353 vectorizer will use the alignment information to guide the transformation
1354 (whether to generate regular loads/stores, or with special handling for
1355 misalignment). */
/* Try to make all loop data references alignment-supportable, first by
   peeling a prologue that aligns the first misaligned store, then by
   loop versioning with runtime alignment checks.  Returns the result of
   vect_verify_datarefs_alignment on the updated state.  */
1357 static bool
1358 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1360 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1361 enum dr_alignment_support supportable_dr_alignment;
1362 struct data_reference *dr0 = NULL;
1363 struct data_reference *dr;
1364 unsigned int i;
1365 bool do_peeling = false;
1366 bool do_versioning = false;
1367 bool stat;
1368 tree stmt;
1369 stmt_vec_info stmt_info;
1371 if (vect_print_dump_info (REPORT_DETAILS))
1372 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1374 /* While cost model enhancements are expected in the future, the high level
1375 view of the code at this time is as follows:
1377 A) If there is a misaligned write then see if peeling to align this write
1378 can make all data references satisfy vect_supportable_dr_alignment.
1379 If so, update data structures as needed and return true. Note that
1380 at this time vect_supportable_dr_alignment is known to return false
1381 for a misaligned write.
1383 B) If peeling wasn't possible and there is a data reference with an
1384 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1385 then see if loop versioning checks can be used to make all data
1386 references satisfy vect_supportable_dr_alignment. If so, update
1387 data structures as needed and return true.
1389 C) If neither peeling nor versioning were successful then return false if
1390 any data reference does not satisfy vect_supportable_dr_alignment.
1392 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1394 Note, Possibility 3 above (which is peeling and versioning together) is not
1395 being done at this time. */
1397 /* (1) Peeling to force alignment. */
1399 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1400 Considerations:
1401 + How many accesses will become aligned due to the peeling
1402 - How many accesses will become unaligned due to the peeling,
1403 and the cost of misaligned accesses.
1404 - The cost of peeling (the extra runtime checks, the increase
1405 in code size).
1407 The scheme we use FORNOW: peel to force the alignment of the first
1408 misaligned store in the loop.
1409 Rationale: misaligned stores are not yet supported.
1411 TODO: Use a cost model. */
1413 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1415 stmt = DR_STMT (dr);
1416 stmt_info = vinfo_for_stmt (stmt);
1418 /* For interleaving, only the alignment of the first access
1419 matters. */
1420 if (DR_GROUP_FIRST_DR (stmt_info)
1421 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1422 continue;
1424 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1426 if (DR_GROUP_FIRST_DR (stmt_info))
1428 /* For interleaved access we peel only if number of iterations in
1429 the prolog loop ({VF - misalignment}), is a multiple of the
1430 number of the interleaved accesses. */
1431 int elem_size, mis_in_elements;
1432 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1434 /* FORNOW: handle only known alignment. */
1435 if (!known_alignment_for_access_p (dr))
1437 do_peeling = false;
1438 break;
1441 elem_size = UNITS_PER_SIMD_WORD / vf;
1442 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1444 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1446 do_peeling = false;
1447 break;
/* DR0 is the dr whose alignment the peel will enforce.  */
1450 dr0 = dr;
1451 do_peeling = true;
1452 break;
1456 /* Often peeling for alignment will require peeling for loop-bound, which in
1457 turn requires that we know how to adjust the loop ivs after the loop. */
1458 if (!vect_can_advance_ivs_p (loop_vinfo))
1459 do_peeling = false;
1461 if (do_peeling)
1463 int mis;
1464 int npeel = 0;
1466 if (known_alignment_for_access_p (dr0))
1468 /* Since it's known at compile time, compute the number of iterations
1469 in the peeled loop (the peeling factor) for use in updating
1470 DR_MISALIGNMENT values. The peeling factor is the vectorization
1471 factor minus the misalignment as an element count. */
1472 mis = DR_MISALIGNMENT (dr0);
1473 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1474 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1476 /* For interleaved data access every iteration accesses all the
1477 members of the group, therefore we divide the number of iterations
1478 by the group size. */
1479 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1480 if (DR_GROUP_FIRST_DR (stmt_info))
1481 npeel /= DR_GROUP_SIZE (stmt_info);
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "Try peeling by %d", npeel);
1487 /* Ensure that all data refs can be vectorized after the peel. */
1488 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1490 int save_misalignment;
1492 if (dr == dr0)
1493 continue;
1495 stmt = DR_STMT (dr);
1496 stmt_info = vinfo_for_stmt (stmt);
1497 /* For interleaving, only the alignment of the first access
1498 matters. */
1499 if (DR_GROUP_FIRST_DR (stmt_info)
1500 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1501 continue;
/* Trial update: apply the peel's effect, query supportability,
   then restore the original misalignment.  */
1503 save_misalignment = DR_MISALIGNMENT (dr);
1504 vect_update_misalignment_for_peel (dr, dr0, npeel);
1505 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1506 DR_MISALIGNMENT (dr) = save_misalignment;
1508 if (!supportable_dr_alignment)
1510 do_peeling = false;
1511 break;
1515 if (do_peeling)
1517 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1518 If the misalignment of DR_i is identical to that of dr0 then set
1519 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1520 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1521 by the peeling factor times the element size of DR_i (MOD the
1522 vectorization factor times the size). Otherwise, the
1523 misalignment of DR_i must be set to unknown. */
1524 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1525 if (dr != dr0)
1526 vect_update_misalignment_for_peel (dr, dr0, npeel);
1528 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1529 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1530 DR_MISALIGNMENT (dr0) = 0;
1531 if (vect_print_dump_info (REPORT_ALIGNMENT))
1532 fprintf (vect_dump, "Alignment of access forced using peeling.");
1534 if (vect_print_dump_info (REPORT_DETAILS))
1535 fprintf (vect_dump, "Peeling for alignment will be applied.");
1537 stat = vect_verify_datarefs_alignment (loop_vinfo);
1538 gcc_assert (stat);
1539 return stat;
1544 /* (2) Versioning to force alignment. */
1546 /* Try versioning if:
1547 1) flag_tree_vect_loop_version is TRUE
1548 2) optimize_size is FALSE
1549 3) there is at least one unsupported misaligned data ref with an unknown
1550 misalignment, and
1551 4) all misaligned data refs with a known misalignment are supported, and
1552 5) the number of runtime alignment checks is within reason. */
1554 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1556 if (do_versioning)
1558 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1560 stmt = DR_STMT (dr);
1561 stmt_info = vinfo_for_stmt (stmt);
1563 /* For interleaving, only the alignment of the first access
1564 matters. */
1565 if (aligned_access_p (dr)
1566 || (DR_GROUP_FIRST_DR (stmt_info)
1567 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1568 continue;
1570 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1572 if (!supportable_dr_alignment)
1574 tree stmt;
1575 int mask;
1576 tree vectype;
1578 if (known_alignment_for_access_p (dr)
1579 || VEC_length (tree,
1580 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1581 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1583 do_versioning = false;
1584 break;
1587 stmt = DR_STMT (dr);
1588 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1589 gcc_assert (vectype);
1591 /* The rightmost bits of an aligned address must be zeros.
1592 Construct the mask needed for this test. For example,
1593 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1594 mask must be 15 = 0xf. */
1595 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1597 /* FORNOW: use the same mask to test all potentially unaligned
1598 references in the loop. The vectorizer currently supports
1599 a single vector size, see the reference to
1600 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1601 vectorization factor is computed. */
1602 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1603 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1604 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1605 VEC_safe_push (tree, heap,
1606 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1607 DR_STMT (dr));
1611 /* Versioning requires at least one misaligned data reference. */
1612 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1613 do_versioning = false;
1614 else if (!do_versioning)
1615 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1618 if (do_versioning)
1620 VEC(tree,heap) *may_misalign_stmts
1621 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1622 tree stmt;
1624 /* It can now be assumed that the data references in the statements
1625 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1626 of the loop being vectorized. */
1627 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1630 dr = STMT_VINFO_DATA_REF (stmt_info);
1631 DR_MISALIGNMENT (dr) = 0;
1632 if (vect_print_dump_info (REPORT_ALIGNMENT))
1633 fprintf (vect_dump, "Alignment of access forced using versioning.");
1636 if (vect_print_dump_info (REPORT_DETAILS))
1637 fprintf (vect_dump, "Versioning for alignment will be applied.");
1639 /* Peeling and versioning can't be done together at this time. */
1640 gcc_assert (! (do_peeling && do_versioning));
1642 stat = vect_verify_datarefs_alignment (loop_vinfo);
1643 gcc_assert (stat);
1644 return stat;
1647 /* This point is reached if neither peeling nor versioning is being done. */
1648 gcc_assert (! (do_peeling || do_versioning));
1650 stat = vect_verify_datarefs_alignment (loop_vinfo);
1651 return stat;
1655 /* Function vect_analyze_data_refs_alignment
1657 Analyze the alignment of the data-references in the loop.
1658 Return FALSE if a data reference is found that cannot be vectorized. */
1660 static bool
1661 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1663 if (vect_print_dump_info (REPORT_DETAILS))
1664 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1666 if (!vect_compute_data_refs_alignment (loop_vinfo))
1668 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1669 fprintf (vect_dump,
1670 "not vectorized: can't calculate alignment for data ref.");
1671 return false;
1674 return true;
1678 /* Function vect_analyze_data_ref_access.
1680 Analyze the access pattern of the data-reference DR. For now, a data access
1681 has to be consecutive to be considered vectorizable. */
/* Classify the access pattern of DR: consecutive (stride == element
   size), single-element interleaving (loads only), or the first stmt of
   an interleaving chain, whose members are then validated (shared
   data-refs, equal steps, gap rules, power-of-2 group size).  Returns
   false when the pattern is not vectorizable.  */
1683 static bool
1684 vect_analyze_data_ref_access (struct data_reference *dr)
1686 tree step = DR_STEP (dr);
/* NOTE(review): STEP is dereferenced here (TREE_INT_CST_LOW) before the
   "if (!step)" NULL check below — if STEP can really be NULL this reads
   through a NULL pointer first; confirm and hoist the check.  */
1687 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1688 tree scalar_type = TREE_TYPE (DR_REF (dr));
1689 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1690 tree stmt = DR_STMT (dr);
1691 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1692 interleaving group (including gaps). */
1693 HOST_WIDE_INT stride = dr_step / type_size;
1695 if (!step)
1697 if (vect_print_dump_info (REPORT_DETAILS))
1698 fprintf (vect_dump, "bad data-ref access");
1699 return false;
1702 /* Consecutive? */
1703 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1705 /* Mark that it is not interleaving. */
1706 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1707 return true;
1710 /* Not consecutive access is possible only if it is a part of interleaving. */
1711 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1713 /* Check if this DR is a part of interleaving, and is a single
1714 element of the group that is accessed in the loop. */
1716 /* Gaps are supported only for loads. STEP must be a multiple of the type
1717 size. The size of the group must be a power of 2. */
1718 if (DR_IS_READ (dr)
1719 && (dr_step % type_size) == 0
1720 && stride > 0
1721 && exact_log2 (stride) != -1)
1723 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1724 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1725 if (vect_print_dump_info (REPORT_DR_DETAILS))
1727 fprintf (vect_dump, "Detected single element interleaving %d ",
1728 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1729 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1730 fprintf (vect_dump, " step ");
1731 print_generic_expr (vect_dump, step, TDF_SLIM);
1733 return true;
1735 if (vect_print_dump_info (REPORT_DETAILS))
1736 fprintf (vect_dump, "not consecutive access");
1737 return false;
1740 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1742 /* First stmt in the interleaving chain. Check the chain. */
1743 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1744 struct data_reference *data_ref = dr;
1745 unsigned int count = 1;
1746 tree next_step;
1747 tree prev_init = DR_INIT (data_ref);
1748 tree prev = stmt;
1749 HOST_WIDE_INT diff, count_in_bytes;
1751 while (next)
1753 /* Skip same data-refs. In case that two or more stmts share data-ref
1754 (supported only for loads), we vectorize only the first stmt, and
1755 the rest get their vectorized loads from the first one. */
1756 if (!tree_int_cst_compare (DR_INIT (data_ref),
1757 DR_INIT (STMT_VINFO_DATA_REF (
1758 vinfo_for_stmt (next)))))
1760 if (!DR_IS_READ (data_ref))
1762 if (vect_print_dump_info (REPORT_DETAILS))
1763 fprintf (vect_dump, "Two store stmts share the same dr.");
1764 return false;
1767 /* Check that there are no load-store dependencies for these loads
1768 to prevent a case of load-store-load to the same location. */
1769 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1770 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump,
1774 "READ_WRITE dependence in interleaving.");
1775 return false;
1778 /* For load use the same data-ref load. */
1779 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1781 prev = next;
1782 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1783 continue;
1785 prev = next;
1787 /* Check that all the accesses have the same STEP. */
1788 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1789 if (tree_int_cst_compare (step, next_step))
1791 if (vect_print_dump_info (REPORT_DETAILS))
1792 fprintf (vect_dump, "not consecutive access in interleaving");
1793 return false;
1796 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1797 /* Check that the distance between two accesses is equal to the type
1798 size. Otherwise, we have gaps. */
1799 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1800 - TREE_INT_CST_LOW (prev_init)) / type_size;
1801 if (!DR_IS_READ (data_ref) && diff != 1)
1803 if (vect_print_dump_info (REPORT_DETAILS))
1804 fprintf (vect_dump, "interleaved store with gaps");
1805 return false;
1807 /* Store the gap from the previous member of the group. If there is no
1808 gap in the access, DR_GROUP_GAP is always 1. */
1809 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1811 prev_init = DR_INIT (data_ref);
1812 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1813 /* Count the number of data-refs in the chain. */
1814 count++;
1817 /* COUNT is the number of accesses found, we multiply it by the size of
1818 the type to get COUNT_IN_BYTES. */
1819 count_in_bytes = type_size * count;
1821 /* Check that the size of the interleaving is not greater than STEP. */
1822 if (dr_step < count_in_bytes)
1824 if (vect_print_dump_info (REPORT_DETAILS))
1826 fprintf (vect_dump, "interleaving size is greater than step for ");
1827 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1829 return false;
1832 /* Check that the size of the interleaving is equal to STEP for stores,
1833 i.e., that there are no gaps. */
1834 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1836 if (vect_print_dump_info (REPORT_DETAILS))
1837 fprintf (vect_dump, "interleaved store with gaps");
1838 return false;
1841 /* Check that STEP is a multiple of type size. */
1842 if ((dr_step % type_size) != 0)
1844 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "step is not a multiple of type size: step ");
1847 print_generic_expr (vect_dump, step, TDF_SLIM);
1848 fprintf (vect_dump, " size ");
1849 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1850 TDF_SLIM);
1852 return false;
1855 /* FORNOW: we handle only interleaving that is a power of 2. */
1856 if (exact_log2 (stride) == -1)
1858 if (vect_print_dump_info (REPORT_DETAILS))
1859 fprintf (vect_dump, "interleaving is not a power of 2");
1860 return false;
1862 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1864 return true;
1868 /* Function vect_analyze_data_ref_accesses.
1870 Analyze the access pattern of all the data references in the loop.
1872 FORNOW: the only access pattern that is considered vectorizable is a
1873 simple step 1 (consecutive) access.
1875 FORNOW: handle only arrays and pointer accesses. */
1877 static bool
1878 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1880 unsigned int i;
1881 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1882 struct data_reference *dr;
1884 if (vect_print_dump_info (REPORT_DETAILS))
1885 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1887 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 if (!vect_analyze_data_ref_access (dr))
1890 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1891 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1892 return false;
1895 return true;
1899 /* Function vect_analyze_data_refs.
1901 Find all the data references in the loop.
1903 The general structure of the analysis of data refs in the vectorizer is as
1904 follows:
1905 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1906 find and analyze all data-refs in the loop and their dependences.
1907 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1908 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1909 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
/* Find all data references in the loop via
   compute_data_dependences_for_loop, then validate each one: the
   analysis must have produced base/offset/init/step and a memory tag,
   each stmt may carry at most one data-ref, and a vector type must
   exist for the scalar type.  Records DR and vectype in each stmt's
   stmt_vec_info.  Returns false on the first failing data-ref.  */
1913 static bool
1914 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1916 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1917 unsigned int i;
1918 VEC (data_reference_p, heap) *datarefs;
1919 struct data_reference *dr;
1920 tree scalar_type;
1922 if (vect_print_dump_info (REPORT_DETAILS))
1923 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1925 compute_data_dependences_for_loop (loop, true,
1926 &LOOP_VINFO_DATAREFS (loop_vinfo),
1927 &LOOP_VINFO_DDRS (loop_vinfo));
1929 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1930 from stmt_vec_info struct to DR and vectype. */
1931 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1933 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1935 tree stmt;
1936 stmt_vec_info stmt_info;
1938 if (!dr || !DR_REF (dr))
1940 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1941 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1942 return false;
1945 /* Update DR field in stmt_vec_info struct. */
1946 stmt = DR_STMT (dr);
1947 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW: a stmt with more than one data-ref is rejected.  */
1949 if (STMT_VINFO_DATA_REF (stmt_info))
1951 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1953 fprintf (vect_dump,
1954 "not vectorized: more than one data ref in stmt: ");
1955 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1957 return false;
1959 STMT_VINFO_DATA_REF (stmt_info) = dr;
1961 /* Check that analysis of the data-ref succeeded. */
1962 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1963 || !DR_STEP (dr))
1965 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1967 fprintf (vect_dump, "not vectorized: data ref analysis failed ")
1968 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1970 return false;
1972 if (!DR_MEMTAG (dr))
1974 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1976 fprintf (vect_dump, "not vectorized: no memory tag for ");
1977 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1979 return false;
1982 /* Set vectype for STMT. */
1983 scalar_type = TREE_TYPE (DR_REF (dr));
1984 STMT_VINFO_VECTYPE (stmt_info) =
1985 get_vectype_for_scalar_type (scalar_type);
1986 if (!STMT_VINFO_VECTYPE (stmt_info))
1988 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1990 fprintf (vect_dump,
1991 "not vectorized: no vectype for stmt: ");
1992 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1993 fprintf (vect_dump, " scalar_type: ");
1994 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1996 return false;
2000 return true;
2004 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2006 /* Function vect_mark_relevant.
2008 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* Mark STMT as "relevant for vectorization" (RELEVANT) and/or live
   (LIVE_P), and push it onto WORKLIST when its relevant/live state
   actually changed.  A stmt that is the tail of a recognized pattern
   is redirected to its pattern-stmt; PHI nodes are never queued.  */
2010 static void
2011 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2012 enum vect_relevant relevant, bool live_p)
2014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Remember the previous state so we can detect a no-op update below.  */
2015 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2016 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2021 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2023 tree pattern_stmt;
2025 /* This is the last stmt in a sequence that was detected as a
2026 pattern that can potentially be vectorized. Don't mark the stmt
2027 as relevant/live because it's not going to be vectorized.
2028 Instead mark the pattern-stmt that replaces it. */
2029 if (vect_print_dump_info (REPORT_DETAILS))
2030 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2031 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2032 stmt_info = vinfo_for_stmt (pattern_stmt);
2033 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2034 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2035 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2036 stmt = pattern_stmt;
/* Relevance is monotonic: only upgrade, never downgrade.  */
2039 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2040 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2041 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2043 if (TREE_CODE (stmt) == PHI_NODE)
2044 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2045 or live will fail vectorization later on. */
2046 return;
2048 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2049 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2051 if (vect_print_dump_info (REPORT_DETAILS))
2052 fprintf (vect_dump, "already marked relevant/live.");
2053 return;
2056 VEC_safe_push (tree, heap, *worklist, stmt);
2060 /* Function vect_stmt_relevant_p.
2062 Return true if STMT in loop that is represented by LOOP_VINFO is
2063 "relevant for vectorization".
2065 A stmt is considered "relevant for vectorization" if:
2066 - it has uses outside the loop.
2067 - it has vdefs (it alters memory).
2068 - control stmts in the loop (except for the exit condition).
2070 CHECKME: what other side effects would the vectorizer allow? */
2072 static bool
2073 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2074 enum vect_relevant *relevant, bool *live_p)
2076 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2077 ssa_op_iter op_iter;
2078 imm_use_iterator imm_iter;
2079 use_operand_p use_p;
2080 def_operand_p def_p;
2082 *relevant = vect_unused_in_loop;
2083 *live_p = false;
2085 /* cond stmt other than loop exit cond. */
2086 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2087 *relevant = vect_used_in_loop;
2089 /* changing memory. */
2090 if (TREE_CODE (stmt) != PHI_NODE)
2091 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2093 if (vect_print_dump_info (REPORT_DETAILS))
2094 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2095 *relevant = vect_used_in_loop;
2098 /* uses outside the loop. */
2099 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2101 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2103 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2104 if (!flow_bb_inside_loop_p (loop, bb))
2106 if (vect_print_dump_info (REPORT_DETAILS))
2107 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2109 /* We expect all such uses to be in the loop exit phis
2110 (because of loop closed form) */
2111 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2112 gcc_assert (bb == single_exit (loop)->dest);
2114 *live_p = true;
2119 return (*live_p || *relevant);
2123 /* Function vect_mark_stmts_to_be_vectorized.
2125 Not all stmts in the loop need to be vectorized. For example:
2127 for i...
2128 for j...
2129 1. T0 = i + j
2130 2. T1 = a[T0]
2132 3. j = j + 1
2134 Stmt 1 and 3 do not need to be vectorized, because loop control and
2135 addressing of vectorized data-refs are handled differently.
2137 This pass detects such stmts. */
2139 static bool
2140 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2142 VEC(tree,heap) *worklist;
2143 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2144 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2145 unsigned int nbbs = loop->num_nodes;
2146 block_stmt_iterator si;
2147 tree stmt, use;
2148 stmt_ann_t ann;
2149 ssa_op_iter iter;
2150 unsigned int i;
2151 stmt_vec_info stmt_vinfo;
2152 basic_block bb;
2153 tree phi;
2154 bool live_p;
2155 enum vect_relevant relevant;
2156 tree def, def_stmt;
2157 enum vect_def_type dt;
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2162 worklist = VEC_alloc (tree, heap, 64);
2164 /* 1. Init worklist. */
2166 bb = loop->header;
2167 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2169 if (vect_print_dump_info (REPORT_DETAILS))
2171 fprintf (vect_dump, "init: phi relevant? ");
2172 print_generic_expr (vect_dump, phi, TDF_SLIM);
2175 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2176 vect_mark_relevant (&worklist, phi, relevant, live_p);
2179 for (i = 0; i < nbbs; i++)
2181 bb = bbs[i];
2182 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2184 stmt = bsi_stmt (si);
2186 if (vect_print_dump_info (REPORT_DETAILS))
2188 fprintf (vect_dump, "init: stmt relevant? ");
2189 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2192 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2193 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2198 /* 2. Process_worklist */
2200 while (VEC_length (tree, worklist) > 0)
2202 stmt = VEC_pop (tree, worklist);
2204 if (vect_print_dump_info (REPORT_DETAILS))
2206 fprintf (vect_dump, "worklist: examine stmt: ");
2207 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2210 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2211 in the loop, mark the stmt that defines it (DEF_STMT) as
2212 relevant/irrelevant and live/dead according to the liveness and
2213 relevance properties of STMT.
2216 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2218 ann = stmt_ann (stmt);
2219 stmt_vinfo = vinfo_for_stmt (stmt);
2221 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2222 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2224 /* Generally, the liveness and relevance properties of STMT are
2225 propagated to the DEF_STMTs of its USEs:
2226 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2227 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2229 Exceptions:
2231 (case 1)
2232 If USE is used only for address computations (e.g. array indexing),
2233 which does not need to be directly vectorized, then the
2234 liveness/relevance of the respective DEF_STMT is left unchanged.
2236 (case 2)
2237 If STMT has been identified as defining a reduction variable, then
2238 we have two cases:
2239 (case 2.1)
2240 The last use of STMT is the reduction-variable, which is defined
2241 by a loop-header-phi. We don't want to mark the phi as live or
2242 relevant (because it does not need to be vectorized, it is handled
2243 as part of the vectorization of the reduction), so in this case we
2244 skip the call to vect_mark_relevant.
2245 (case 2.2)
2246 The rest of the uses of STMT are defined in the loop body. For
2247 the def_stmt of these uses we want to set liveness/relevance
2248 as follows:
2249 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2250 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2251 because even though STMT is classified as live (since it defines a
2252 value that is used across loop iterations) and irrelevant (since it
2253 is not used inside the loop), it will be vectorized, and therefore
2254 the corresponding DEF_STMTs need to marked as relevant.
2255 We distinguish between two kinds of relevant stmts - those that are
2256 used by a reduction computation, and those that are (also) used by
2257 a regular computation. This allows us later on to identify stmts
2258 that are used solely by a reduction, and therefore the order of
2259 the results that they produce does not have to be kept.
2262 /* case 2.2: */
2263 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2265 gcc_assert (relevant == vect_unused_in_loop && live_p);
2266 relevant = vect_used_by_reduction;
2267 live_p = false;
2270 i = 0;
2271 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2273 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "worklist: examine use %d: ", i++);
2276 print_generic_expr (vect_dump, use, TDF_SLIM);
2279 /* case 1: we are only interested in uses that need to be vectorized.
2280 Uses that are used for address computation are not considered
2281 relevant.
2283 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2284 continue;
2286 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2288 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2289 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2290 VEC_free (tree, heap, worklist);
2291 return false;
2294 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2295 continue;
2297 bb = bb_for_stmt (def_stmt);
2298 if (!flow_bb_inside_loop_p (loop, bb))
2299 continue;
2301 /* case 2.1: the reduction-use does not mark the defining-phi
2302 as relevant. */
2303 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2304 && TREE_CODE (def_stmt) == PHI_NODE)
2305 continue;
2307 if (dt == vect_induction_def && TREE_CODE (def_stmt) == PHI_NODE)
2308 continue;
2310 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2312 } /* while worklist */
2314 VEC_free (tree, heap, worklist);
2315 return true;
2319 /* Function vect_can_advance_ivs_p
2321 In case the number of iterations that LOOP iterates is unknown at compile
2322 time, an epilog loop will be generated, and the loop induction variables
2323 (IVs) will be "advanced" to the value they are supposed to take just before
2324 the epilog loop. Here we check that the access function of the loop IVs
2325 and the expression that represents the loop bound are simple enough.
2326 These restrictions will be relaxed in the future. */
2328 static bool
2329 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2332 basic_block bb = loop->header;
2333 tree phi;
2335 /* Analyze phi functions of the loop header. */
2337 if (vect_print_dump_info (REPORT_DETAILS))
2338 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2340 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2342 tree access_fn = NULL;
2343 tree evolution_part;
2345 if (vect_print_dump_info (REPORT_DETAILS))
2347 fprintf (vect_dump, "Analyze phi: ");
2348 print_generic_expr (vect_dump, phi, TDF_SLIM);
2351 /* Skip virtual phi's. The data dependences that are associated with
2352 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2354 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2356 if (vect_print_dump_info (REPORT_DETAILS))
2357 fprintf (vect_dump, "virtual phi. skip.");
2358 continue;
2361 /* Skip reduction phis. */
2363 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "reduc phi. skip.");
2367 continue;
2370 /* Analyze the evolution function. */
2372 access_fn = instantiate_parameters
2373 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2375 if (!access_fn)
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 fprintf (vect_dump, "No Access function.");
2379 return false;
2382 if (vect_print_dump_info (REPORT_DETAILS))
2384 fprintf (vect_dump, "Access function of PHI: ");
2385 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2388 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2390 if (evolution_part == NULL_TREE)
2392 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "No evolution.");
2394 return false;
2397 /* FORNOW: We do not transform initial conditions of IVs
2398 which evolution functions are a polynomial of degree >= 2. */
2400 if (tree_is_chrec (evolution_part))
2401 return false;
2404 return true;
2408 /* Function vect_get_loop_niters.
2410 Determine how many iterations the loop is executed.
2411 If an expression that represents the number of iterations
2412 can be constructed, place it in NUMBER_OF_ITERATIONS.
2413 Return the loop exit condition. */
2415 static tree
2416 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2418 tree niters;
2420 if (vect_print_dump_info (REPORT_DETAILS))
2421 fprintf (vect_dump, "=== get_loop_niters ===");
2423 niters = number_of_exit_cond_executions (loop);
2425 if (niters != NULL_TREE
2426 && niters != chrec_dont_know)
2428 *number_of_iterations = niters;
2430 if (vect_print_dump_info (REPORT_DETAILS))
2432 fprintf (vect_dump, "==> get_loop_niters:" );
2433 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2437 return get_loop_exit_condition (loop);
2441 /* Function vect_analyze_loop_form.
2443 Verify the following restrictions (some may be relaxed in the future):
2444 - it's an inner-most loop
2445 - number of BBs = 2 (which are the loop header and the latch)
2446 - the loop has a pre-header
2447 - the loop has a single entry and exit
2448 - the loop exit condition is simple enough, and the number of iterations
2449 can be analyzed (a countable loop). */
2451 static loop_vec_info
2452 vect_analyze_loop_form (struct loop *loop)
2454 loop_vec_info loop_vinfo;
2455 tree loop_cond;
2456 tree number_of_iterations = NULL;
2458 if (vect_print_dump_info (REPORT_DETAILS))
2459 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2461 if (loop->inner)
2463 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2464 fprintf (vect_dump, "not vectorized: nested loop.");
2465 return NULL;
2468 if (!single_exit (loop)
2469 || loop->num_nodes != 2
2470 || EDGE_COUNT (loop->header->preds) != 2)
2472 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2474 if (!single_exit (loop))
2475 fprintf (vect_dump, "not vectorized: multiple exits.");
2476 else if (loop->num_nodes != 2)
2477 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2478 else if (EDGE_COUNT (loop->header->preds) != 2)
2479 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2482 return NULL;
2485 /* We assume that the loop exit condition is at the end of the loop. i.e,
2486 that the loop is represented as a do-while (with a proper if-guard
2487 before the loop if needed), where the loop header contains all the
2488 executable statements, and the latch is empty. */
2489 if (!empty_block_p (loop->latch)
2490 || phi_nodes (loop->latch))
2492 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2493 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2494 return NULL;
2497 /* Make sure there exists a single-predecessor exit bb: */
2498 if (!single_pred_p (single_exit (loop)->dest))
2500 edge e = single_exit (loop);
2501 if (!(e->flags & EDGE_ABNORMAL))
2503 split_loop_exit_edge (e);
2504 if (vect_print_dump_info (REPORT_DETAILS))
2505 fprintf (vect_dump, "split exit edge.");
2507 else
2509 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2510 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2511 return NULL;
2515 if (empty_block_p (loop->header))
2517 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2518 fprintf (vect_dump, "not vectorized: empty loop.");
2519 return NULL;
2522 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2523 if (!loop_cond)
2525 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2526 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2527 return NULL;
2530 if (!number_of_iterations)
2532 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2533 fprintf (vect_dump,
2534 "not vectorized: number of iterations cannot be computed.");
2535 return NULL;
2538 if (chrec_contains_undetermined (number_of_iterations))
2540 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2541 fprintf (vect_dump, "Infinite number of iterations.");
2542 return false;
2545 loop_vinfo = new_loop_vec_info (loop);
2546 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2548 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2550 if (vect_print_dump_info (REPORT_DETAILS))
2552 fprintf (vect_dump, "Symbolic number of iterations is ");
2553 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2556 else
2557 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2559 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2560 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2561 return NULL;
2564 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2566 return loop_vinfo;
2570 /* Function vect_analyze_loop.
2572 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2573 for it. The different analyses will record information in the
2574 loop_vec_info struct. */
2575 loop_vec_info
2576 vect_analyze_loop (struct loop *loop)
2578 bool ok;
2579 loop_vec_info loop_vinfo;
2581 if (vect_print_dump_info (REPORT_DETAILS))
2582 fprintf (vect_dump, "===== analyze_loop_nest =====");
2584 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2586 loop_vinfo = vect_analyze_loop_form (loop);
2587 if (!loop_vinfo)
2589 if (vect_print_dump_info (REPORT_DETAILS))
2590 fprintf (vect_dump, "bad loop form.");
2591 return NULL;
2594 /* Find all data references in the loop (which correspond to vdefs/vuses)
2595 and analyze their evolution in the loop.
2597 FORNOW: Handle only simple, array references, which
2598 alignment can be forced, and aligned pointer-references. */
2600 ok = vect_analyze_data_refs (loop_vinfo);
2601 if (!ok)
2603 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "bad data references.");
2605 destroy_loop_vec_info (loop_vinfo);
2606 return NULL;
2609 /* Classify all cross-iteration scalar data-flow cycles.
2610 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2612 vect_analyze_scalar_cycles (loop_vinfo);
2614 vect_pattern_recog (loop_vinfo);
2616 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2618 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2619 if (!ok)
2621 if (vect_print_dump_info (REPORT_DETAILS))
2622 fprintf (vect_dump, "unexpected pattern.");
2623 destroy_loop_vec_info (loop_vinfo);
2624 return NULL;
2627 /* Analyze the alignment of the data-refs in the loop.
2628 Fail if a data reference is found that cannot be vectorized. */
2630 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2631 if (!ok)
2633 if (vect_print_dump_info (REPORT_DETAILS))
2634 fprintf (vect_dump, "bad data alignment.");
2635 destroy_loop_vec_info (loop_vinfo);
2636 return NULL;
2639 ok = vect_determine_vectorization_factor (loop_vinfo);
2640 if (!ok)
2642 if (vect_print_dump_info (REPORT_DETAILS))
2643 fprintf (vect_dump, "can't determine vectorization factor.");
2644 destroy_loop_vec_info (loop_vinfo);
2645 return NULL;
2648 /* Analyze data dependences between the data-refs in the loop.
2649 FORNOW: fail at the first data dependence that we encounter. */
2651 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2652 if (!ok)
2654 if (vect_print_dump_info (REPORT_DETAILS))
2655 fprintf (vect_dump, "bad data dependence.");
2656 destroy_loop_vec_info (loop_vinfo);
2657 return NULL;
2660 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2661 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2663 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2664 if (!ok)
2666 if (vect_print_dump_info (REPORT_DETAILS))
2667 fprintf (vect_dump, "bad data access.");
2668 destroy_loop_vec_info (loop_vinfo);
2669 return NULL;
2672 /* This pass will decide on using loop versioning and/or loop peeling in
2673 order to enhance the alignment of data references in the loop. */
2675 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2676 if (!ok)
2678 if (vect_print_dump_info (REPORT_DETAILS))
2679 fprintf (vect_dump, "bad data alignment.");
2680 destroy_loop_vec_info (loop_vinfo);
2681 return NULL;
2684 /* Scan all the operations in the loop and make sure they are
2685 vectorizable. */
2687 ok = vect_analyze_operations (loop_vinfo);
2688 if (!ok)
2690 if (vect_print_dump_info (REPORT_DETAILS))
2691 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2692 destroy_loop_vec_info (loop_vinfo);
2693 return NULL;
2696 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2698 return loop_vinfo;