old-autovect-branch/gcc/tree-vect-analyze.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "toplev.h"
/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant
  (VEC(tree,heap) **, tree, enum vect_relevant, bool);
static bool vect_stmt_relevant_p
  (tree, loop_vec_info, enum vect_relevant *, bool *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info, bool);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);
static void vect_check_interleaving
  (struct data_reference *, struct data_reference *);
static void vect_update_interleaving_chain
  (struct data_reference *, struct data_reference *);
static bool vect_equal_offsets (tree, tree);
static bool vect_check_dependences (struct data_dependence_relation *);
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              vectype = STMT_VINFO_VECTYPE (stmt_info);
              scalar_type = TREE_TYPE (vectype);
            }
          else
            {
              if (STMT_VINFO_DATA_REF (stmt_info))
                scalar_type =
                  TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
              else if (TREE_CODE (stmt) == MODIFY_EXPR)
                scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
              else
                scalar_type = TREE_TYPE (stmt);

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
#if 0
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */
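/* Illustrative sketch (not part of the original sources): for a loop like

        for (i=0; i<N; i++)
          a[i] = b[i] + c[i];

   the relevant stmts are two loads (handled by vectorizable_load), an
   addition (vectorizable_operation) and a store (vectorizable_store).  If
   any one of the vectorizable_* checks below fails for a relevant stmt, the
   whole loop is rejected.  */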
static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info)
              && !vectorizable_live_operation (phi, NULL, NULL))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: live phi not supported: ");
                  print_generic_expr (vect_dump, phi, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              /* Most likely a reduction-like computation that is used
                 in the loop.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                    || vectorizable_type_demotion (stmt, NULL, NULL)
                    || vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_reduction (stmt, NULL, NULL);

              if (ok)
                need_to_vectorize = true;
              else
                ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
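/* For example (an illustrative note, not from the original sources): given

        a[i_2] = x_3;

   the use 'x_3' is the stored value, so it has a non-indexing use in the
   stmt and the function returns true for it; the use 'i_2' appears only
   inside the ARRAY_REF on the LHS, so the function returns false for it.  */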
static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction: (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction: (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

        even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

        because the def-use cycle in loop3 is considered "not relevant" - i.e.,
        it does not need to be vectorized because it is only used for array
        indexing (see 'mark_stmts_to_be_vectorized').  The def-use cycle in
        loop2 on the other hand is relevant (it is being written to memory).
*/
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
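/* A small worked example (illustrative only): if the chain of DRB currently
   holds accesses with inits {0, 8, 12} and DRA has init 4, the walk below
   stops at the node with init 8 and links DRA in before it, giving
   {0, 4, 8, 12}.  The chain thus stays sorted by increasing DR_INIT.  */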
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  tree prev, next, next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
         FIRST_DR = DRB
         NEXT(FIRST_DR) = previous FIRST_DR
      else
         insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one
*/
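/* A worked example of case 4 (illustrative only): if DRA's chain holds
   inits {4, 12} and DRB's chain holds inits {0, 8}, the FIRST_DR of DRB's
   chain has the smaller init, so the nodes of DRA's chain are merged into
   DRB's chain one by one, yielding a single chain {0, 4, 8, 12} whose
   FIRST_DR is DRB's first stmt.  */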
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
  tree node, prev, next, node_init, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                             old_first_stmt)));
      tree tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */
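/* For example (illustrative only): two offsets that are both 'i_5 * 4' but
   were built as distinct MULT_EXPR nodes compare equal here, because the
   comparison recurses operand by operand instead of relying on pointer
   identity of the whole expression.  */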
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
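/* A small worked example (illustrative, not from the original sources): for

        for (i=0; i<N; i++){
          a[2*i] = x;
          a[2*i+1] = y;
        }

   both stores have the same base, offset and step (8 bytes for 4-byte
   elements); their inits differ by 4, which is a multiple of the type size
   and not greater than the step, so the two data-refs are recorded as an
   interleaving chain.  */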
static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  tree type_size_a, type_size_b, inits_diff, diff_mod_size;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
  type_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
  type_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));

  if (tree_int_cst_compare (type_size_a, type_size_b)
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
    return;

  if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) > 0)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      inits_diff = fold_build2 (MINUS_EXPR, TREE_TYPE (type_size_a),
                                DR_INIT (dra), DR_INIT (drb));
      diff_mod_size = fold_build2 (TRUNC_MOD_EXPR, TREE_TYPE (type_size_a),
                                   inits_diff, type_size_a);

      if (tree_int_cst_compare (inits_diff, DR_STEP (dra)) > 0)
        return;

      if (!tree_int_cst_compare (diff_mod_size, ssize_int (0)))
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      inits_diff = fold_build2 (MINUS_EXPR, TREE_TYPE (type_size_a),
                                DR_INIT (drb), DR_INIT (dra));
      diff_mod_size = fold_build2 (TRUNC_MOD_EXPR, TREE_TYPE (type_size_a),
                                   inits_diff, type_size_a);

      if (tree_int_cst_compare (inits_diff, DR_STEP (dra)) > 0)
        return;

      if (!tree_int_cst_compare (diff_mod_size, ssize_int (0)))
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB of DDR.  */
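/* For intuition (an illustrative note, not from the original sources): with
   VF = 4, a loop containing a[i+3] = a[i] + 1 has dependence distance
   3 < VF, so a single vector iteration would read elements that the same
   iteration also writes, and the loop is rejected; a distance of VF or more,
   or a distance divisible by VF between equal-sized accesses, is accepted
   below.  */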
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo,
                                  bool check_interleaving)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      if (!check_interleaving)
        vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v);
       i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump,
                       "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: possible dependence between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_check_dependences.

   Check if there is a store-store or load-store dependence between data-refs
   in DDR.  */
static bool
vect_check_dependences (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known || dra == drb)
    /* Independent or same data accesses.  */
    return false;

  if (DR_IS_READ (dra) == DR_IS_READ (drb) && DR_IS_READ (dra))
    /* Two loads.  */
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump,
               "possible store and store/load dependence between ");
      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
    }
  return true;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */
static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  bool check_interleaving = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  /* We allow interleaving only if there are no store-store and load-store
     dependencies in the loop.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
    {
      struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);

      if (vect_check_dependences (ddr))
        {
          check_interleaving = true;
          break;
        }
    }

  for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
    {
      struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);

      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo,
                                            check_interleaving))
        return false;
    }

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
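/* For example (illustrative only): for a 16-byte vector type, an access
   whose base is known to be 16-byte aligned and whose constant offset from
   the base is 20 bytes gets DR_MISALIGNMENT = 20 mod 16 = 4; if the base
   alignment can be neither determined nor forced, the misalignment stays -1
   (unknown).  */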
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ",
               DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
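/* A small worked example (illustrative only): with VF = 4, a 16-byte SIMD
   word and 4-byte elements, if DR_PEEL has misalignment 8 the prolog peels
   npeel = 4 - 8/4 = 2 iterations.  A data-ref DR with known misalignment 4
   then becomes (4 + 2*4) mod 16 = 12, while a DR with the same
   misalignment-to-size ratio as DR_PEEL becomes 0.  */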
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (DR_GROUP_FIRST_DR (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel)
      && (DR_MISALIGNMENT (dr) / dr_size ==
          DR_MISALIGNMENT (dr_peel) / dr_peel_size))
    {
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      DR_MISALIGNMENT (dr) += npeel * dr_size;
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  DR_MISALIGNMENT (dr) = -1;
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */
static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      tree stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (DR_GROUP_FIRST_DR (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = x;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = x;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){    # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){    # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = x;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = x;
     }
     for (i = 3; i < N; i++){   # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = x;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = x;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){  # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){  # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = x;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy
        vect_supportable_dr_alignment then see if loop versioning checks can
        be used to make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures as
        needed and return true.

     C) If neither peeling nor versioning were successful then return false
        if any data reference does not satisfy
        vect_supportable_dr_alignment.

     D) Return true (all data references satisfy
        vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is
     not being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      tree stmt;
      stmt_vec_info stmt_info;

      dr = VARRAY_GENERIC_PTR (datarefs, i);
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (DR_GROUP_FIRST_DR (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          if (DR_GROUP_FIRST_DR (stmt_info))
            {
              /* For interleaved access we peel only if the number of
                 iterations in the prolog loop ({VF - misalignment}) is a
                 multiple of the number of the interleaved accesses.  */
              int elem_size, mis_in_elements;
              int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

              /* FORNOW: handle only known alignment.  */
              if (!known_alignment_for_access_p (dr))
                {
                  do_peeling = false;
                  break;
                }

              elem_size = UNITS_PER_SIMD_WORD / vf;
              mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

              if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
                {
                  do_peeling = false;
                  break;
                }
            }
          dr0 = dr;
          do_peeling = true;
          break;
        }
    }

  /* Often peeling for alignment will require peeling for loop-bound, which
     in turn requires that we know how to adjust the loop ivs after the
     loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;
      stmt_vec_info stmt_info;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of
             iterations by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (DR_GROUP_FIRST_DR (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          int save_misalignment;
          tree stmt;
          stmt_vec_info stmt_info;

          dr = VARRAY_GENERIC_PTR (datarefs, i);
          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (DR_GROUP_FIRST_DR (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          DR_MISALIGNMENT (dr) = save_misalignment;

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT
             (DR_i) by the peeling factor times the element size of DR_i
             (MOD the vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              dr = VARRAY_GENERIC_PTR (datarefs, i);
              if (dr == dr0)
                continue;
              vect_update_misalignment_for_peel (dr, dr0, npeel);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "New alignment for dr = %d",
                         DR_MISALIGNMENT (dr));
            }

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          DR_MISALIGNMENT (dr0) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          tree stmt;
          stmt_vec_info stmt_info;

          dr = VARRAY_GENERIC_PTR (datarefs, i);
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          if (aligned_access_p (dr)
              || (DR_GROUP_FIRST_DR (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being
     done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive or to be a part of an interleaving scheme to
   be considered vectorizable.  */
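/* Illustrative examples (not from the original sources): an access a[i] with
   4-byte elements has DR_STEP = 4 = type size and is consecutive; an access
   a[2*i] has DR_STEP = 8 and is vectorizable only as part of an interleaving
   group of size 2 (a power of 2), e.g. together with a[2*i+1].  */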
1720 static bool
1721 vect_analyze_data_ref_access (struct data_reference *dr)
1723 tree step = DR_STEP (dr);
1724 tree scalar_type = TREE_TYPE (DR_REF (dr));
1725 tree stmt = DR_STMT (dr);
1727 if (!step)
1729 if (vect_print_dump_info (REPORT_DETAILS))
1730 fprintf (vect_dump, "bad data-ref access");
1731 return false;
1734 /* Consecutive? */
1735 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1737 /* Mark that it is not interleaving. */
1738 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1739 return true;
1742 /* Not consecutive access is possible only if it is a part of interleaving. */
1743 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1745 /* Check if it this data-ref is a part of interleaving, and is a single
1746 element of the group that is accessed in the loop. */
1747 struct data_reference *data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (
1748 stmt));
1749 /* STRIDE is STEP counted in elements. */
1750 tree stride = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (step), step,
1751 TYPE_SIZE_UNIT (scalar_type));
1752 unsigned int size = TREE_INT_CST_LOW (stride);
1754 /* Gaps are supported only for loads. STEP must be a multiple of the type
1755 size. The size of the group must be a power of 2. */
1756 if (DR_IS_READ (data_ref)
1757 && !tree_int_cst_compare (fold_build2 (TRUNC_MOD_EXPR,
1758 TREE_TYPE (step), step,
1759 TYPE_SIZE_UNIT (scalar_type)),
1760 ssize_int (0))
1761 && tree_int_cst_compare (stride, ssize_int (0)) > 0
1762 && exact_log2 (size) != -1)
1764 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1765 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = size;
1766 if (vect_print_dump_info (REPORT_DR_DETAILS))
1768 fprintf (vect_dump, "Detected single element interleaving %d ",
1769 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1770 print_generic_expr (vect_dump, DR_REF (data_ref), TDF_SLIM);
1771 fprintf (vect_dump, " step ");
1772 print_generic_expr (vect_dump, step, TDF_SLIM);
1774 return true;
1776 if (vect_print_dump_info (REPORT_DETAILS))
1777 fprintf (vect_dump, "not consecutive access");
1778 return false;
  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      tree prev = stmt;
      struct data_reference *data_ref
        = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      unsigned int count = 1, size;
      tree next_step, stride, count_in_bytes;
      tree prev_init = DR_INIT (data_ref);
      tree diff;

      while (next)
        {
          /* Skip same data-refs.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                      vinfo_for_stmt (next)))))
            {
              /* For loads, use the same data-ref.  (We check that there are
                 no two stores to the same location.)  */
              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

              prev = next;
              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
              continue;
            }
          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
          prev = next;

          diff = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (prev_init),
                              fold_build2 (MINUS_EXPR, TREE_TYPE (prev_init),
                                           DR_INIT (data_ref), prev_init),
                              TYPE_SIZE_UNIT (scalar_type));
          /* Store the gap from the previous member of the group.  If there
             is no gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (vinfo_for_stmt (next)) = TREE_INT_CST_LOW (diff);

          /* Check that all the accesses have the same STEP.  */
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (step, next_step))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "not consecutive access in interleaving");
              return false;
            }

          prev_init = DR_INIT (data_ref);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
          count++;
        }

      /* COUNT is the number of accesses found.  Multiply it by the size of
         the type; the result, COUNT_IN_BYTES, must equal STEP, otherwise we
         have gaps.  */
      count_in_bytes = fold_build2 (MULT_EXPR, TREE_TYPE (step),
                                    ssize_int (count),
                                    TYPE_SIZE_UNIT (scalar_type));
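      /* E.g. (illustrative numbers only): for a group of 4 accesses to
         4-byte elements, COUNT_IN_BYTES = 4 * 4 = 16.  A STEP of 16 bytes
         means the group covers the whole step with no gaps; a STEP of 32
         bytes would leave a hole, which the check below rejects for
         stores.  */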
      if (tree_int_cst_compare (step, count_in_bytes)
          && !DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
        {
          /* Gaps are supported only for loads.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "hole in store interleaving level %d, step ",
                       count);
              print_generic_expr (vect_dump, step, TDF_SLIM);
              fprintf (vect_dump, " size ");
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                  TDF_SLIM);
            }
          return false;
        }

      /* Check that the size of the interleaving is not greater than STEP.  */
      if (tree_int_cst_compare (step, count_in_bytes) < 0)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump,
                       "interleaving level is greater than step for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Check that STEP is a multiple of the type size.  */
      if (tree_int_cst_compare (fold_build2 (TRUNC_MOD_EXPR, TREE_TYPE (step),
                                             step,
                                             TYPE_SIZE_UNIT (scalar_type)),
                                ssize_int (0)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump,
                       "step is not a multiple of type size: step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
              fprintf (vect_dump, " size ");
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                  TDF_SLIM);
            }
          return false;
        }

      /* STRIDE is STEP counted in elements.  */
      stride = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (step), step,
                            TYPE_SIZE_UNIT (scalar_type));
      size = TREE_INT_CST_LOW (stride);

      /* FORNOW: we handle only interleaving whose size is a power of 2.  */
      if (exact_log2 (size) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaving is not a power of 2");
          return false;
        }
      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = size;
    }

  return true;
}

/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */
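
/* For example (illustrative only), in

     for (i = 0; i < N; i++)
       a[i] = *p++;

   both the array access a[i] and the pointer access *p advance by one
   element per iteration, so both are accepted here.  */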

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_analyze_data_ref_access (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}

/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   The general structure of the analysis of data refs in the vectorizer is
   as follows:
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop
      to find and analyze all data-refs in the loop and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  varray_type datarefs;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  compute_data_dependences_for_loop (loop, true,
                                     &(LOOP_VINFO_DATAREFS (loop_vinfo)),
                                     &(LOOP_VINFO_DDRS (loop_vinfo)));

  /* Go through the data-refs and check that the analysis succeeded; update
     the DR pointer in the stmt_vec_info struct, and set the vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      tree stmt;
      stmt_vec_info stmt_info;

      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update the DR field in the stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that the analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }

      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set the vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info)
        = get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      tree pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (TREE_CODE (stmt) == PHI_NODE)
    /* Don't put phi-nodes in the worklist.  Phis that are marked relevant
       or live will fail vectorization later on.  */
    return;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (tree, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
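
/* For example (an illustrative loop):

     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + 1;      <-- relevant: has a vdef (stores to memory)
         s = s + b[i];         <-- live: s is used after the loop
       }
     ... = s;

   The store to a[i] makes its stmt relevant, and the use of s outside
   the loop makes the stmt that defines s live.  */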

static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_loop;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant = vect_used_in_loop;

  /* changing memory.  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_loop;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = bb_for_stmt (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
              gcc_assert (bb == loop->single_exit->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt, use;
  stmt_ann_t ann;
  ssa_op_iter iter;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool live_p;
  enum vect_relevant relevant;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */

  bb = loop->header;
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "init: phi relevant? ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
        vect_mark_relevant (&worklist, phi, relevant, live_p);
    }

  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process worklist.  */

  while (VEC_length (tree, worklist) > 0)
    {
      stmt = VEC_pop (tree, worklist);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each ssa-name USE that is defined
         in the loop, mark the stmt that defines it (DEF_STMT) as
         relevant/irrelevant and live/dead according to the liveness and
         relevance properties of STMT.  */

      gcc_assert (TREE_CODE (stmt) != PHI_NODE);

      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);

      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated to the DEF_STMTs of its USEs:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
             STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant

         Exceptions:

         (case 1)
         If USE is used only for address computations (e.g. array indexing),
         which does not need to be directly vectorized, then the
         liveness/relevance of the respective DEF_STMT is left unchanged.

         (case 2)
         If STMT has been identified as defining a reduction variable, then
         we have two cases:
         (case 2.1)
         The last use of STMT is the reduction-variable, which is defined
         by a loop-header-phi.  We don't want to mark the phi as live or
         relevant (because it does not need to be vectorized, it is handled
         as part of the vectorization of the reduction), so in this case we
         skip the call to vect_mark_relevant.
         (case 2.2)
         The rest of the uses of STMT are defined in the loop body.  For
         the def_stmt of these uses we want to set liveness/relevance
         as follows:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
             STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
         because even though STMT is classified as live (since it defines a
         value that is used across loop iterations) and irrelevant (since it
         is not used inside the loop), it will be vectorized, and therefore
         the corresponding DEF_STMTs need to be marked as relevant.
         We distinguish between two kinds of relevant stmts - those that are
         used by a reduction computation, and those that are (also) used by
         a regular computation.  This allows us later on to identify stmts
         that are used solely by a reduction, and therefore the order of
         the results that they produce does not have to be kept.  */
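
      /* An illustrative reduction (hypothetical SSA names):

           loop:
             s_1 = PHI <s_init, s_2>    <-- case 2.1: not marked via its use
             ...
             t_5 = a[i] * 3;            <-- case 2.2: used by the reduction
             s_2 = s_1 + t_5;           <-- STMT: defines the reduction

         The use of s_1 in STMT does not mark the loop-header-phi, while
         the use of t_5 marks its def-stmt as vect_used_by_reduction.  */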

      /* case 2.2:  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          gcc_assert (relevant == vect_unused_in_loop && live_p);
          relevant = vect_used_by_reduction;
          live_p = false;
        }

      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        {
          /* case 1: we are only interested in uses that need to be
             vectorized.  Uses that are used for address computation are
             not considered relevant.  */
          if (!exist_non_indexing_operands_for_use_p (use, stmt))
            continue;

          if ((live_p
               && !vect_is_simple_live_use (use, loop_vinfo, &def_stmt,
                                            &def, &dt))
              || (relevant
                  && !vect_is_simple_use (use, loop_vinfo, &def_stmt,
                                          &def, &dt)))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump,
                         "not vectorized: unsupported use in stmt.");
              VEC_free (tree, heap, worklist);
              return false;
            }

          if (!def_stmt || IS_EMPTY_STMT (def_stmt))
            continue;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "worklist: examine use %d: ", i);
              print_generic_expr (vect_dump, use, TDF_SLIM);
            }

          bb = bb_for_stmt (def_stmt);
          if (!flow_bb_inside_loop_p (loop, bb))
            continue;

          /* case 2.1: the reduction-use does not mark the defining-phi
             as relevant.  */
          if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
              && TREE_CODE (def_stmt) == PHI_NODE)
            continue;

          vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}

/* Function vect_can_advance_ivs_p.

   In case the number of iterations of LOOP is unknown at compile time, an
   epilog loop will be generated, and the loop induction variables (IVs)
   will be "advanced" to the values they are supposed to take just before
   the epilog loop.  Here we check that the access functions of the loop
   IVs and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
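
/* For example (illustrative): for an IV with access function
   {init, +, step}_loop, if the vectorized loop executes
   ratio = (niters / VF) * VF iterations, the IV can be advanced to
   init + ratio * step before entering the epilog loop.  Such a simple
   update is only possible when the evolution has this linear form.  */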

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */
      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are polynomials of degree >= 2.  */
      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed.  If an expression
   that represents the number of iterations can be constructed, place it
   in NUMBER_OF_ITERATIONS.  Return the loop exit condition.  */
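
/* For example (illustrative): for

     for (i = 0; i < n; i++)
       ...

   NUMBER_OF_ITERATIONS is set to 'n', and the COND_EXPR that tests
   'i < n' is returned.  */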

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}

/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it is an innermost loop
   - number of BBs = 2 (which are the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
        fprintf (vect_dump, "not vectorized: nested loop.");
      return NULL;
    }

  if (!loop->single_exit
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!loop->single_exit)
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (loop->num_nodes != 2)
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
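  /* I.e., we expect a CFG shape like (illustrative):

       loop_header:
         ...all executable stmts...
         if (cond) goto loop_latch; else goto exit_bb;
       loop_latch:
         goto loop_header;

     with an empty latch block.  */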
  if (!empty_block_p (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb.  */
  if (!single_pred_p (loop->single_exit->dest))
    {
      edge e = loop->single_exit;
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          return NULL;
        }
    }

  if (empty_block_p (loop->header))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */

loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit,
     etc.).  */
  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */
  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed
     separately.  */
  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be
     vectorized.  */
  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */
  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */
  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}
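
/* A minimal usage sketch (hypothetical caller, for illustration only):

     loop_vec_info loop_vinfo = vect_analyze_loop (loop);
     if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
       ...transform the loop...

   i.e., the analysis either yields a loop_vec_info with VECTORIZABLE_P
   set, or NULL (having destroyed any partially-built loop_vec_info).  */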