2004-09-15 Steven G. Kargl <kargls@comcast.net>
[official-gcc.git] / gcc / tree-vectorizer.c
blob54d5d1abefe63957adfaf8f89d5d4b376c9ddecf
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS that are one dimensional
61 arrays which base is an array DECL (not a pointer), and INDIRECT_REFS
62 through pointers; both array and pointer accesses are required to have a
63 simple (consecutive) access pattern.
65 Analysis phase:
66 ===============
67 The driver for the analysis phase is vect_analyze_loop_nest().
68 It applies a set of analyses, some of which rely on the scalar evolution
69 analyzer (scev) developed by Sebastian Pop.
71 During the analysis phase the vectorizer records some information
72 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
73 loop, as well as general information about the loop as a whole, which is
74 recorded in a "loop_vec_info" struct attached to each loop.
76 Transformation phase:
77 =====================
78 The loop transformation phase scans all the stmts in the loop, and
79 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
80 the loop that needs to be vectorized. It insert the vector code sequence
81 just before the scalar stmt S, and records a pointer to the vector code
82 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
83 attached to S). This pointer will be used for the vectorization of following
84 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
85 otherwise, we rely on dead code elimination for removing it.
87 For example, say stmt S1 was vectorized into stmt VS1:
89 VS1: vb = px[i];
90 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
91 S2: a = b;
93 To vectorize stmt S2, the vectorizer first finds the stmt that defines
94 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
95 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
96 resulting sequence would be:
98 VS1: vb = px[i];
99 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 VS2: va = vb;
101 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
103 Operands that are not SSA_NAMEs, are data-refs that appear in
104 load/store operations (like 'x[i]' in S1), and are handled differently.
106 Target modeling:
107 =================
108 Currently the only target specific information that is used is the
109 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
110 support different sizes of vectors, for now will need to specify one value
111 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
113 Since we only vectorize operations which vector form can be
114 expressed using existing tree codes, to verify that an operation is
115 supported, the vectorizer checks the relevant optab at the relevant
116 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
117 the value found is CODE_FOR_nothing, then there's no target support, and
118 we can't vectorize the stmt.
120 For additional information on this project see:
121 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
124 #include "config.h"
125 #include "system.h"
126 #include "coretypes.h"
127 #include "tm.h"
128 #include "errors.h"
129 #include "ggc.h"
130 #include "tree.h"
131 #include "target.h"
133 #include "rtl.h"
134 #include "basic-block.h"
135 #include "diagnostic.h"
136 #include "tree-flow.h"
137 #include "tree-dump.h"
138 #include "timevar.h"
139 #include "cfgloop.h"
140 #include "cfglayout.h"
141 #include "expr.h"
142 #include "optabs.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static void vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static void vect_align_data_ref (tree);
169 static void vect_enhance_data_refs_alignment (loop_vec_info);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree , struct loop *, tree *);
173 static bool exist_non_indexing_operands_for_use_p (tree, tree);
174 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175 static void vect_mark_relevant (varray_type, tree);
176 static bool vect_stmt_relevant_p (tree, loop_vec_info);
177 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
178 static void vect_compute_data_ref_alignment
179 (struct data_reference *, loop_vec_info);
180 static bool vect_analyze_data_ref_access (struct data_reference *);
181 static bool vect_get_first_index (tree, tree *);
182 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
183 static tree vect_get_base_decl_and_bit_offset (tree, tree *);
184 static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool);
186 /* Utility functions for the code transformation. */
187 static tree vect_create_destination_var (tree, tree);
188 static tree vect_create_data_ref (tree, block_stmt_iterator *);
189 static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *);
190 static tree get_vectype_for_scalar_type (tree);
191 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
192 static tree vect_get_vec_def_for_operand (tree, tree);
193 static tree vect_init_vector (tree, tree);
194 static void vect_finish_stmt_generation
195 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
197 /* Utilities for creation and deletion of vec_info structs. */
198 loop_vec_info new_loop_vec_info (struct loop *loop);
199 void destroy_loop_vec_info (loop_vec_info);
200 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
202 static bool vect_debug_stats (struct loop *loop);
203 static bool vect_debug_details (struct loop *loop);
206 /* Function new_stmt_vec_info.
208 Create and initialize a new stmt_vec_info struct for STMT. */
210 stmt_vec_info
211 new_stmt_vec_info (tree stmt, struct loop *loop)
213 stmt_vec_info res;
214 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
216 STMT_VINFO_TYPE (res) = undef_vec_info_type;
217 STMT_VINFO_STMT (res) = stmt;
218 STMT_VINFO_LOOP (res) = loop;
219 STMT_VINFO_RELEVANT_P (res) = 0;
220 STMT_VINFO_VECTYPE (res) = NULL;
221 STMT_VINFO_VEC_STMT (res) = NULL;
222 STMT_VINFO_DATA_REF (res) = NULL;
223 STMT_VINFO_MEMTAG (res) = NULL;
225 return res;
229 /* Function new_loop_vec_info.
231 Create and initialize a new loop_vec_info struct for LOOP, as well as
232 stmt_vec_info structs for all the stmts in LOOP. */
234 loop_vec_info
235 new_loop_vec_info (struct loop *loop)
237 loop_vec_info res;
238 basic_block *bbs;
239 block_stmt_iterator si;
240 unsigned int i;
242 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
244 bbs = get_loop_body (loop);
246 /* Create stmt_info for all stmts in the loop. */
247 for (i = 0; i < loop->num_nodes; i++)
249 basic_block bb = bbs[i];
250 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
252 tree stmt = bsi_stmt (si);
253 stmt_ann_t ann;
255 get_stmt_operands (stmt);
256 ann = stmt_ann (stmt);
257 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
261 LOOP_VINFO_LOOP (res) = loop;
262 LOOP_VINFO_BBS (res) = bbs;
263 LOOP_VINFO_EXIT_COND (res) = NULL;
264 LOOP_VINFO_NITERS (res) = -1;
265 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
266 LOOP_VINFO_VECT_FACTOR (res) = 0;
267 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
268 "loop_write_datarefs");
269 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
270 "loop_read_datarefs");
271 return res;
275 /* Function destroy_loop_vec_info.
277 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
278 stmts in the loop. */
280 void
281 destroy_loop_vec_info (loop_vec_info loop_vinfo)
283 struct loop *loop;
284 basic_block *bbs;
285 int nbbs;
286 block_stmt_iterator si;
287 int j;
289 if (!loop_vinfo)
290 return;
292 loop = LOOP_VINFO_LOOP (loop_vinfo);
294 bbs = LOOP_VINFO_BBS (loop_vinfo);
295 nbbs = loop->num_nodes;
297 for (j = 0; j < nbbs; j++)
299 basic_block bb = bbs[j];
300 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
302 tree stmt = bsi_stmt (si);
303 stmt_ann_t ann = stmt_ann (stmt);
304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
305 free (stmt_info);
306 set_stmt_info (ann, NULL);
310 free (LOOP_VINFO_BBS (loop_vinfo));
311 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
312 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
314 free (loop_vinfo);
318 /* Function debug_loop_stats.
320 For vectorization statistics dumps. */
322 static bool
323 vect_debug_stats (struct loop *loop)
325 basic_block bb;
326 block_stmt_iterator si;
327 tree node = NULL_TREE;
329 if (!dump_file || !(dump_flags & TDF_STATS))
330 return false;
332 if (!loop)
334 fprintf (dump_file, "\n");
335 return true;
338 if (!loop->header)
339 return false;
341 bb = loop->header;
343 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
345 node = bsi_stmt (si);
346 if (node && EXPR_P (node) && EXPR_LOCUS (node))
347 break;
350 if (node && EXPR_P (node) && EXPR_LOCUS (node)
351 && EXPR_FILENAME (node) && EXPR_LINENO (node))
353 fprintf (dump_file, "\nloop at %s:%d: ",
354 EXPR_FILENAME (node), EXPR_LINENO (node));
355 return true;
358 return false;
362 /* Function debug_loop_details.
364 For vectorization debug dumps. */
366 static bool
367 vect_debug_details (struct loop *loop)
369 basic_block bb;
370 block_stmt_iterator si;
371 tree node = NULL_TREE;
373 if (!dump_file || !(dump_flags & TDF_DETAILS))
374 return false;
376 if (!loop)
378 fprintf (dump_file, "\n");
379 return true;
382 if (!loop->header)
383 return false;
385 bb = loop->header;
387 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
389 node = bsi_stmt (si);
390 if (node && EXPR_P (node) && EXPR_LOCUS (node))
391 break;
394 if (node && EXPR_P (node) && EXPR_LOCUS (node)
395 && EXPR_FILENAME (node) && EXPR_LINENO (node))
397 fprintf (dump_file, "\nloop at %s:%d: ",
398 EXPR_FILENAME (node), EXPR_LINENO (node));
399 return true;
402 return false;
405 /* Function vect_get_base_decl_and_bit_offset
407 Get the decl from which the data reference REF is based,
408 and compute the OFFSET from it in bits on the way.
409 FORNOW: Handle only component-refs that consist of
410 VAR_DECLs (no ARRAY_REF or INDIRECT_REF). */
412 static tree
413 vect_get_base_decl_and_bit_offset (tree ref, tree *offset)
415 tree decl;
416 if (TREE_CODE (ref) == VAR_DECL)
417 return ref;
419 if (TREE_CODE (ref) == COMPONENT_REF)
421 tree this_offset;
422 tree oprnd0 = TREE_OPERAND (ref, 0);
423 tree oprnd1 = TREE_OPERAND (ref, 1);
425 this_offset = bit_position (oprnd1);
426 if (!host_integerp (this_offset,1))
427 return NULL_TREE;
429 decl = vect_get_base_decl_and_bit_offset (oprnd0, offset);
431 if (decl)
433 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
435 if (!host_integerp (*offset,1) || TREE_OVERFLOW (*offset))
436 return NULL_TREE;
438 if (vect_debug_details (NULL))
440 print_generic_expr (dump_file, ref, TDF_SLIM);
441 fprintf (dump_file, " --> total offset for ref: ");
442 print_generic_expr (dump_file, *offset, TDF_SLIM);
446 return decl;
449 /* TODO: extend to handle more cases. */
450 return NULL_TREE;
454 /* Function vect_force_dr_alignment_p.
456 Returns whether the alignment of a DECL can be forced to be aligned
457 on ALIGNMENT bit boundary. */
459 static bool
460 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
462 if (TREE_CODE (decl) != VAR_DECL)
463 return false;
465 if (DECL_EXTERNAL (decl))
466 return false;
468 if (TREE_STATIC (decl))
469 return (alignment <= MAX_OFILE_ALIGNMENT);
470 else
471 /* This is not 100% correct. The absolute correct stack alignment
472 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
473 PREFERRED_STACK_BOUNDARY is honored by all translation units.
474 However, until someone implements forced stack alignment, SSE
475 isn't really usable without this. */
476 return (alignment <= PREFERRED_STACK_BOUNDARY);
480 /* Function vect_get_new_vect_var.
482 Returns a name for a new variable. The current naming scheme appends the
483 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
484 the name of vectorizer generated variables, and appends that to NAME if
485 provided. */
487 static tree
488 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
490 const char *prefix;
491 int prefix_len;
492 tree new_vect_var;
494 if (var_kind == vect_simple_var)
495 prefix = "vect_";
496 else
497 prefix = "vect_p";
499 prefix_len = strlen (prefix);
501 if (name)
502 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
503 else
504 new_vect_var = create_tmp_var (type, prefix);
506 return new_vect_var;
510 /* Function create_index_for_array_ref.
512 Create (and return) an index variable, along with it's update chain in the
513 loop. This variable will be used to access a memory location in a vector
514 operation.
516 Input:
517 STMT: The stmt that contains a memory data-ref.
518 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
519 function can be added here, or in the loop pre-header.
521 FORNOW: We are only handling array accesses with step 1. */
523 static tree
524 vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi)
526 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
527 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
528 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
529 tree expr = DR_REF (dr);
530 tree access_fn;
531 tree init, step;
532 loop_vec_info loop_info = loop->aux;
533 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info);
534 tree vf;
535 tree array_first_index;
536 tree indx_before_incr, indx_after_incr;
537 int loopnum = loop->num;
538 bool ok;
539 #ifdef ENABLE_CHECKING
540 varray_type access_fns = DR_ACCESS_FNS (dr);
542 /* FORNOW: handling only one dimensional arrays. */
543 gcc_assert (VARRAY_ACTIVE_SIZE (access_fns) == 1);
544 gcc_assert (vectorization_factor);
545 #endif
547 access_fn = DR_ACCESS_FN (dr, 0);
548 ok = vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, true)
549 && vect_get_first_index (expr, &array_first_index);
551 gcc_assert (ok);
553 /* FORNOW: Handling only constant 'init'. */
554 gcc_assert (TREE_CODE (init) == INTEGER_CST);
556 vf = build_int_cst (unsigned_type_node, vectorization_factor);
558 if (vect_debug_details (NULL))
560 fprintf (dump_file, "int vf = %d",vectorization_factor);
561 fprintf (dump_file, ", vf:");
562 print_generic_expr (dump_file, vf, TDF_SLIM);
563 fprintf (dump_file, ", init:");
564 print_generic_expr (dump_file, init, TDF_SLIM);
565 fprintf (dump_file, ", array_first_index:");
566 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
569 /* Calculate the 'init' of the new index.
570 init = (init - array_first_index) / vectorization_factor */
571 init = int_const_binop (TRUNC_DIV_EXPR,
572 int_const_binop (MINUS_EXPR, init, array_first_index, 1),
573 vf, 1);
575 /* Calculate the 'step' of the new index. FORNOW: always 1. */
576 step = size_one_node;
578 if (vect_debug_details (NULL))
580 fprintf (dump_file, "create iv for (");
581 print_generic_expr (dump_file, init, TDF_SLIM);
582 fprintf (dump_file, ", + ,");
583 print_generic_expr (dump_file, step, TDF_SLIM);
584 fprintf (dump_file, ")");
587 create_iv (init, step, NULL_TREE, loop, bsi, false,
588 &indx_before_incr, &indx_after_incr);
590 return indx_before_incr;
594 /* Function get_vectype_for_scalar_type.
596 Returns the vector type corresponding to SCALAR_TYPE as supported
597 by the target. */
599 static tree
600 get_vectype_for_scalar_type (tree scalar_type)
602 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
603 int nbytes = GET_MODE_SIZE (inner_mode);
604 int nunits;
606 if (nbytes == 0)
607 return NULL_TREE;
609 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
610 is expected. */
611 nunits = UNITS_PER_SIMD_WORD / nbytes;
613 return build_vector_type (scalar_type, nunits);
617 /* Function vect_align_data_ref.
619 Handle mislignment of a memory accesses.
621 FORNOW: Can't handle misaligned accesses.
622 Make sure that the dataref is aligned. */
624 static void
625 vect_align_data_ref (tree stmt)
627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
628 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
630 /* FORNOW: can't handle misaligned accesses;
631 all accesses expected to be aligned. */
632 gcc_assert (aligned_access_p (dr));
636 /* Function vect_create_data_ref.
638 Create a memory reference expression for vector access, to be used in a
639 vector load/store stmt.
641 Input:
642 STMT: a stmt that references memory. expected to be of the form
643 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
644 BSI: block_stmt_iterator where new stmts can be added.
646 Output:
647 1. Declare a new ptr to vector_type, and have it point to the array base.
648 For example, for vector of type V8HI:
649 v8hi *p0;
650 p0 = (v8hi *)&a;
651 2. Create a data-reference based on the new vector pointer p0, and using
652 a new index variable 'idx'. Return the expression '(*p0)[idx]'.
654 FORNOW: handle only aligned and consecutive accesses. */
656 static tree
657 vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
659 tree new_base;
660 tree data_ref;
661 tree idx;
662 tree vec_stmt;
663 tree new_temp;
664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
666 tree vect_ptr_type;
667 tree vect_ptr;
668 tree addr_ref;
669 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
670 tree array_type;
671 tree base_addr = NULL_TREE;
672 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
673 edge pe;
674 tree tag;
675 tree addr_expr;
676 tree scalar_ptr_type;
677 tree use;
678 ssa_op_iter iter;
680 /* FORNOW: make sure the data reference is aligned. */
681 vect_align_data_ref (stmt);
683 addr_ref = DR_BASE_NAME (dr);
685 array_type = build_array_type (vectype, 0);
686 TYPE_ALIGN (array_type) = TYPE_ALIGN (TREE_TYPE (addr_ref));
687 vect_ptr_type = build_pointer_type (array_type);
688 scalar_ptr_type = build_pointer_type (TREE_TYPE (addr_ref));
690 if (vect_debug_details (NULL))
692 fprintf (dump_file, "create array_ref of type: ");
693 print_generic_expr (dump_file, vectype, TDF_SLIM);
696 /*** create: vectype_array *p; ***/
697 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
698 get_name (addr_ref));
699 add_referenced_tmp_var (vect_ptr);
701 gcc_assert (TREE_CODE (addr_ref) == VAR_DECL
702 || TREE_CODE (addr_ref) == COMPONENT_REF
703 || TREE_CODE (addr_ref) == SSA_NAME);
705 if (vect_debug_details (NULL))
707 if (TREE_CODE (addr_ref) == VAR_DECL)
708 fprintf (dump_file, "vectorizing an array ref: ");
709 else if (TREE_CODE (addr_ref) == SSA_NAME)
710 fprintf (dump_file, "vectorizing a pointer ref: ");
711 else if (TREE_CODE (addr_ref) == COMPONENT_REF)
712 fprintf (dump_file, "vectorizing a record ref: ");
713 print_generic_expr (dump_file, addr_ref, TDF_SLIM);
716 /* Get base address: */
717 if (TREE_CODE (addr_ref) == SSA_NAME)
718 base_addr = addr_ref;
719 else
720 base_addr = build_fold_addr_expr (addr_ref);
722 /* Handle aliasing: */
723 tag = STMT_VINFO_MEMTAG (stmt_info);
724 gcc_assert (tag);
725 get_var_ann (vect_ptr)->type_mem_tag = tag;
727 /* Mark for renaming all aliased variables
728 (i.e, the may-aliases of the type-mem-tag) */
729 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter,
730 (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))
732 if (TREE_CODE (use) == SSA_NAME)
733 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
736 pe = loop_preheader_edge (loop);
738 /*** create: p = (vectype *)&a; ***/
740 /* addr_expr = &a */
741 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
742 get_name (addr_ref));
743 add_referenced_tmp_var (addr_expr);
744 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, base_addr);
745 new_temp = make_ssa_name (addr_expr, vec_stmt);
746 TREE_OPERAND (vec_stmt, 0) = new_temp;
747 bsi_insert_on_edge (pe, vec_stmt);
749 /* vect_ptr = (vectype_array *)&a; */
750 vec_stmt = fold_convert (vect_ptr_type, new_temp);
751 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
752 new_temp = make_ssa_name (vect_ptr, vec_stmt);
753 TREE_OPERAND (vec_stmt, 0) = new_temp;
754 bsi_insert_on_edge (pe, vec_stmt);
756 /*** create data ref: '(*p)[idx]' ***/
758 idx = vect_create_index_for_array_ref (stmt, bsi);
760 new_base = build_fold_indirect_ref (new_temp);
761 data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);
763 if (vect_debug_details (NULL))
765 fprintf (dump_file, "created new data-ref: ");
766 print_generic_expr (dump_file, data_ref, TDF_SLIM);
769 return data_ref;
773 /* Function vect_create_destination_var.
775 Create a new temporary of type VECTYPE. */
777 static tree
778 vect_create_destination_var (tree scalar_dest, tree vectype)
780 tree vec_dest;
781 const char *new_name;
783 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
785 new_name = get_name (scalar_dest);
786 if (!new_name)
787 new_name = "var_";
788 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
789 add_referenced_tmp_var (vec_dest);
791 return vec_dest;
795 /* Function vect_init_vector.
797 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
798 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
799 used in the vectorization of STMT. */
801 static tree
802 vect_init_vector (tree stmt, tree vector_var)
804 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
805 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
806 tree new_var;
807 tree init_stmt;
808 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
809 tree vec_oprnd;
810 edge pe;
811 tree new_temp;
813 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
814 add_referenced_tmp_var (new_var);
816 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
817 new_temp = make_ssa_name (new_var, init_stmt);
818 TREE_OPERAND (init_stmt, 0) = new_temp;
820 pe = loop_preheader_edge (loop);
821 bsi_insert_on_edge (pe, init_stmt);
823 if (vect_debug_details (NULL))
825 fprintf (dump_file, "created new init_stmt: ");
826 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
829 vec_oprnd = TREE_OPERAND (init_stmt, 0);
830 return vec_oprnd;
834 /* Function vect_get_vec_def_for_operand.
836 OP is an operand in STMT. This function returns a (vector) def that will be
837 used in the vectorized stmt for STMT.
839 In the case that OP is an SSA_NAME which is defined in the loop, then
840 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
842 In case OP is an invariant or constant, a new stmt that creates a vector def
843 needs to be introduced. */
845 static tree
846 vect_get_vec_def_for_operand (tree op, tree stmt)
848 tree vec_oprnd;
849 tree vec_stmt;
850 tree def_stmt;
851 stmt_vec_info def_stmt_info = NULL;
852 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
853 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
854 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
855 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
856 basic_block bb;
857 tree vec_inv;
858 tree t = NULL_TREE;
859 tree def;
860 int i;
862 if (vect_debug_details (NULL))
864 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
865 print_generic_expr (dump_file, op, TDF_SLIM);
868 /** ===> Case 1: operand is a constant. **/
870 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
872 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
874 tree vec_cst;
875 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
876 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
877 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
878 tree t = NULL_TREE;
879 int i;
881 /* Build a tree with vector elements. */
882 if (vect_debug_details (NULL))
883 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
885 for (i = nunits - 1; i >= 0; --i)
887 t = tree_cons (NULL_TREE, op, t);
889 vec_cst = build_vector (vectype, t);
890 return vect_init_vector (stmt, vec_cst);
893 gcc_assert (TREE_CODE (op) == SSA_NAME);
895 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
897 def_stmt = SSA_NAME_DEF_STMT (op);
898 def_stmt_info = vinfo_for_stmt (def_stmt);
900 if (vect_debug_details (NULL))
902 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
903 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
907 /** ==> Case 2.1: operand is defined inside the loop. **/
909 if (def_stmt_info)
911 /* Get the def from the vectorized stmt. */
913 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
914 gcc_assert (vec_stmt);
915 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
916 return vec_oprnd;
920 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
921 it is a reduction/induction. **/
923 bb = bb_for_stmt (def_stmt);
924 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
926 if (vect_debug_details (NULL))
927 fprintf (dump_file, "reduction/induction - unsupported.");
928 internal_error ("no support for reduction/induction"); /* FORNOW */
932 /** ==> Case 2.3: operand is defined outside the loop -
933 it is a loop invariant. */
935 switch (TREE_CODE (def_stmt))
937 case PHI_NODE:
938 def = PHI_RESULT (def_stmt);
939 break;
940 case MODIFY_EXPR:
941 def = TREE_OPERAND (def_stmt, 0);
942 break;
943 case NOP_EXPR:
944 def = TREE_OPERAND (def_stmt, 0);
945 gcc_assert (IS_EMPTY_STMT (def_stmt));
946 def = op;
947 break;
948 default:
949 if (vect_debug_details (NULL))
951 fprintf (dump_file, "unsupported defining stmt: ");
952 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
954 internal_error ("unsupported defining stmt");
957 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
959 if (vect_debug_details (NULL))
960 fprintf (dump_file, "Create vector_inv.");
962 for (i = nunits - 1; i >= 0; --i)
964 t = tree_cons (NULL_TREE, def, t);
967 vec_inv = build_constructor (vectype, t);
968 return vect_init_vector (stmt, vec_inv);
972 /* Function vect_finish_stmt_generation.
974 Insert a new stmt. */
976 static void
977 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
979 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
981 if (vect_debug_details (NULL))
983 fprintf (dump_file, "add new stmt: ");
984 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
987 /* Make sure bsi points to the stmt that is being vectorized. */
989 /* Assumption: any stmts created for the vectorization of smtmt S are
990 inserted before S. BSI may point to S or some new stmt before it. */
992 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
993 bsi_next (bsi);
994 gcc_assert (stmt == bsi_stmt (*bsi));
998 /* Function vectorizable_assignment.
1000 Check if STMT performs an assignment (copy) that can be vectorized.
1001 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1002 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1003 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1005 static bool
1006 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1008 tree vec_dest;
1009 tree scalar_dest;
1010 tree op;
1011 tree vec_oprnd;
1012 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1013 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1014 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1015 tree new_temp;
1017 /* Is vectorizable assignment? */
1019 if (TREE_CODE (stmt) != MODIFY_EXPR)
1020 return false;
1022 scalar_dest = TREE_OPERAND (stmt, 0);
1023 if (TREE_CODE (scalar_dest) != SSA_NAME)
1024 return false;
1026 op = TREE_OPERAND (stmt, 1);
1027 if (!vect_is_simple_use (op, loop, NULL))
1029 if (vect_debug_details (NULL))
1030 fprintf (dump_file, "use not simple.");
1031 return false;
1034 if (!vec_stmt) /* transformation not required. */
1036 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1037 return true;
1040 /** Trasform. **/
1041 if (vect_debug_details (NULL))
1042 fprintf (dump_file, "transform assignment.");
1044 /* Handle def. */
1045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1047 /* Handle use. */
1048 op = TREE_OPERAND (stmt, 1);
1049 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1051 /* Arguments are ready. create the new vector stmt. */
1052 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1053 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1054 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1055 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1057 return true;
1061 /* Function vectorizable_operation.
1063 Check if STMT performs a binary or unary operation that can be vectorized.
1064 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1065 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1066 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1068 static bool
1069 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1071 tree vec_dest;
1072 tree scalar_dest;
1073 tree operation;
1074 tree op0, op1 = NULL;
1075 tree vec_oprnd0, vec_oprnd1=NULL;
1076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1077 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1078 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1079 int i;
1080 enum tree_code code;
1081 enum machine_mode vec_mode;
1082 tree new_temp;
1083 int op_type;
1084 tree op;
1085 optab optab;
1087 /* Is STMT a vectorizable binary/unary operation? */
1088 if (TREE_CODE (stmt) != MODIFY_EXPR)
1089 return false;
1091 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1092 return false;
1094 operation = TREE_OPERAND (stmt, 1);
1095 code = TREE_CODE (operation);
1096 optab = optab_for_tree_code (code, vectype);
1098 /* Support only unary or binary operations. */
1099 op_type = TREE_CODE_LENGTH (code);
1100 if (op_type != unary_op && op_type != binary_op)
1102 if (vect_debug_details (NULL))
1103 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1104 return false;
1107 for (i = 0; i < op_type; i++)
1109 op = TREE_OPERAND (operation, i);
1110 if (!vect_is_simple_use (op, loop, NULL))
1112 if (vect_debug_details (NULL))
1113 fprintf (dump_file, "use not simple.");
1114 return false;
1118 /* Supportable by target? */
1119 if (!optab)
1121 if (vect_debug_details (NULL))
1122 fprintf (dump_file, "no optab.");
1123 return false;
1125 vec_mode = TYPE_MODE (vectype);
1126 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1128 if (vect_debug_details (NULL))
1129 fprintf (dump_file, "op not supported by target.");
1130 return false;
1133 if (!vec_stmt) /* transformation not required. */
1135 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1136 return true;
1139 /** Trasform. **/
1141 if (vect_debug_details (NULL))
1142 fprintf (dump_file, "transform binary/unary operation.");
1144 /* Handle def. */
1145 scalar_dest = TREE_OPERAND (stmt, 0);
1146 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1148 /* Handle uses. */
1149 op0 = TREE_OPERAND (operation, 0);
1150 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1152 if (op_type == binary_op)
1154 op1 = TREE_OPERAND (operation, 1);
1155 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1158 /* Arguments are ready. create the new vector stmt. */
1160 if (op_type == binary_op)
1161 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1162 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1163 else
1164 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1165 build1 (code, vectype, vec_oprnd0));
1166 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1167 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1168 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1170 return true;
1174 /* Function vectorizable_store.
1176 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1177 can be vectorized.
1178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1182 static bool
1183 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1185 tree scalar_dest;
1186 tree data_ref;
1187 tree op;
1188 tree vec_oprnd1;
1189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1190 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1191 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1192 enum machine_mode vec_mode;
1194 /* Is vectorizable store? */
1196 if (TREE_CODE (stmt) != MODIFY_EXPR)
1197 return false;
1199 scalar_dest = TREE_OPERAND (stmt, 0);
1200 if (TREE_CODE (scalar_dest) != ARRAY_REF
1201 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1202 return false;
1204 op = TREE_OPERAND (stmt, 1);
1205 if (!vect_is_simple_use (op, loop, NULL))
1207 if (vect_debug_details (NULL))
1208 fprintf (dump_file, "use not simple.");
1209 return false;
1212 vec_mode = TYPE_MODE (vectype);
1213 /* FORNOW. In some cases can vectorize even if data-type not supported
1214 (e.g. - array initialization with 0). */
1215 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1216 return false;
1218 if (!STMT_VINFO_DATA_REF (stmt_info))
1219 return false;
1221 if (!vec_stmt) /* transformation not required. */
1223 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1224 return true;
1227 /** Trasform. **/
1229 if (vect_debug_details (NULL))
1230 fprintf (dump_file, "transform store");
1232 /* Handle use - get the vectorized def from the defining stmt. */
1233 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1235 /* Handle def. */
1236 data_ref = vect_create_data_ref (stmt, bsi);
1238 /* Arguments are ready. create the new vector stmt. */
1239 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1240 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1242 return true;
1246 /* vectorizable_load.
1248 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1249 can be vectorized.
1250 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1251 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1252 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1254 static bool
1255 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1257 tree scalar_dest;
1258 tree vec_dest = NULL;
1259 tree data_ref = NULL;
1260 tree op;
1261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1262 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1263 tree new_temp;
1264 enum machine_mode vec_mode;
1266 /* Is vectorizable load? */
1268 if (TREE_CODE (stmt) != MODIFY_EXPR)
1269 return false;
1271 scalar_dest = TREE_OPERAND (stmt, 0);
1272 if (TREE_CODE (scalar_dest) != SSA_NAME)
1273 return false;
1275 op = TREE_OPERAND (stmt, 1);
1276 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1277 return false;
1279 if (!STMT_VINFO_DATA_REF (stmt_info))
1280 return false;
1282 vec_mode = TYPE_MODE (vectype);
1283 /* FORNOW. In some cases can vectorize even if data-type not supported
1284 (e.g. - data copies). */
1285 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1286 return false;
1288 if (!vec_stmt) /* transformation not required. */
1290 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1291 return true;
1294 /** Trasform. **/
1296 if (vect_debug_details (NULL))
1297 fprintf (dump_file, "transform load.");
1299 /* Handle def. */
1300 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1302 /* Handle use. */
1303 op = TREE_OPERAND (stmt, 1);
1304 data_ref = vect_create_data_ref (stmt, bsi);
1306 /* Arguments are ready. create the new vector stmt. */
1307 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1308 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1309 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1310 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1312 return true;
1316 /* Function vect_transform_stmt.
1318 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1320 static bool
1321 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1323 bool is_store = false;
1324 tree vec_stmt = NULL_TREE;
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 bool done;
1328 switch (STMT_VINFO_TYPE (stmt_info))
1330 case op_vec_info_type:
1331 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1332 gcc_assert (done);
1333 break;
1335 case assignment_vec_info_type:
1336 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1337 gcc_assert (done);
1338 break;
1340 case load_vec_info_type:
1341 done = vectorizable_load (stmt, bsi, &vec_stmt);
1342 gcc_assert (done);
1343 break;
1345 case store_vec_info_type:
1346 done = vectorizable_store (stmt, bsi, &vec_stmt);
1347 gcc_assert (done);
1348 is_store = true;
1349 break;
1350 default:
1351 if (vect_debug_details (NULL))
1352 fprintf (dump_file, "stmt not supported.");
1353 gcc_unreachable ();
1356 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1358 return is_store;
1362 /* Function vect_transform_loop_bound.
1364 Create a new exit condition for the loop. */
1366 static void
1367 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1369 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1370 edge exit_edge = loop->single_exit;
1371 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1372 tree indx_before_incr, indx_after_incr;
1373 tree orig_cond_expr;
1374 HOST_WIDE_INT old_N = 0;
1375 int vf;
1376 tree cond_stmt;
1377 tree new_loop_bound;
1378 tree cond;
1379 tree lb_type;
1381 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1382 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1383 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1385 /* FORNOW:
1386 assuming number-of-iterations divides by the vectorization factor. */
1387 gcc_assert (!(old_N % vf));
1389 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1390 gcc_assert (orig_cond_expr);
1391 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
1393 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1394 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1396 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1397 to point to the exit condition. */
1398 bsi_next (&loop_exit_bsi);
1399 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1401 /* new loop exit test: */
1402 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1403 new_loop_bound = build_int_cst (lb_type, old_N/vf);
1405 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1406 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1407 else /* 'then' edge loops back. */
1408 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1410 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1411 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1413 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1415 /* remove old loop exit test: */
1416 bsi_remove (&loop_exit_bsi);
1418 if (vect_debug_details (NULL))
1419 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1423 /* Function vect_transform_loop.
1425 The analysis phase has determined that the loop is vectorizable.
1426 Vectorize the loop - created vectorized stmts to replace the scalar
1427 stmts in the loop, and update the loop exit condition. */
1429 static void
1430 vect_transform_loop (loop_vec_info loop_vinfo,
1431 struct loops *loops ATTRIBUTE_UNUSED)
1433 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1434 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1435 int nbbs = loop->num_nodes;
1436 block_stmt_iterator si;
1437 int i;
1438 #ifdef ENABLE_CHECKING
1439 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1440 #endif
1442 if (vect_debug_details (NULL))
1443 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
1445 /* 1) Make sure the loop header has exactly two entries
1446 2) Make sure we have a preheader basic block. */
1448 gcc_assert (loop->header->pred->pred_next);
1449 gcc_assert (!loop->header->pred->pred_next->pred_next);
1451 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1454 /* FORNOW: the vectorizer supports only loops which body consist
1455 of one basic block (header + empty latch). When the vectorizer will
1456 support more involved loop forms, the order by which the BBs are
1457 traversed need to be reconsidered. */
1459 for (i = 0; i < nbbs; i++)
1461 basic_block bb = bbs[i];
1463 for (si = bsi_start (bb); !bsi_end_p (si);)
1465 tree stmt = bsi_stmt (si);
1466 stmt_vec_info stmt_info;
1467 bool is_store;
1468 #ifdef ENABLE_CHECKING
1469 tree vectype;
1470 #endif
1472 if (vect_debug_details (NULL))
1474 fprintf (dump_file, "------>vectorizing statement: ");
1475 print_generic_expr (dump_file, stmt, TDF_SLIM);
1477 stmt_info = vinfo_for_stmt (stmt);
1478 gcc_assert (stmt_info);
1479 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1481 bsi_next (&si);
1482 continue;
1484 #ifdef ENABLE_CHECKING
1485 /* FORNOW: Verify that all stmts operate on the same number of
1486 units and no inner unrolling is necessary. */
1487 vectype = STMT_VINFO_VECTYPE (stmt_info);
1488 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
1489 == vectorization_factor);
1490 #endif
1491 /* -------- vectorize statement ------------ */
1492 if (vect_debug_details (NULL))
1493 fprintf (dump_file, "transform statement.");
1495 is_store = vect_transform_stmt (stmt, &si);
1496 if (is_store)
1498 /* free the attached stmt_vec_info and remove the stmt. */
1499 stmt_ann_t ann = stmt_ann (stmt);
1500 free (stmt_info);
1501 set_stmt_info (ann, NULL);
1502 bsi_remove (&si);
1503 continue;
1506 bsi_next (&si);
1507 } /* stmts in BB */
1508 } /* BBs in loop */
1510 vect_transform_loop_bound (loop_vinfo);
1512 if (vect_debug_details (loop))
1513 fprintf (dump_file,"Success! loop vectorized.");
1514 if (vect_debug_stats (loop))
1515 fprintf (dump_file, "LOOP VECTORIZED.");
1519 /* Function vect_is_simple_use.
1521 Input:
1522 LOOP - the loop that is being vectorized.
1523 OPERAND - operand of a stmt in LOOP.
1524 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1526 Returns whether a stmt with OPERAND can be vectorized.
1527 Supportable operands are constants, loop invariants, and operands that are
1528 defined by the current iteration of the loop. Unsupportable operands are
1529 those that are defined by a previous iteration of the loop (as is the case
1530 in reduction/induction computations). */
1532 static bool
1533 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1535 tree def_stmt;
1536 basic_block bb;
1538 if (def)
1539 *def = NULL_TREE;
1541 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1542 return true;
1544 if (TREE_CODE (operand) != SSA_NAME)
1545 return false;
1547 def_stmt = SSA_NAME_DEF_STMT (operand);
1548 if (def_stmt == NULL_TREE )
1550 if (vect_debug_details (NULL))
1551 fprintf (dump_file, "no def_stmt.");
1552 return false;
1555 /* empty stmt is expected only in case of a function argument.
1556 (Otherwise - we expect a phi_node or a modify_expr). */
1557 if (IS_EMPTY_STMT (def_stmt))
1559 tree arg = TREE_OPERAND (def_stmt, 0);
1560 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1561 return true;
1562 if (vect_debug_details (NULL))
1564 fprintf (dump_file, "Unexpected empty stmt: ");
1565 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1567 return false;
1570 /* phi_node inside the loop indicates an induction/reduction pattern.
1571 This is not supported yet. */
1572 bb = bb_for_stmt (def_stmt);
1573 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1575 if (vect_debug_details (NULL))
1576 fprintf (dump_file, "reduction/induction - unsupported.");
1577 return false; /* FORNOW: not supported yet. */
1580 /* Expecting a modify_expr or a phi_node. */
1581 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1582 || TREE_CODE (def_stmt) == PHI_NODE)
1584 if (def)
1585 *def = def_stmt;
1586 return true;
1589 return false;
1593 /* Function vect_analyze_operations.
1595 Scan the loop stmts and make sure they are all vectorizable. */
1597 static bool
1598 vect_analyze_operations (loop_vec_info loop_vinfo)
1600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1601 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1602 int nbbs = loop->num_nodes;
1603 block_stmt_iterator si;
1604 int vectorization_factor = 0;
1605 int i;
1606 bool ok;
1607 tree scalar_type;
1609 if (vect_debug_details (NULL))
1610 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
1612 for (i = 0; i < nbbs; i++)
1614 basic_block bb = bbs[i];
1616 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1618 tree stmt = bsi_stmt (si);
1619 int nunits;
1620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1621 tree vectype;
1623 if (vect_debug_details (NULL))
1625 fprintf (dump_file, "==> examining statement: ");
1626 print_generic_expr (dump_file, stmt, TDF_SLIM);
1629 gcc_assert (stmt_info);
1631 /* skip stmts which do not need to be vectorized.
1632 this is expected to include:
1633 - the COND_EXPR which is the loop exit condition
1634 - any LABEL_EXPRs in the loop
1635 - computations that are used only for array indexing or loop
1636 control */
1638 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1640 if (vect_debug_details (NULL))
1641 fprintf (dump_file, "irrelevant.");
1642 continue;
1645 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
1647 if (vect_debug_stats (loop) || vect_debug_details (loop))
1649 fprintf (dump_file, "not vectorized: vector stmt in loop:");
1650 print_generic_expr (dump_file, stmt, TDF_SLIM);
1652 return false;
1655 if (STMT_VINFO_DATA_REF (stmt_info))
1656 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
1657 else if (TREE_CODE (stmt) == MODIFY_EXPR)
1658 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
1659 else
1660 scalar_type = TREE_TYPE (stmt);
1662 if (vect_debug_details (NULL))
1664 fprintf (dump_file, "get vectype for scalar type: ");
1665 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1668 vectype = get_vectype_for_scalar_type (scalar_type);
1669 if (!vectype)
1671 if (vect_debug_stats (loop) || vect_debug_details (loop))
1673 fprintf (dump_file, "not vectorized: unsupported data-type ");
1674 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1676 return false;
1679 if (vect_debug_details (NULL))
1681 fprintf (dump_file, "vectype: ");
1682 print_generic_expr (dump_file, vectype, TDF_SLIM);
1684 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1686 ok = (vectorizable_operation (stmt, NULL, NULL)
1687 || vectorizable_assignment (stmt, NULL, NULL)
1688 || vectorizable_load (stmt, NULL, NULL)
1689 || vectorizable_store (stmt, NULL, NULL));
1691 if (!ok)
1693 if (vect_debug_stats (loop) || vect_debug_details (loop))
1695 fprintf (dump_file, "not vectorized: stmt not supported: ");
1696 print_generic_expr (dump_file, stmt, TDF_SLIM);
1698 return false;
1701 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1702 if (vect_debug_details (NULL))
1703 fprintf (dump_file, "nunits = %d", nunits);
1705 if (vectorization_factor)
1707 /* FORNOW: don't allow mixed units.
1708 This restriction will be relaxed in the future. */
1709 if (nunits != vectorization_factor)
1711 if (vect_debug_stats (loop) || vect_debug_details (loop))
1712 fprintf (dump_file, "not vectorized: mixed data-types");
1713 return false;
1716 else
1717 vectorization_factor = nunits;
1721 /* TODO: Analyze cost. Decide if worth while to vectorize. */
1722 if (!vectorization_factor)
1724 if (vect_debug_stats (loop) || vect_debug_details (loop))
1725 fprintf (dump_file, "not vectorized: unsupported data-type");
1726 return false;
1728 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1730 /* FORNOW: handle only cases where the loop bound divides by the
1731 vectorization factor. */
1733 if (vect_debug_details (NULL))
1734 fprintf (dump_file,
1735 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1736 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
1738 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1740 if (vect_debug_stats (loop) || vect_debug_details (loop))
1741 fprintf (dump_file, "not vectorized: Unknown loop bound.");
1742 return false;
1745 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1746 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
1748 if (vect_debug_stats (loop) || vect_debug_details (loop))
1749 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
1750 vectorization_factor);
1751 return false;
1754 return true;
1758 /* Function exist_non_indexing_operands_for_use_p
1760 USE is one of the uses attached to STMT. Check if USE is
1761 used in STMT for anything other than indexing an array. */
1763 static bool
1764 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
1766 tree operand;
1767 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1769 /* USE corresponds to some operand in STMT. If there is no data
1770 reference in STMT, then any operand that corresponds to USE
1771 is not indexing an array. */
1772 if (!STMT_VINFO_DATA_REF (stmt_info))
1773 return true;
1775 /* STMT has a data_ref. FORNOW this means that its of one of
1776 the following forms:
1777 -1- ARRAY_REF = var
1778 -2- var = ARRAY_REF
1779 (This should have been verified in analyze_data_refs).
1781 'var' in the second case corresponds to a def, not a use,
1782 so USE cannot correspond to any operands that are not used
1783 for array indexing.
1785 Therefore, all we need to check is if STMT falls into the
1786 first case, and whether var corresponds to USE. */
1788 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
1789 return false;
1791 operand = TREE_OPERAND (stmt, 1);
1793 if (TREE_CODE (operand) != SSA_NAME)
1794 return false;
1796 if (operand == use)
1797 return true;
1799 return false;
1803 /* Function vect_is_simple_iv_evolution.
1805 FORNOW: A simple evolution of an induction variables in the loop is
1806 considered a polynomial evolution with constant step. */
1808 static bool
1809 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1810 tree * step, bool strict)
1812 tree init_expr;
1813 tree step_expr;
1815 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1817 /* When there is no evolution in this loop, the evolution function
1818 is not "simple". */
1819 if (evolution_part == NULL_TREE)
1820 return false;
1822 /* When the evolution is a polynomial of degree >= 2
1823 the evolution function is not "simple". */
1824 if (tree_is_chrec (evolution_part))
1825 return false;
1827 step_expr = evolution_part;
1828 init_expr = initial_condition (access_fn);
1830 if (vect_debug_details (NULL))
1832 fprintf (dump_file, "step: ");
1833 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1834 fprintf (dump_file, ", init: ");
1835 print_generic_expr (dump_file, init_expr, TDF_SLIM);
1838 *init = init_expr;
1839 *step = step_expr;
1841 if (TREE_CODE (step_expr) != INTEGER_CST)
1843 if (vect_debug_details (NULL))
1844 fprintf (dump_file, "step unknown.");
1845 return false;
1848 if (strict)
1849 if (!integer_onep (step_expr))
1851 if (vect_debug_details (NULL))
1852 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1853 return false;
1856 return true;
1860 /* Function vect_analyze_scalar_cycles.
1862 Examine the cross iteration def-use cycles of scalar variables, by
1863 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
1864 cycles that they represent do not impede vectorization.
1866 FORNOW: Reduction as in the following loop, is not supported yet:
1867 loop1:
1868 for (i=0; i<N; i++)
1869 sum += a[i];
1870 The cross-iteration cycle corresponding to variable 'sum' will be
1871 considered too complicated and will impede vectorization.
1873 FORNOW: Induction as in the following loop, is not supported yet:
1874 loop2:
1875 for (i=0; i<N; i++)
1876 a[i] = i;
1878 However, the following loop *is* vectorizable:
1879 loop3:
1880 for (i=0; i<N; i++)
1881 a[i] = b[i];
1883 In both loops there exists a def-use cycle for the variable i:
1884 loop: i_2 = PHI (i_0, i_1)
1885 a[i_2] = ...;
1886 i_1 = i_2 + 1;
1887 GOTO loop;
1889 The evolution of the above cycle is considered simple enough,
1890 however, we also check that the cycle does not need to be
1891 vectorized, i.e - we check that the variable that this cycle
1892 defines is only used for array indexing or in stmts that do not
1893 need to be vectorized. This is not the case in loop2, but it
1894 *is* the case in loop3. */
1896 static bool
1897 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
1899 tree phi;
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 basic_block bb = loop->header;
1902 tree dummy;
1904 if (vect_debug_details (NULL))
1905 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
1907 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1909 tree access_fn = NULL;
1911 if (vect_debug_details (NULL))
1913 fprintf (dump_file, "Analyze phi: ");
1914 print_generic_expr (dump_file, phi, TDF_SLIM);
1917 /* Skip virtual phi's. The data dependences that are associated with
1918 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1920 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1922 if (vect_debug_details (NULL))
1923 fprintf (dump_file, "virtual phi. skip.");
1924 continue;
1927 /* Analyze the evolution function. */
1929 /* FORNOW: The only scalar cross-iteration cycles that we allow are
1930 those of loop induction variables; This property is verified here.
1932 Furthermore, if that induction variable is used in an operation
1933 that needs to be vectorized (i.e, is not solely used to index
1934 arrays and check the exit condition) - we do not support its
1935 vectorization yet. This property is verified in vect_is_simple_use,
1936 during vect_analyze_operations. */
1938 access_fn = instantiate_parameters
1939 (loop,
1940 analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1942 if (!access_fn)
1944 if (vect_debug_stats (loop) || vect_debug_details (loop))
1945 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
1946 return false;
1949 if (vect_debug_details (NULL))
1951 fprintf (dump_file, "Access function of PHI: ");
1952 print_generic_expr (dump_file, access_fn, TDF_SLIM);
1955 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
1956 &dummy, false))
1958 if (vect_debug_stats (loop) || vect_debug_details (loop))
1959 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
1960 return false;
1964 return true;
1968 /* Function vect_analyze_data_ref_dependence.
1970 Return TRUE if there (might) exist a dependence between a memory-reference
1971 DRA and a memory-reference DRB. */
1973 static bool
1974 vect_analyze_data_ref_dependence (struct data_reference *dra,
1975 struct data_reference *drb,
1976 struct loop *loop)
1978 bool differ_p;
1979 struct data_dependence_relation *ddr;
1981 if (!array_base_name_differ_p (dra, drb, &differ_p))
1983 if (vect_debug_stats (loop) || vect_debug_details (loop))
1985 fprintf (dump_file,
1986 "not vectorized: can't determine dependence between: ");
1987 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
1988 fprintf (dump_file, " and ");
1989 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
1991 return true;
1994 if (differ_p)
1995 return false;
1997 ddr = initialize_data_dependence_relation (dra, drb);
1998 compute_affine_dependence (ddr);
2000 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2001 return false;
2003 if (vect_debug_stats (loop) || vect_debug_details (loop))
2005 fprintf (dump_file,
2006 "not vectorized: possible dependence between data-refs ");
2007 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2008 fprintf (dump_file, " and ");
2009 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2012 return true;
2016 /* Function vect_analyze_data_ref_dependences.
2018 Examine all the data references in the loop, and make sure there do not
2019 exist any data dependences between them.
2021 TODO: dependences which distance is greater than the vectorization factor
2022 can be ignored. */
2024 static bool
2025 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2027 unsigned int i, j;
2028 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2029 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2032 /* Examine store-store (output) dependences. */
2034 if (vect_debug_details (NULL))
2035 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2037 if (vect_debug_details (NULL))
2038 fprintf (dump_file, "compare all store-store pairs.");
2040 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2042 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2044 struct data_reference *dra =
2045 VARRAY_GENERIC_PTR (loop_write_refs, i);
2046 struct data_reference *drb =
2047 VARRAY_GENERIC_PTR (loop_write_refs, j);
2048 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2049 return false;
2053 /* Examine load-store (true/anti) dependences. */
2055 if (vect_debug_details (NULL))
2056 fprintf (dump_file, "compare all load-store pairs.");
2058 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2060 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2062 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2063 struct data_reference *drb =
2064 VARRAY_GENERIC_PTR (loop_write_refs, j);
2065 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2066 return false;
2070 return true;
2074 /* Function vect_get_first_index.
2076 REF is a data reference.
2077 If it is an ARRAY_REF: if its lower bound is simple enough,
2078 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2079 If it is not an ARRAY_REF: REF has no "first index";
2080 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2082 static bool
2083 vect_get_first_index (tree ref, tree *array_first_index)
2085 tree array_start;
2087 if (TREE_CODE (ref) != ARRAY_REF)
2088 *array_first_index = size_zero_node;
2089 else
2091 array_start = array_ref_low_bound (ref);
2092 if (!host_integerp (array_start,0))
2094 if (vect_debug_details (NULL))
2096 fprintf (dump_file, "array min val not simple integer cst.");
2097 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2099 return false;
2101 *array_first_index = array_start;
2104 return true;
2108 /* Function vect_compute_data_ref_alignment
2110 Compute the misalignment of the data reference DR.
2112 FOR NOW: No analysis is actually performed. Misalignment is calculated
2113 only for trivial cases. TODO. */
2115 static void
2116 vect_compute_data_ref_alignment (struct data_reference *dr,
2117 loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2119 tree stmt = DR_STMT (dr);
2120 tree ref = DR_REF (dr);
2121 tree vectype;
2122 tree access_fn = DR_ACCESS_FN (dr, 0); /* FORNOW: single access_fn. */
2123 tree init;
2124 tree scalar_type;
2125 tree misalign;
2126 tree array_first_index;
2127 tree array_base = DR_BASE_NAME (dr);
2128 tree base_decl = NULL_TREE;
2129 tree bit_offset = size_zero_node;
2130 tree offset = size_zero_node;
2131 tree unit_bits = build_int_cst (unsigned_type_node, BITS_PER_UNIT);
2132 tree nunits;
2133 tree alignment;
2135 if (vect_debug_details (NULL))
2136 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2138 /* Initialize misalignment to unknown. */
2139 DR_MISALIGNMENT (dr) = -1;
2141 scalar_type = TREE_TYPE (ref);
2142 vectype = get_vectype_for_scalar_type (scalar_type);
2143 if (!vectype)
2145 if (vect_debug_details (NULL))
2147 fprintf (dump_file, "no vectype for stmt: ");
2148 print_generic_expr (dump_file, stmt, TDF_SLIM);
2149 fprintf (dump_file, "scalar_type: ");
2150 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2152 return;
2155 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base))) < TYPE_ALIGN (vectype))
2157 base_decl = vect_get_base_decl_and_bit_offset (array_base, &bit_offset);
2158 if (!base_decl)
2160 if (vect_debug_details (NULL))
2161 fprintf (dump_file, "Unknown alignment for access");
2162 return;
2165 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2166 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2167 if (!integer_zerop (bit_offset))
2169 if (vect_debug_details (NULL))
2171 fprintf (dump_file, "bit offset alignment: ");
2172 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2174 return;
2177 if (!base_decl ||
2178 (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype)
2179 && !vect_can_force_dr_alignment_p (base_decl, TYPE_ALIGN (vectype))))
2181 if (vect_debug_details (NULL))
2183 fprintf (dump_file, "can't force alignment of ref: ");
2184 print_generic_expr (dump_file, array_base, TDF_SLIM);
2186 return;
2189 if (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype))
2191 /* Force the alignment of the decl.
2192 NOTE: This is the only change to the code we make during
2193 the analysis phase, before deciding to vectorize the loop. */
2194 if (vect_debug_details (NULL))
2195 fprintf (dump_file, "force alignment");
2196 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2197 DECL_USER_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2201 /* The misalignement is:
2202 (base_alignment + offset + index_access_fn_init) % alignment.
2203 At this point we already guaranteed that base_alignment == 0,
2204 and computed the offset.
2205 It remains to check the first index accessed. */
2207 if (!vect_get_first_index (ref, &array_first_index))
2209 if (vect_debug_details (NULL))
2210 fprintf (dump_file, "no first_index for array.");
2211 return;
2214 /* Check the index of the array_ref. */
2216 init = initial_condition (access_fn);
2218 /* FORNOW: In order to simplify the handling of alignment, we make sure
2219 that the first location at which the array is accessed ('init') is on an
2220 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2221 This is too conservative, since we require that
2222 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2223 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2224 This should be relaxed in the future. */
2226 if (!init || !host_integerp (init,0))
2228 if (vect_debug_details (NULL))
2229 fprintf (dump_file, "init not simple INTEGER_CST.");
2230 return;
2233 /* alignment required, in bytes: */
2234 alignment = build_int_cst (unsigned_type_node,
2235 TYPE_ALIGN (vectype)/BITS_PER_UNIT);
2236 /* bytes per scalar element: */
2237 nunits = build_int_cst (unsigned_type_node,
2238 GET_MODE_SIZE (TYPE_MODE (scalar_type)));
2240 /* misalign = (offset + (init-array_first_index)*nunits) % alignment */
2241 if (vect_debug_details (NULL))
2243 fprintf (dump_file, "misalign = ( offset <");
2244 print_generic_expr (dump_file, offset, TDF_SLIM);
2245 fprintf (dump_file, "> + (init <");
2246 print_generic_expr (dump_file, init, TDF_SLIM);
2247 fprintf (dump_file, "> - first_indx <");
2248 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
2249 fprintf (dump_file, ">) * nunits <");
2250 print_generic_expr (dump_file, nunits, TDF_SLIM);
2251 fprintf (dump_file, ">) mod alignment <");
2252 print_generic_expr (dump_file, alignment, TDF_SLIM);
2253 fprintf (dump_file, ">");
2256 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2257 misalign = int_const_binop (MULT_EXPR, misalign, nunits, 0);
2258 misalign = int_const_binop (PLUS_EXPR, misalign, offset, 0);
2259 misalign = int_const_binop (TRUNC_MOD_EXPR, misalign, alignment, 0);
2261 if (vect_debug_details (NULL))
2263 fprintf (dump_file, "misalign = ");
2264 print_generic_expr (dump_file, misalign, TDF_SLIM);
2267 if (!host_integerp (misalign,1) || TREE_OVERFLOW (misalign))
2269 if (vect_debug_details (NULL))
2270 fprintf (dump_file, "unexpected misalign value");
2271 return;
2274 DR_MISALIGNMENT (dr) = tree_low_cst (misalign,1);
2276 if (vect_debug_details (NULL))
2277 fprintf (dump_file, "misalign = %d",DR_MISALIGNMENT (dr));
2281 /* Function vect_compute_data_refs_alignment
2283 Compute the misalignment of data references in the loop.
2284 This pass may take place at function granularity instead of at loop
2285 granularity.
2287 FOR NOW: No analysis is actually performed. Misalignment is calculated
2288 only for trivial cases. TODO. */
2290 static void
2291 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2293 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2294 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2295 unsigned int i;
2297 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2299 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2300 vect_compute_data_ref_alignment (dr, loop_vinfo);
2303 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2305 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2306 vect_compute_data_ref_alignment (dr, loop_vinfo);
2311 /* Function vect_enhance_data_refs_alignment
2313 This pass will use loop versioning and loop peeling in order to enhance
2314 the alignment of data references in the loop.
2316 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2317 original loop is to be vectorized; Any other loops that are created by
2318 the transformations performed in this pass - are not supposed to be
2319 vectorized. This restriction will be relaxed.
2321 FOR NOW: No transformation is actually performed. TODO. */
2323 static void
2324 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2327 This pass will require a cost model to guide it whether to apply peeling
2328 or versioning or a combination of the two. For example, the scheme that
2329 intel uses when given a loop with several memory accesses, is as follows:
2330 choose one memory access ('p') which alignment you want to force by doing
2331 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2332 other accesses are not necessarily aligned, or (2) use loop versioning to
2333 generate one loop in which all accesses are aligned, and another loop in
2334 which only 'p' is necessarily aligned.
2336 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2337 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2338 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2340 Devising a cost model is the most critical aspect of this work. It will
2341 guide us on which access to peel for, whether to use loop versioning, how
2342 many versions to create, etc. The cost model will probably consist of
2343 generic considerations as well as target specific considerations (on
2344 powerpc for example, misaligned stores are more painful than misaligned
2345 loads).
2347 Here is the general steps involved in alignment enhancements:
2349 -- original loop, before alignment analysis:
2350 for (i=0; i<N; i++){
2351 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2352 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2355 -- After vect_compute_data_refs_alignment:
2356 for (i=0; i<N; i++){
2357 x = q[i]; # DR_MISALIGNMENT(q) = 3
2358 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2361 -- Possibility 1: we do loop versioning:
2362 if (p is aligned) {
2363 for (i=0; i<N; i++){ # loop 1A
2364 x = q[i]; # DR_MISALIGNMENT(q) = 3
2365 p[i] = y; # DR_MISALIGNMENT(p) = 0
2368 else {
2369 for (i=0; i<N; i++){ # loop 1B
2370 x = q[i]; # DR_MISALIGNMENT(q) = 3
2371 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2375 -- Possibility 2: we do loop peeling:
2376 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2377 x = q[i];
2378 p[i] = y;
2380 for (i = 3; i < N; i++){ # loop 2A
2381 x = q[i]; # DR_MISALIGNMENT(q) = 0
2382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2385 -- Possibility 3: combination of loop peeling and versioning:
2386 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2387 x = q[i];
2388 p[i] = y;
2390 if (p is aligned) {
2391 for (i = 3; i<N; i++){ # loop 3A
2392 x = q[i]; # DR_MISALIGNMENT(q) = 0
2393 p[i] = y; # DR_MISALIGNMENT(p) = 0
2396 else {
2397 for (i = 3; i<N; i++){ # loop 3B
2398 x = q[i]; # DR_MISALIGNMENT(q) = 0
2399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2403 These loops are later passed to loop_transform to be vectorized. The
2404 vectorizer will use the alignment information to guide the transformation
2405 (whether to generate regular loads/stores, or with special handling for
2406 misalignment).
2411 /* Function vect_analyze_data_refs_alignment
2413 Analyze the alignment of the data-references in the loop.
2414 FOR NOW: Until support for misliagned accesses is in place, only if all
2415 accesses are aligned can the loop be vectorized. This restriction will be
2416 relaxed. */
2418 static bool
2419 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2421 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2422 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2423 unsigned int i;
2425 if (vect_debug_details (NULL))
2426 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
2429 /* This pass may take place at function granularity instead of at loop
2430 granularity. */
2432 vect_compute_data_refs_alignment (loop_vinfo);
2435 /* This pass will use loop versioning and loop peeling in order to enhance
2436 the alignment of data references in the loop.
2437 FOR NOW: we assume that whatever versioning/peeling took place, the
2438 original loop is to be vectorized. Any other loops that were created by
2439 the transformations performed in this pass - are not supposed to be
2440 vectorized. This restriction will be relaxed. */
2442 vect_enhance_data_refs_alignment (loop_vinfo);
2445 /* Finally, check that loop can be vectorized.
2446 FOR NOW: Until support for misaligned accesses is in place, only if all
2447 accesses are aligned can the loop be vectorized. This restriction will be
2448 relaxed. */
2450 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2452 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2453 if (!aligned_access_p (dr))
2455 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2456 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2457 fprintf (dump_file, "not vectorized: unaligned store.");
2458 return false;
2462 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2464 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2465 if (!aligned_access_p (dr))
2467 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2468 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2469 fprintf (dump_file, "not vectorized: unaligned load.");
2470 return false;
2474 return true;
2478 /* Function vect_analyze_data_ref_access.
2480 Analyze the access pattern of the data-reference DR. For now, a data access
2481 has to consecutive and aligned to be considered vectorizable. */
2483 static bool
2484 vect_analyze_data_ref_access (struct data_reference *dr)
2486 varray_type access_fns = DR_ACCESS_FNS (dr);
2487 tree access_fn;
2488 tree init, step;
2490 /* FORNOW: handle only one dimensional arrays.
2491 This restriction will be relaxed in the future. */
2492 if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
2494 if (vect_debug_details (NULL))
2495 fprintf (dump_file, "multi dimensional array reference.");
2496 return false;
2498 access_fn = DR_ACCESS_FN (dr, 0);
2500 if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
2501 access_fn, &init, &step, true))
2503 if (vect_debug_details (NULL))
2505 fprintf (dump_file, "too complicated access function.");
2506 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2508 return false;
2511 return true;
2515 /* Function vect_analyze_data_ref_accesses.
2517 Analyze the access pattern of all the data references in the loop.
2519 FORNOW: the only access pattern that is considered vectorizable is a
2520 simple step 1 (consecutive) access.
2522 FORNOW: handle only one dimensional arrays, and pointer accesses. */
2524 static bool
2525 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2527 unsigned int i;
2528 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2529 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2531 if (vect_debug_details (NULL))
2532 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
2534 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2536 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2537 bool ok = vect_analyze_data_ref_access (dr);
2538 if (!ok)
2540 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2541 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2542 fprintf (dump_file, "not vectorized: complicated access pattern.");
2543 return false;
2547 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2549 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2550 bool ok = vect_analyze_data_ref_access (dr);
2551 if (!ok)
2553 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2554 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2555 fprintf (dump_file, "not vectorized: complicated access pattern.");
2556 return false;
2560 return true;
2564 /* Function vect_analyze_pointer_ref_access.
2566 Input:
2567 STMT - a stmt that contains a data-ref
2568 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
2570 If the data-ref access is vectorizable, return a data_reference structure
2571 that represents it (DR). Otherwise - return NULL. */
2573 static struct data_reference *
2574 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
2576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2577 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2578 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
2579 tree init, step;
2580 int step_val;
2581 tree reftype, innertype;
2582 enum machine_mode innermode;
2583 tree indx_access_fn;
2584 int loopnum = loop->num;
2585 struct data_reference *dr;
2587 if (!access_fn)
2589 if (vect_debug_stats (loop) || vect_debug_details (loop))
2590 fprintf (dump_file, "not vectorized: complicated pointer access.");
2591 return NULL;
2594 if (vect_debug_details (NULL))
2596 fprintf (dump_file, "Access function of ptr: ");
2597 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2600 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
2602 if (vect_debug_stats (loop) || vect_debug_details (loop))
2603 fprintf (dump_file, "not vectorized: pointer access is not simple.");
2604 return NULL;
2607 if (TREE_CODE (init) != SSA_NAME /* FORNOW */
2608 || !host_integerp (step,0))
2610 if (vect_debug_stats (loop) || vect_debug_details (loop))
2611 fprintf (dump_file,
2612 "not vectorized: non constant init/step for pointer access.");
2613 return NULL;
2616 step_val = TREE_INT_CST_LOW (step);
2618 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
2619 if (TREE_CODE (reftype) != POINTER_TYPE)
2621 if (vect_debug_stats (loop) || vect_debug_details (loop))
2622 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2623 return NULL;
2626 reftype = TREE_TYPE (init);
2627 if (TREE_CODE (reftype) != POINTER_TYPE)
2629 if (vect_debug_stats (loop) || vect_debug_details (loop))
2630 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2631 return NULL;
2634 innertype = TREE_TYPE (reftype);
2635 innermode = TYPE_MODE (innertype);
2636 if (GET_MODE_SIZE (innermode) != step_val)
2638 /* FORNOW: support only consecutive access */
2639 if (vect_debug_stats (loop) || vect_debug_details (loop))
2640 fprintf (dump_file, "not vectorized: non consecutive access.");
2641 return NULL;
2644 indx_access_fn =
2645 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
2646 if (vect_debug_details (NULL))
2648 fprintf (dump_file, "Access function of ptr indx: ");
2649 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
2651 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
2652 return dr;
2656 /* Function vect_analyze_data_refs.
2658 Find all the data references in the loop.
2660 FORNOW: Handle aligned INDIRECT_REFs and one dimensional ARRAY_REFs
2661 which base is really an array (not a pointer) and which alignment
2662 can be forced. This restriction will be relaxed. */
2664 static bool
2665 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2667 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2668 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2669 int nbbs = loop->num_nodes;
2670 block_stmt_iterator si;
2671 int j;
2672 struct data_reference *dr;
2674 if (vect_debug_details (NULL))
2675 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
2677 for (j = 0; j < nbbs; j++)
2679 basic_block bb = bbs[j];
2680 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2682 bool is_read = false;
2683 tree stmt = bsi_stmt (si);
2684 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2685 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2686 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2687 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2688 varray_type *datarefs = NULL;
2689 int nvuses, nv_may_defs, nv_must_defs;
2690 tree memref = NULL;
2691 tree array_base;
2692 tree symbl;
2694 /* Assumption: there exists a data-ref in stmt, if and only if
2695 it has vuses/vdefs. */
2697 if (!vuses && !v_may_defs && !v_must_defs)
2698 continue;
2700 nvuses = NUM_VUSES (vuses);
2701 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2702 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2704 if (nvuses && (nv_may_defs || nv_must_defs))
2706 if (vect_debug_details (NULL))
2708 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
2709 print_generic_expr (dump_file, stmt, TDF_SLIM);
2711 return false;
2714 if (TREE_CODE (stmt) != MODIFY_EXPR)
2716 if (vect_debug_details (NULL))
2718 fprintf (dump_file, "unexpected vops in stmt: ");
2719 print_generic_expr (dump_file, stmt, TDF_SLIM);
2721 return false;
2724 if (vuses)
2726 memref = TREE_OPERAND (stmt, 1);
2727 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2728 is_read = true;
2730 else /* vdefs */
2732 memref = TREE_OPERAND (stmt, 0);
2733 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2734 is_read = false;
2737 if (TREE_CODE (memref) == INDIRECT_REF)
2739 dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
2740 if (! dr)
2741 return false;
2742 symbl = DR_BASE_NAME (dr);
2744 else if (TREE_CODE (memref) == ARRAY_REF)
2746 tree base;
2747 tree offset = size_zero_node;
2748 array_base = TREE_OPERAND (memref, 0);
2750 /* FORNOW: make sure that the array is one dimensional.
2751 This restriction will be relaxed in the future. */
2752 if (TREE_CODE (array_base) == ARRAY_REF)
2754 if (vect_debug_stats (loop) || vect_debug_details (loop))
2756 fprintf (dump_file,
2757 "not vectorized: multi-dimensional array.");
2758 print_generic_expr (dump_file, stmt, TDF_SLIM);
2760 return false;
2763 dr = analyze_array (stmt, memref, is_read);
2765 /* Find the relevant symbol for aliasing purposes. */
2766 base = DR_BASE_NAME (dr);
2767 switch (TREE_CODE (base))
2769 case VAR_DECL:
2770 symbl = base;
2771 break;
2772 /* FORNOW: Disabled.
2773 case INDIRECT_REF:
2774 symbl = TREE_OPERAND (base, 0);
2775 break;
2777 case COMPONENT_REF:
2778 /* CHECKME: could have recorded more accurate information -
2779 i.e, the actual FIELD_DECL that is being referenced -
2780 but later passes expect VAR_DECL as the nmt. */
2781 symbl = vect_get_base_decl_and_bit_offset (base, &offset);
2782 if (symbl)
2783 break;
2784 /* fall through */
2785 default:
2786 if (vect_debug_stats (loop) || vect_debug_details (loop))
2788 fprintf (dump_file,
2789 "not vectorized: unhandled struct/class field access ");
2790 print_generic_expr (dump_file, stmt, TDF_SLIM);
2792 return false;
2793 } /* switch */
2795 else
2797 if (vect_debug_stats (loop) || vect_debug_details (loop))
2799 fprintf (dump_file, "not vectorized: unhandled data ref: ");
2800 print_generic_expr (dump_file, stmt, TDF_SLIM);
2802 return false;
2805 /* Find and record the memtag assigned to this data-ref. */
2806 if (TREE_CODE (symbl) == VAR_DECL)
2807 STMT_VINFO_MEMTAG (stmt_info) = symbl;
2808 else if (TREE_CODE (symbl) == SSA_NAME)
2810 tree tag;
2811 symbl = SSA_NAME_VAR (symbl);
2812 tag = get_var_ann (symbl)->type_mem_tag;
2813 if (!tag)
2815 tree ptr = TREE_OPERAND (memref, 0);
2816 if (TREE_CODE (ptr) == SSA_NAME)
2817 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
2819 if (!tag)
2821 if (vect_debug_stats (loop) || vect_debug_details (loop))
2822 fprintf (dump_file, "not vectorized: no memtag for ref.");
2823 return false;
2825 STMT_VINFO_MEMTAG (stmt_info) = tag;
2827 else
2829 if (vect_debug_stats (loop) || vect_debug_details (loop))
2831 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
2832 print_generic_expr (dump_file, memref, TDF_SLIM);
2834 return false;
2837 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2838 STMT_VINFO_DATA_REF (stmt_info) = dr;
2842 return true;
2846 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2848 /* Function vect_mark_relevant.
2850 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2852 static void
2853 vect_mark_relevant (varray_type worklist, tree stmt)
2855 stmt_vec_info stmt_info;
2857 if (vect_debug_details (NULL))
2858 fprintf (dump_file, "mark relevant.");
2860 if (TREE_CODE (stmt) == PHI_NODE)
2862 VARRAY_PUSH_TREE (worklist, stmt);
2863 return;
2866 stmt_info = vinfo_for_stmt (stmt);
2868 if (!stmt_info)
2870 if (vect_debug_details (NULL))
2872 fprintf (dump_file, "mark relevant: no stmt info!!.");
2873 print_generic_expr (dump_file, stmt, TDF_SLIM);
2875 return;
2878 if (STMT_VINFO_RELEVANT_P (stmt_info))
2880 if (vect_debug_details (NULL))
2881 fprintf (dump_file, "already marked relevant.");
2882 return;
2885 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2886 VARRAY_PUSH_TREE (worklist, stmt);
2890 /* Function vect_stmt_relevant_p.
2892 Return true if STMT in loop that is represented by LOOP_VINFO is
2893 "relevant for vectorization".
2895 A stmt is considered "relevant for vectorization" if:
2896 - it has uses outside the loop.
2897 - it has vdefs (it alters memory).
2898 - control stmts in the loop (except for the exit condition).
2900 CHECKME: what other side effects would the vectorizer allow? */
2902 static bool
2903 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2905 v_may_def_optype v_may_defs;
2906 v_must_def_optype v_must_defs;
2907 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2908 int i;
2909 dataflow_t df;
2910 int num_uses;
2912 /* cond stmt other than loop exit cond. */
2913 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2914 return true;
2916 /* changing memory. */
2917 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2918 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2919 if (v_may_defs || v_must_defs)
2921 if (vect_debug_details (NULL))
2922 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
2923 return true;
2926 /* uses outside the loop. */
2927 df = get_immediate_uses (stmt);
2928 num_uses = num_immediate_uses (df);
2929 for (i = 0; i < num_uses; i++)
2931 tree use = immediate_use (df, i);
2932 basic_block bb = bb_for_stmt (use);
2933 if (!flow_bb_inside_loop_p (loop, bb))
2935 if (vect_debug_details (NULL))
2936 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
2937 return true;
2941 return false;
2945 /* Function vect_mark_stmts_to_be_vectorized.
2947 Not all stmts in the loop need to be vectorized. For example:
2949 for i...
2950 for j...
2951 1. T0 = i + j
2952 2. T1 = a[T0]
2954 3. j = j + 1
2956 Stmt 1 and 3 do not need to be vectorized, because loop control and
2957 addressing of vectorized data-refs are handled differently.
2959 This pass detects such stmts. */
2961 static bool
2962 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2964 varray_type worklist;
2965 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2966 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2967 unsigned int nbbs = loop->num_nodes;
2968 block_stmt_iterator si;
2969 tree stmt;
2970 stmt_ann_t ann;
2971 unsigned int i;
2972 int j;
2973 use_optype use_ops;
2974 stmt_vec_info stmt_info;
2976 if (vect_debug_details (NULL))
2977 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
2979 VARRAY_TREE_INIT (worklist, 64, "work list");
2981 /* 1. Init worklist. */
2983 for (i = 0; i < nbbs; i++)
2985 basic_block bb = bbs[i];
2986 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2988 stmt = bsi_stmt (si);
2990 if (vect_debug_details (NULL))
2992 fprintf (dump_file, "init: stmt relevant? ");
2993 print_generic_expr (dump_file, stmt, TDF_SLIM);
2996 stmt_info = vinfo_for_stmt (stmt);
2997 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2999 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3000 vect_mark_relevant (worklist, stmt);
3005 /* 2. Process_worklist */
3007 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3009 stmt = VARRAY_TOP_TREE (worklist);
3010 VARRAY_POP (worklist);
3012 if (vect_debug_details (NULL))
3014 fprintf (dump_file, "worklist: examine stmt: ");
3015 print_generic_expr (dump_file, stmt, TDF_SLIM);
3018 /* Examine the USES in this statement. Mark all the statements which
3019 feed this statement's uses as "relevant", unless the USE is used as
3020 an array index. */
3022 if (TREE_CODE (stmt) == PHI_NODE)
3024 /* follow the def-use chain inside the loop. */
3025 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3027 tree arg = PHI_ARG_DEF (stmt, j);
3028 tree def_stmt = NULL_TREE;
3029 basic_block bb;
3030 if (!vect_is_simple_use (arg, loop, &def_stmt))
3032 if (vect_debug_details (NULL))
3033 fprintf (dump_file, "worklist: unsupported use.");
3034 varray_clear (worklist);
3035 return false;
3037 if (!def_stmt)
3038 continue;
3040 if (vect_debug_details (NULL))
3042 fprintf (dump_file, "worklist: def_stmt: ");
3043 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3046 bb = bb_for_stmt (def_stmt);
3047 if (flow_bb_inside_loop_p (loop, bb))
3048 vect_mark_relevant (worklist, def_stmt);
3052 ann = stmt_ann (stmt);
3053 use_ops = USE_OPS (ann);
3055 for (i = 0; i < NUM_USES (use_ops); i++)
3057 tree use = USE_OP (use_ops, i);
3059 /* We are only interested in uses that need to be vectorized. Uses
3060 that are used for address computation are not considered relevant.
3062 if (exist_non_indexing_operands_for_use_p (use, stmt))
3064 tree def_stmt = NULL_TREE;
3065 basic_block bb;
3066 if (!vect_is_simple_use (use, loop, &def_stmt))
3068 if (vect_debug_details (NULL))
3069 fprintf (dump_file, "worklist: unsupported use.");
3070 varray_clear (worklist);
3071 return false;
3074 if (!def_stmt)
3075 continue;
3077 if (vect_debug_details (NULL))
3079 fprintf (dump_file, "worklist: examine use %d: ", i);
3080 print_generic_expr (dump_file, use, TDF_SLIM);
3083 bb = bb_for_stmt (def_stmt);
3084 if (flow_bb_inside_loop_p (loop, bb))
3085 vect_mark_relevant (worklist, def_stmt);
3088 } /* while worklist */
3090 varray_clear (worklist);
3091 return true;
3095 /* Function vect_get_loop_niters.
3097 Determine how many iterations the loop is executed. */
3099 static tree
3100 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3102 tree niters;
3104 if (vect_debug_details (NULL))
3105 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3107 niters = number_of_iterations_in_loop (loop);
3109 if (niters != NULL_TREE
3110 && niters != chrec_dont_know
3111 && host_integerp (niters,0))
3113 *number_of_iterations = TREE_INT_CST_LOW (niters);
3115 if (vect_debug_details (NULL))
3116 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3117 *number_of_iterations);
3120 return get_loop_exit_condition (loop);
3124 /* Function vect_analyze_loop_form.
3126 Verify the following restrictions (some may be relaxed in the future):
3127 - it's an inner-most loop
3128 - number of BBs = 2 (which are the loop header and the latch)
3129 - the loop has a pre-header
3130 - the loop has a single entry and exit
3131 - the loop exit condition is simple enough, and the number of iterations
3132 can be analyzed (a countable loop). */
3134 static loop_vec_info
3135 vect_analyze_loop_form (struct loop *loop)
3137 loop_vec_info loop_vinfo;
3138 tree loop_cond;
3139 HOST_WIDE_INT number_of_iterations = -1;
3141 if (vect_debug_details (loop))
3142 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3144 if (loop->inner
3145 || !loop->single_exit
3146 || loop->num_nodes != 2)
3148 if (vect_debug_stats (loop) || vect_debug_details (loop))
3150 fprintf (dump_file, "not vectorized: bad loop form. ");
3151 if (loop->inner)
3152 fprintf (dump_file, "nested loop.");
3153 else if (!loop->single_exit)
3154 fprintf (dump_file, "multiple exits.");
3155 else if (loop->num_nodes != 2)
3156 fprintf (dump_file, "too many BBs in loop.");
3159 return NULL;
3162 /* We assume that the loop exit condition is at the end of the loop. i.e,
3163 that the loop is represented as a do-while (with a proper if-guard
3164 before the loop if needed), where the loop header contains all the
3165 executable statements, and the latch is empty. */
3166 if (!empty_block_p (loop->latch))
3168 if (vect_debug_stats (loop) || vect_debug_details (loop))
3169 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3170 return NULL;
3173 if (empty_block_p (loop->header))
3175 if (vect_debug_stats (loop) || vect_debug_details (loop))
3176 fprintf (dump_file, "not vectorized: empty loop.");
3177 return NULL;
3180 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3181 if (!loop_cond)
3183 if (vect_debug_stats (loop) || vect_debug_details (loop))
3184 fprintf (dump_file, "not vectorized: complicated exit condition.");
3185 return NULL;
3188 if (number_of_iterations < 0)
3190 if (vect_debug_stats (loop) || vect_debug_details (loop))
3191 fprintf (dump_file, "not vectorized: unknown loop bound.");
3192 return NULL;
3195 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3197 if (vect_debug_stats (loop) || vect_debug_details (loop))
3198 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3199 return NULL;
3202 loop_vinfo = new_loop_vec_info (loop);
3203 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3204 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3206 return loop_vinfo;
3210 /* Function vect_analyze_loop.
3212 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3213 for it. The different analyses will record information in the
3214 loop_vec_info struct. */
3216 static loop_vec_info
3217 vect_analyze_loop (struct loop *loop)
3219 bool ok;
3220 loop_vec_info loop_vinfo;
3222 if (vect_debug_details (NULL))
3223 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3225 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3227 loop_vinfo = vect_analyze_loop_form (loop);
3228 if (!loop_vinfo)
3230 if (vect_debug_details (loop))
3231 fprintf (dump_file, "bad loop form.");
3232 return NULL;
3235 /* Find all data references in the loop (which correspond to vdefs/vuses)
3236 and analyze their evolution in the loop.
3238 FORNOW: Handle only simple, one-dimensional, array references, which
3239 alignment can be forced, and aligned pointer-references. */
3241 ok = vect_analyze_data_refs (loop_vinfo);
3242 if (!ok)
3244 if (vect_debug_details (loop))
3245 fprintf (dump_file, "bad data references.");
3246 destroy_loop_vec_info (loop_vinfo);
3247 return NULL;
3251 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3253 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3254 if (!ok)
3256 if (vect_debug_details (loop))
3257 fprintf (dump_file, "unexpected pattern.");
3258 if (vect_debug_details (loop))
3259 fprintf (dump_file, "not vectorized: unexpected pattern.");
3260 destroy_loop_vec_info (loop_vinfo);
3261 return NULL;
3265 /* Check that all cross-iteration scalar data-flow cycles are OK.
3266 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3268 ok = vect_analyze_scalar_cycles (loop_vinfo);
3269 if (!ok)
3271 if (vect_debug_details (loop))
3272 fprintf (dump_file, "bad scalar cycle.");
3273 destroy_loop_vec_info (loop_vinfo);
3274 return NULL;
3278 /* Analyze data dependences between the data-refs in the loop.
3279 FORNOW: fail at the first data dependence that we encounter. */
3281 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3282 if (!ok)
3284 if (vect_debug_details (loop))
3285 fprintf (dump_file, "bad data dependence.");
3286 destroy_loop_vec_info (loop_vinfo);
3287 return NULL;
3291 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3292 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3294 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3295 if (!ok)
3297 if (vect_debug_details (loop))
3298 fprintf (dump_file, "bad data access.");
3299 destroy_loop_vec_info (loop_vinfo);
3300 return NULL;
3304 /* Analyze the alignment of the data-refs in the loop.
3305 FORNOW: Only aligned accesses are handled. */
3307 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3308 if (!ok)
3310 if (vect_debug_details (loop))
3311 fprintf (dump_file, "bad data alignment.");
3312 destroy_loop_vec_info (loop_vinfo);
3313 return NULL;
3317 /* Scan all the operations in the loop and make sure they are
3318 vectorizable. */
3320 ok = vect_analyze_operations (loop_vinfo);
3321 if (!ok)
3323 if (vect_debug_details (loop))
3324 fprintf (dump_file, "bad operation or unsupported loop bound.");
3325 destroy_loop_vec_info (loop_vinfo);
3326 return NULL;
3329 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3331 return loop_vinfo;
3335 /* Function need_imm_uses_for.
3337 Return whether we ought to include information for 'var'
3338 when calculating immediate uses. For this pass we only want use
3339 information for non-virtual variables. */
3341 static bool
3342 need_imm_uses_for (tree var)
3344 return is_gimple_reg (var);
3348 /* Function vectorize_loops.
3350 Entry Point to loop vectorization phase. */
3352 void
3353 vectorize_loops (struct loops *loops)
3355 unsigned int i, loops_num;
3356 unsigned int num_vectorized_loops = 0;
3358 /* Does the target support SIMD? */
3359 /* FORNOW: until more sophisticated machine modelling is in place. */
3360 if (!UNITS_PER_SIMD_WORD)
3362 if (vect_debug_details (NULL))
3363 fprintf (dump_file, "vectorizer: target vector size is not defined.");
3364 return;
3367 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
3369 /* ----------- Analyze loops. ----------- */
3371 /* If some loop was duplicated, it gets bigger number
3372 than all previously defined loops. This fact allows us to run
3373 only over initial loops skipping newly generated ones. */
3374 loops_num = loops->num;
3375 for (i = 1; i < loops_num; i++)
3377 loop_vec_info loop_vinfo;
3378 struct loop *loop = loops->parray[i];
3380 if (!loop)
3381 continue;
3383 loop_vinfo = vect_analyze_loop (loop);
3384 loop->aux = loop_vinfo;
3386 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
3387 continue;
3389 vect_transform_loop (loop_vinfo, loops);
3390 num_vectorized_loops++;
3393 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
3394 fprintf (dump_file, "\nvectorized %u loops in function.\n",
3395 num_vectorized_loops);
3397 /* ----------- Finalize. ----------- */
3399 free_df ();
3400 for (i = 1; i < loops_num; i++)
3402 struct loop *loop = loops->parray[i];
3403 loop_vec_info loop_vinfo = loop->aux;
3404 if (!loop)
3405 continue;
3406 destroy_loop_vec_info (loop_vinfo);
3407 loop->aux = NULL;
3410 loop_commit_inserts ();
3411 rewrite_into_ssa (false);
3412 if (bitmap_first_set_bit (vars_to_rename) >= 0)
3414 /* The rewrite of ssa names may cause violation of loop closed ssa
3415 form invariants. TODO -- avoid these rewrites completely.
3416 Information in virtual phi nodes is sufficient for it. */
3417 rewrite_into_loop_closed_ssa ();
3419 bitmap_clear (vars_to_rename);