1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT, etc.).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it were manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFs of one-dimensional
61 arrays whose base is an array DECL (not a pointer), and INDIRECT_REFs
62 through pointers; both array and pointer accesses are required to have a
63 simple (consecutive) access pattern.
65 Analysis phase:
66 ===============
67 The driver for the analysis phase is vect_analyze_loop_nest().
68 It applies a set of analyses, some of which rely on the scalar evolution
69 analyzer (scev) developed by Sebastian Pop.
71 During the analysis phase the vectorizer records some information
72 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
73 loop, as well as general information about the loop as a whole, which is
74 recorded in a "loop_vec_info" struct attached to each loop.
76 Transformation phase:
77 =====================
78 The loop transformation phase scans all the stmts in the loop, and
79 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
80 the loop that needs to be vectorized. It inserts the vector code sequence
81 just before the scalar stmt S, and records a pointer to the vector code
82 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
83 attached to S). This pointer is used for the vectorization of subsequent
84 stmts that use the def of stmt S. Stmt S is removed if it writes to memory;
85 otherwise, we rely on dead code elimination for removing it.
87 For example, say stmt S1 was vectorized into stmt VS1:
89 VS1: vb = px[i];
90 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
91 S2: a = b;
93 To vectorize stmt S2, the vectorizer first finds the stmt that defines
94 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
95 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
96 resulting sequence would be:
98 VS1: vb = px[i];
99 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 VS2: va = vb;
101 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
103 Operands that are not SSA_NAMEs are data-refs that appear in
104 load/store operations (like 'x[i]' in S1), and are handled differently.
106 Target modeling:
107 =================
108 Currently the only target specific information that is used is the
109 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
110 support multiple vector sizes will, for now, need to specify a single value
111 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
113 Since we only vectorize operations whose vector form can be
114 expressed using existing tree codes, to verify that an operation is
115 supported, the vectorizer checks the relevant optab at the relevant
116 machine_mode (e.g., add_optab->handlers[(int) V8HImode].insn_code). If
117 the value found is CODE_FOR_nothing, then there's no target support, and
118 we can't vectorize the stmt.
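
     For illustration only, a sketch of that check (the vector type is
     whatever get_vectype_for_scalar_type returns for the stmt's scalar
     type; PLUS_EXPR stands for the operation's tree code):

        optab otab = optab_for_tree_code (PLUS_EXPR, vectype);
        if (!otab
            || otab->handlers[(int) TYPE_MODE (vectype)].insn_code
               == CODE_FOR_nothing)
          -> the stmt cannot be vectorized on this target.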
120 For additional information on this project see:
121 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
124 #include "config.h"
125 #include "system.h"
126 #include "coretypes.h"
127 #include "tm.h"
128 #include "errors.h"
129 #include "ggc.h"
130 #include "tree.h"
131 #include "target.h"
133 #include "rtl.h"
134 #include "basic-block.h"
135 #include "diagnostic.h"
136 #include "tree-flow.h"
137 #include "tree-dump.h"
138 #include "timevar.h"
139 #include "cfgloop.h"
140 #include "cfglayout.h"
141 #include "expr.h"
142 #include "optabs.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static void vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static void vect_align_data_ref (tree);
169 static void vect_enhance_data_refs_alignment (loop_vec_info);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree , struct loop *, tree *);
173 static bool exist_non_indexing_operands_for_use_p (tree, tree);
174 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175 static void vect_mark_relevant (varray_type, tree);
176 static bool vect_stmt_relevant_p (tree, loop_vec_info);
177 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
178 static void vect_compute_data_ref_alignment
179 (struct data_reference *, loop_vec_info);
180 static bool vect_analyze_data_ref_access (struct data_reference *);
181 static bool vect_get_first_index (tree, tree *);
182 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
183 static tree vect_get_base_decl_and_bit_offset (tree, tree *);
184 static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool);
186 /* Utility functions for the code transformation. */
187 static tree vect_create_destination_var (tree, tree);
188 static tree vect_create_data_ref (tree, block_stmt_iterator *);
189 static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *);
190 static tree get_vectype_for_scalar_type (tree);
191 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
192 static tree vect_get_vec_def_for_operand (tree, tree);
193 static tree vect_init_vector (tree, tree);
194 static void vect_finish_stmt_generation
195 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
197 /* Utilities for creation and deletion of vec_info structs. */
198 loop_vec_info new_loop_vec_info (struct loop *loop);
199 void destroy_loop_vec_info (loop_vec_info);
200 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
202 static bool vect_debug_stats (struct loop *loop);
203 static bool vect_debug_details (struct loop *loop);
206 /* Function new_stmt_vec_info.
208 Create and initialize a new stmt_vec_info struct for STMT. */
210 stmt_vec_info
211 new_stmt_vec_info (tree stmt, struct loop *loop)
213 stmt_vec_info res;
214 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
216 STMT_VINFO_TYPE (res) = undef_vec_info_type;
217 STMT_VINFO_STMT (res) = stmt;
218 STMT_VINFO_LOOP (res) = loop;
219 STMT_VINFO_RELEVANT_P (res) = 0;
220 STMT_VINFO_VECTYPE (res) = NULL;
221 STMT_VINFO_VEC_STMT (res) = NULL;
222 STMT_VINFO_DATA_REF (res) = NULL;
223 STMT_VINFO_MEMTAG (res) = NULL;
225 return res;
229 /* Function new_loop_vec_info.
231 Create and initialize a new loop_vec_info struct for LOOP, as well as
232 stmt_vec_info structs for all the stmts in LOOP. */
234 loop_vec_info
235 new_loop_vec_info (struct loop *loop)
237 loop_vec_info res;
238 basic_block *bbs;
239 block_stmt_iterator si;
240 unsigned int i;
242 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
244 bbs = get_loop_body (loop);
246 /* Create stmt_info for all stmts in the loop. */
247 for (i = 0; i < loop->num_nodes; i++)
249 basic_block bb = bbs[i];
250 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
252 tree stmt = bsi_stmt (si);
253 stmt_ann_t ann;
255 get_stmt_operands (stmt);
256 ann = stmt_ann (stmt);
257 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
261 LOOP_VINFO_LOOP (res) = loop;
262 LOOP_VINFO_BBS (res) = bbs;
263 LOOP_VINFO_EXIT_COND (res) = NULL;
264 LOOP_VINFO_NITERS (res) = -1;
265 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
266 LOOP_VINFO_VECT_FACTOR (res) = 0;
267 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
268 "loop_write_datarefs");
269 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
270 "loop_read_datarefs");
271 return res;
275 /* Function destroy_loop_vec_info.
277 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
278 stmts in the loop. */
280 void
281 destroy_loop_vec_info (loop_vec_info loop_vinfo)
283 struct loop *loop;
284 basic_block *bbs;
285 int nbbs;
286 block_stmt_iterator si;
287 int j;
289 if (!loop_vinfo)
290 return;
292 loop = LOOP_VINFO_LOOP (loop_vinfo);
294 bbs = LOOP_VINFO_BBS (loop_vinfo);
295 nbbs = loop->num_nodes;
297 for (j = 0; j < nbbs; j++)
299 basic_block bb = bbs[j];
300 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
302 tree stmt = bsi_stmt (si);
303 stmt_ann_t ann = stmt_ann (stmt);
304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
305 free (stmt_info);
306 set_stmt_info (ann, NULL);
310 free (LOOP_VINFO_BBS (loop_vinfo));
311 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
312 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
314 free (loop_vinfo);
318 /* Function vect_debug_stats.
320 For vectorization statistics dumps. */
322 static bool
323 vect_debug_stats (struct loop *loop)
325 basic_block bb;
326 block_stmt_iterator si;
327 tree node = NULL_TREE;
329 if (!dump_file || !(dump_flags & TDF_STATS))
330 return false;
332 if (!loop)
334 fprintf (dump_file, "\n");
335 return true;
338 if (!loop->header)
339 return false;
341 bb = loop->header;
343 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
345 node = bsi_stmt (si);
346 if (node && EXPR_P (node) && EXPR_LOCUS (node))
347 break;
350 if (node && EXPR_P (node) && EXPR_LOCUS (node)
351 && EXPR_FILENAME (node) && EXPR_LINENO (node))
353 fprintf (dump_file, "\nloop at %s:%d: ",
354 EXPR_FILENAME (node), EXPR_LINENO (node));
355 return true;
358 return false;
362 /* Function vect_debug_details.
364 For vectorization debug dumps. */
366 static bool
367 vect_debug_details (struct loop *loop)
369 basic_block bb;
370 block_stmt_iterator si;
371 tree node = NULL_TREE;
373 if (!dump_file || !(dump_flags & TDF_DETAILS))
374 return false;
376 if (!loop)
378 fprintf (dump_file, "\n");
379 return true;
382 if (!loop->header)
383 return false;
385 bb = loop->header;
387 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
389 node = bsi_stmt (si);
390 if (node && EXPR_P (node) && EXPR_LOCUS (node))
391 break;
394 if (node && EXPR_P (node) && EXPR_LOCUS (node)
395 && EXPR_FILENAME (node) && EXPR_LINENO (node))
397 fprintf (dump_file, "\nloop at %s:%d: ",
398 EXPR_FILENAME (node), EXPR_LINENO (node));
399 return true;
402 return false;
405 /* Function vect_get_base_decl_and_bit_offset
407 Get the decl from which the data reference REF is based,
408 and compute the OFFSET from it in bits on the way.
409 FORNOW: Handle only component-refs that consist of
410 VAR_DECLs (no ARRAY_REF or INDIRECT_REF). */
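
/* For instance (illustrative only): for a COMPONENT_REF 's.f', where 's' is
   a VAR_DECL of a struct type and field 'f' sits at bit position 32, the
   function returns the decl 's' and adds 32 to *OFFSET.  For any form it
   does not handle (e.g. an ARRAY_REF base) it returns NULL_TREE.  */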
412 static tree
413 vect_get_base_decl_and_bit_offset (tree ref, tree *offset)
415 tree decl;
416 if (TREE_CODE (ref) == VAR_DECL)
417 return ref;
419 if (TREE_CODE (ref) == COMPONENT_REF)
421 tree this_offset;
422 tree oprnd0 = TREE_OPERAND (ref, 0);
423 tree oprnd1 = TREE_OPERAND (ref, 1);
425 this_offset = bit_position (oprnd1);
426 if (!host_integerp (this_offset,1))
427 return NULL_TREE;
429 decl = vect_get_base_decl_and_bit_offset (oprnd0, offset);
431 if (decl)
433 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
435 if (!host_integerp (*offset,1) || TREE_OVERFLOW (*offset))
436 return NULL_TREE;
438 if (vect_debug_details (NULL))
440 print_generic_expr (dump_file, ref, TDF_SLIM);
441 fprintf (dump_file, " --> total offset for ref: ");
442 print_generic_expr (dump_file, *offset, TDF_SLIM);
446 return decl;
449 /* TODO: extend to handle more cases. */
450 return NULL_TREE;
454 /* Function vect_can_force_dr_alignment_p.
456 Returns whether the alignment of DECL can be forced to be aligned
457 on an ALIGNMENT-bit boundary.
459 static bool
460 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
462 if (TREE_CODE (decl) != VAR_DECL)
463 return false;
465 if (DECL_EXTERNAL (decl))
466 return false;
468 if (TREE_STATIC (decl))
469 return (alignment <= MAX_OFILE_ALIGNMENT);
470 else
471 /* This is not 100% correct. The absolute correct stack alignment
472 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
473 PREFERRED_STACK_BOUNDARY is honored by all translation units.
474 However, until someone implements forced stack alignment, SSE
475 isn't really usable without this. */
476 return (alignment <= PREFERRED_STACK_BOUNDARY);
480 /* Function vect_get_new_vect_var.
482 Returns a new variable. The current naming scheme prepends the prefix
483 "vect_" or "vect_p" (depending on the value of VAR_KIND) to NAME,
484 when NAME is provided. */
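
/* For example (names are illustrative; create_tmp_var may append a unique
   suffix): with VAR_KIND == vect_simple_var and NAME == "x" the new
   variable is named roughly "vect_x"; with VAR_KIND == vect_pointer_var
   it is named roughly "vect_px".  */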
487 static tree
488 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
490 const char *prefix;
491 int prefix_len;
492 tree new_vect_var;
494 if (var_kind == vect_simple_var)
495 prefix = "vect_";
496 else
497 prefix = "vect_p";
499 prefix_len = strlen (prefix);
501 if (name)
502 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
503 else
504 new_vect_var = create_tmp_var (type, prefix);
506 return new_vect_var;
510 /* Function vect_create_index_for_array_ref.
512 Create (and return) an index variable, along with its update chain in the
513 loop. This variable will be used to access a memory location in a vector
514 operation.
516 Input:
517 STMT: The stmt that contains a memory data-ref.
518 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
519 function can be added here, or in the loop pre-header.
521 FORNOW: We are only handling array accesses with step 1. */
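
/* Illustrative example (values are hypothetical): for an access function
   {0, +, 1} on an array whose first index is 0, and a vectorization factor
   of 8, the new index starts at (0 - 0) / 8 = 0 and is incremented by 1 on
   each (vector) iteration, so the generated code accesses vector elements
   at idx = 0, 1, 2, ...  */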
523 static tree
524 vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi)
526 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
527 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
528 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
529 tree expr = DR_REF (dr);
530 tree access_fn;
531 tree init, step;
532 loop_vec_info loop_info = loop->aux;
533 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info);
534 tree vf;
535 tree array_first_index;
536 tree indx_before_incr, indx_after_incr;
537 int loopnum = loop->num;
538 bool ok;
539 #ifdef ENABLE_CHECKING
540 varray_type access_fns = DR_ACCESS_FNS (dr);
542 /* FORNOW: handling only one dimensional arrays. */
543 gcc_assert (VARRAY_ACTIVE_SIZE (access_fns) == 1);
544 gcc_assert (vectorization_factor);
545 #endif
547 access_fn = DR_ACCESS_FN (dr, 0);
548 ok = vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, true)
549 && vect_get_first_index (expr, &array_first_index);
551 gcc_assert (ok);
553 /* FORNOW: Handling only constant 'init'. */
554 gcc_assert (TREE_CODE (init) == INTEGER_CST);
556 vf = build_int_cst (unsigned_type_node, vectorization_factor);
558 if (vect_debug_details (NULL))
560 fprintf (dump_file, "int vf = %d",vectorization_factor);
561 fprintf (dump_file, ", vf:");
562 print_generic_expr (dump_file, vf, TDF_SLIM);
563 fprintf (dump_file, ", init:");
564 print_generic_expr (dump_file, init, TDF_SLIM);
565 fprintf (dump_file, ", array_first_index:");
566 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
569 /* Calculate the 'init' of the new index.
570 init = (init - array_first_index) / vectorization_factor */
571 init = int_const_binop (TRUNC_DIV_EXPR,
572 int_const_binop (MINUS_EXPR, init, array_first_index, 1),
573 vf, 1);
575 /* Calculate the 'step' of the new index. FORNOW: always 1. */
576 step = size_one_node;
578 if (vect_debug_details (NULL))
580 fprintf (dump_file, "create iv for (");
581 print_generic_expr (dump_file, init, TDF_SLIM);
582 fprintf (dump_file, ", + ,");
583 print_generic_expr (dump_file, step, TDF_SLIM);
584 fprintf (dump_file, ")");
587 create_iv (init, step, NULL_TREE, loop, bsi, false,
588 &indx_before_incr, &indx_after_incr);
590 return indx_before_incr;
594 /* Function get_vectype_for_scalar_type.
596 Returns the vector type corresponding to SCALAR_TYPE as supported
597 by the target. */
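
/* For example (assuming a target where UNITS_PER_SIMD_WORD is 16): for a
   'short' scalar type (2 bytes) nunits is 16/2 = 8, so the function builds
   a vector type of 8 shorts (V8HImode); for 'int' (4 bytes) it builds a
   vector of 4 ints (V4SImode).  */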
599 static tree
600 get_vectype_for_scalar_type (tree scalar_type)
602 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
603 int nbytes = GET_MODE_SIZE (inner_mode);
604 int nunits;
606 if (nbytes == 0)
607 return NULL_TREE;
609 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
610 is expected. */
611 nunits = UNITS_PER_SIMD_WORD / nbytes;
613 return build_vector_type (scalar_type, nunits);
617 /* Function vect_align_data_ref.
619 Handle misalignment of memory accesses.
621 FORNOW: Can't handle misaligned accesses.
622 Make sure that the dataref is aligned. */
624 static void
625 vect_align_data_ref (tree stmt)
627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
628 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
630 /* FORNOW: can't handle misaligned accesses;
631 all accesses expected to be aligned. */
632 gcc_assert (aligned_access_p (dr));
636 /* Function vect_create_data_ref.
638 Create a memory reference expression for vector access, to be used in a
639 vector load/store stmt.
641 Input:
642 STMT: a stmt that references memory. Expected to be of the form
643 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
644 BSI: block_stmt_iterator where new stmts can be added.
646 Output:
647 1. Declare a new ptr to vector_type, and have it point to the array base.
648 For example, for vector of type V8HI:
649 v8hi *p0;
650 p0 = (v8hi *)&a;
651 2. Create a data-reference based on the new vector pointer p0, and using
652 a new index variable 'idx'. Return the expression '(*p0)[idx]'.
654 FORNOW: handle only aligned and consecutive accesses. */
656 static tree
657 vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
659 tree new_base;
660 tree data_ref;
661 tree idx;
662 tree vec_stmt;
663 tree new_temp;
664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
666 tree vect_ptr_type;
667 tree vect_ptr;
668 tree addr_ref;
669 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
670 tree array_type;
671 tree base_addr = NULL_TREE;
672 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
673 edge pe;
674 tree tag;
675 tree addr_expr;
676 tree scalar_ptr_type;
677 tree use;
678 ssa_op_iter iter;
680 /* FORNOW: make sure the data reference is aligned. */
681 vect_align_data_ref (stmt);
683 addr_ref = DR_BASE_NAME (dr);
685 array_type = build_array_type (vectype, 0);
686 TYPE_ALIGN (array_type) = TYPE_ALIGN (TREE_TYPE (addr_ref));
687 vect_ptr_type = build_pointer_type (array_type);
688 scalar_ptr_type = build_pointer_type (TREE_TYPE (addr_ref));
690 if (vect_debug_details (NULL))
692 fprintf (dump_file, "create array_ref of type: ");
693 print_generic_expr (dump_file, vectype, TDF_SLIM);
696 /*** create: vectype_array *p; ***/
697 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
698 get_name (addr_ref));
699 add_referenced_tmp_var (vect_ptr);
701 gcc_assert (TREE_CODE (addr_ref) == VAR_DECL
702 || TREE_CODE (addr_ref) == COMPONENT_REF
703 || TREE_CODE (addr_ref) == SSA_NAME);
705 if (vect_debug_details (NULL))
707 if (TREE_CODE (addr_ref) == VAR_DECL)
708 fprintf (dump_file, "vectorizing an array ref: ");
709 else if (TREE_CODE (addr_ref) == SSA_NAME)
710 fprintf (dump_file, "vectorizing a pointer ref: ");
711 else if (TREE_CODE (addr_ref) == COMPONENT_REF)
712 fprintf (dump_file, "vectorizing a record ref: ");
713 print_generic_expr (dump_file, addr_ref, TDF_SLIM);
716 /* Get base address: */
717 if (TREE_CODE (addr_ref) == SSA_NAME)
718 base_addr = addr_ref;
719 else
720 base_addr = build_fold_addr_expr (addr_ref);
722 /* Handle aliasing: */
723 tag = STMT_VINFO_MEMTAG (stmt_info);
724 gcc_assert (tag);
725 get_var_ann (vect_ptr)->type_mem_tag = tag;
727 /* Mark for renaming all aliased variables
728 (i.e, the may-aliases of the type-mem-tag) */
729 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter,
730 (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))
732 if (TREE_CODE (use) == SSA_NAME)
733 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
736 pe = loop_preheader_edge (loop);
738 /*** create: p = (vectype *)&a; ***/
740 /* addr_expr = &a */
741 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
742 get_name (addr_ref));
743 add_referenced_tmp_var (addr_expr);
744 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, base_addr);
745 new_temp = make_ssa_name (addr_expr, vec_stmt);
746 TREE_OPERAND (vec_stmt, 0) = new_temp;
747 bsi_insert_on_edge (pe, vec_stmt);
749 /* vect_ptr = (vectype_array *)&a; */
750 vec_stmt = fold_convert (vect_ptr_type, new_temp);
751 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
752 new_temp = make_ssa_name (vect_ptr, vec_stmt);
753 TREE_OPERAND (vec_stmt, 0) = new_temp;
754 bsi_insert_on_edge (pe, vec_stmt);
756 /*** create data ref: '(*p)[idx]' ***/
758 idx = vect_create_index_for_array_ref (stmt, bsi);
760 new_base = build_fold_indirect_ref (new_temp);
761 data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);
763 if (vect_debug_details (NULL))
765 fprintf (dump_file, "created new data-ref: ");
766 print_generic_expr (dump_file, data_ref, TDF_SLIM);
769 return data_ref;
773 /* Function vect_create_destination_var.
775 Create a new temporary of type VECTYPE. */
777 static tree
778 vect_create_destination_var (tree scalar_dest, tree vectype)
780 tree vec_dest;
781 const char *new_name;
783 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
785 new_name = get_name (scalar_dest);
786 if (!new_name)
787 new_name = "var_";
788 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
789 add_referenced_tmp_var (vec_dest);
791 return vec_dest;
795 /* Function vect_init_vector.
797 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
798 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
799 used in the vectorization of STMT. */
801 static tree
802 vect_init_vector (tree stmt, tree vector_var)
804 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
805 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
806 tree new_var;
807 tree init_stmt;
808 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
809 tree vec_oprnd;
810 edge pe;
811 tree new_temp;
813 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
814 add_referenced_tmp_var (new_var);
816 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
817 new_temp = make_ssa_name (new_var, init_stmt);
818 TREE_OPERAND (init_stmt, 0) = new_temp;
820 pe = loop_preheader_edge (loop);
821 bsi_insert_on_edge (pe, init_stmt);
823 if (vect_debug_details (NULL))
825 fprintf (dump_file, "created new init_stmt: ");
826 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
829 vec_oprnd = TREE_OPERAND (init_stmt, 0);
830 return vec_oprnd;
834 /* Function vect_get_vec_def_for_operand.
836 OP is an operand in STMT. This function returns a (vector) def that will be
837 used in the vectorized stmt for STMT.
839 In the case that OP is an SSA_NAME which is defined in the loop, then
840 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
842 In case OP is an invariant or constant, a new stmt that creates a vector def
843 needs to be introduced. */
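
/* Illustrative examples (hypothetical stmts): for a constant operand such
   as '3' and a V8HI vector type, a preheader stmt
      vect_cst_ = { 3, 3, 3, 3, 3, 3, 3, 3 };
   is created via vect_init_vector and its def is returned.  For an operand
   'b' defined inside the loop by S1: 'b = x[i]', which was vectorized into
   VS1: 'vb = px[i]', the def 'vb' recorded in STMT_VINFO_VEC_STMT of S1 is
   returned.  */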
845 static tree
846 vect_get_vec_def_for_operand (tree op, tree stmt)
848 tree vec_oprnd;
849 tree vec_stmt;
850 tree def_stmt;
851 stmt_vec_info def_stmt_info = NULL;
852 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
853 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
854 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
855 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
856 basic_block bb;
857 tree vec_inv;
858 tree t = NULL_TREE;
859 tree def;
860 int i;
862 if (vect_debug_details (NULL))
864 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
865 print_generic_expr (dump_file, op, TDF_SLIM);
868 /** ===> Case 1: operand is a constant. **/
870 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
872 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
874 tree vec_cst;
875 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
876 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
877 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
878 tree t = NULL_TREE;
879 int i;
881 /* Build a tree with vector elements. */
882 if (vect_debug_details (NULL))
883 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
885 for (i = nunits - 1; i >= 0; --i)
887 t = tree_cons (NULL_TREE, op, t);
889 vec_cst = build_vector (vectype, t);
890 return vect_init_vector (stmt, vec_cst);
893 gcc_assert (TREE_CODE (op) == SSA_NAME);
895 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
897 def_stmt = SSA_NAME_DEF_STMT (op);
898 def_stmt_info = vinfo_for_stmt (def_stmt);
900 if (vect_debug_details (NULL))
902 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
903 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
907 /** ==> Case 2.1: operand is defined inside the loop. **/
909 if (def_stmt_info)
911 /* Get the def from the vectorized stmt. */
913 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
914 gcc_assert (vec_stmt);
915 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
916 return vec_oprnd;
920 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
921 it is a reduction/induction. **/
923 bb = bb_for_stmt (def_stmt);
924 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
926 if (vect_debug_details (NULL))
927 fprintf (dump_file, "reduction/induction - unsupported.");
928 internal_error ("no support for reduction/induction"); /* FORNOW */
932 /** ==> Case 2.3: operand is defined outside the loop -
933 it is a loop invariant. */
935 switch (TREE_CODE (def_stmt))
937 case PHI_NODE:
938 def = PHI_RESULT (def_stmt);
939 break;
940 case MODIFY_EXPR:
941 def = TREE_OPERAND (def_stmt, 0);
942 break;
943 case NOP_EXPR:
944 def = TREE_OPERAND (def_stmt, 0);
945 gcc_assert (IS_EMPTY_STMT (def_stmt));
946 def = op;
947 break;
948 default:
949 if (vect_debug_details (NULL))
951 fprintf (dump_file, "unsupported defining stmt: ");
952 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
954 internal_error ("unsupported defining stmt");
957 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
959 if (vect_debug_details (NULL))
960 fprintf (dump_file, "Create vector_inv.");
962 for (i = nunits - 1; i >= 0; --i)
964 t = tree_cons (NULL_TREE, def, t);
967 vec_inv = build_constructor (vectype, t);
968 return vect_init_vector (stmt, vec_inv);
972 /* Function vect_finish_stmt_generation.
974 Insert a new stmt. */
976 static void
977 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
979 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
981 if (vect_debug_details (NULL))
983 fprintf (dump_file, "add new stmt: ");
984 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
987 /* Make sure bsi points to the stmt that is being vectorized. */
989 /* Assumption: any stmts created for the vectorization of stmt S are
990 inserted before S. BSI may point to S or some new stmt before it. */
992 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
993 bsi_next (bsi);
994 gcc_assert (stmt == bsi_stmt (*bsi));
998 /* Function vectorizable_assignment.
1000 Check if STMT performs an assignment (copy) that can be vectorized.
1001 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1002 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1003 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1005 static bool
1006 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1008 tree vec_dest;
1009 tree scalar_dest;
1010 tree op;
1011 tree vec_oprnd;
1012 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1013 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1014 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1015 tree new_temp;
1017 /* Is vectorizable assignment? */
1019 if (TREE_CODE (stmt) != MODIFY_EXPR)
1020 return false;
1022 scalar_dest = TREE_OPERAND (stmt, 0);
1023 if (TREE_CODE (scalar_dest) != SSA_NAME)
1024 return false;
1026 op = TREE_OPERAND (stmt, 1);
1027 if (!vect_is_simple_use (op, loop, NULL))
1029 if (vect_debug_details (NULL))
1030 fprintf (dump_file, "use not simple.");
1031 return false;
1034 if (!vec_stmt) /* transformation not required. */
1036 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1037 return true;
1040 /** Transform. **/
1041 if (vect_debug_details (NULL))
1042 fprintf (dump_file, "transform assignment.");
1044 /* Handle def. */
1045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1047 /* Handle use. */
1048 op = TREE_OPERAND (stmt, 1);
1049 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1051 /* Arguments are ready. Create the new vector stmt. */
1052 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1053 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1054 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1055 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1057 return true;
1061 /* Function vectorizable_operation.
1063 Check if STMT performs a binary or unary operation that can be vectorized.
1064 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1065 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1066 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1068 static bool
1069 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1071 tree vec_dest;
1072 tree scalar_dest;
1073 tree operation;
1074 tree op0, op1 = NULL;
1075 tree vec_oprnd0, vec_oprnd1=NULL;
1076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1077 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1078 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1079 int i;
1080 enum tree_code code;
1081 enum machine_mode vec_mode;
1082 tree new_temp;
1083 int op_type;
1084 tree op;
1085 optab optab;
1087 /* Is STMT a vectorizable binary/unary operation? */
1088 if (TREE_CODE (stmt) != MODIFY_EXPR)
1089 return false;
1091 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1092 return false;
1094 operation = TREE_OPERAND (stmt, 1);
1095 code = TREE_CODE (operation);
1096 optab = optab_for_tree_code (code, vectype);
1098 /* Support only unary or binary operations. */
1099 op_type = TREE_CODE_LENGTH (code);
1100 if (op_type != unary_op && op_type != binary_op)
1102 if (vect_debug_details (NULL))
1103 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1104 return false;
1107 for (i = 0; i < op_type; i++)
1109 op = TREE_OPERAND (operation, i);
1110 if (!vect_is_simple_use (op, loop, NULL))
1112 if (vect_debug_details (NULL))
1113 fprintf (dump_file, "use not simple.");
1114 return false;
1118 /* Supportable by target? */
1119 if (!optab)
1121 if (vect_debug_details (NULL))
1122 fprintf (dump_file, "no optab.");
1123 return false;
1125 vec_mode = TYPE_MODE (vectype);
1126 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1128 if (vect_debug_details (NULL))
1129 fprintf (dump_file, "op not supported by target.");
1130 return false;
1133 if (!vec_stmt) /* transformation not required. */
1135 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1136 return true;
1139 /** Transform. **/
1141 if (vect_debug_details (NULL))
1142 fprintf (dump_file, "transform binary/unary operation.");
1144 /* Handle def. */
1145 scalar_dest = TREE_OPERAND (stmt, 0);
1146 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1148 /* Handle uses. */
1149 op0 = TREE_OPERAND (operation, 0);
1150 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1152 if (op_type == binary_op)
1154 op1 = TREE_OPERAND (operation, 1);
1155 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1158 /* Arguments are ready. Create the new vector stmt. */
1160 if (op_type == binary_op)
1161 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1162 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1163 else
1164 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1165 build1 (code, vectype, vec_oprnd0));
1166 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1167 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1168 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1170 return true;
1174 /* Function vectorizable_store.
1176 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
1177 can be vectorized.
1178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1182 static bool
1183 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1185 tree scalar_dest;
1186 tree data_ref;
1187 tree op;
1188 tree vec_oprnd1;
1189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1190 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1191 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1192 enum machine_mode vec_mode;
1194 /* Is vectorizable store? */
1196 if (TREE_CODE (stmt) != MODIFY_EXPR)
1197 return false;
1199 scalar_dest = TREE_OPERAND (stmt, 0);
1200 if (TREE_CODE (scalar_dest) != ARRAY_REF
1201 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1202 return false;
1204 op = TREE_OPERAND (stmt, 1);
1205 if (!vect_is_simple_use (op, loop, NULL))
1207 if (vect_debug_details (NULL))
1208 fprintf (dump_file, "use not simple.");
1209 return false;
1212 vec_mode = TYPE_MODE (vectype);
1213 /* FORNOW. In some cases can vectorize even if data-type not supported
1214 (e.g. - array initialization with 0). */
1215 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1216 return false;
1218 if (!STMT_VINFO_DATA_REF (stmt_info))
1219 return false;
1221 if (!vec_stmt) /* transformation not required. */
1223 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1224 return true;
1227 /** Transform. **/
1229 if (vect_debug_details (NULL))
1230 fprintf (dump_file, "transform store");
1232 /* Handle use - get the vectorized def from the defining stmt. */
1233 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1235 /* Handle def. */
1236 data_ref = vect_create_data_ref (stmt, bsi);
1238 /* Arguments are ready. Create the new vector stmt. */
1239 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1240 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1242 return true;
1246 /* Function vectorizable_load.
1248 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
1249 can be vectorized.
1250 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1251 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1252 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1254 static bool
1255 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1257 tree scalar_dest;
1258 tree vec_dest = NULL;
1259 tree data_ref = NULL;
1260 tree op;
1261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1262 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1263 tree new_temp;
1264 enum machine_mode vec_mode;
1266 /* Is vectorizable load? */
1268 if (TREE_CODE (stmt) != MODIFY_EXPR)
1269 return false;
1271 scalar_dest = TREE_OPERAND (stmt, 0);
1272 if (TREE_CODE (scalar_dest) != SSA_NAME)
1273 return false;
1275 op = TREE_OPERAND (stmt, 1);
1276 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1277 return false;
1279 if (!STMT_VINFO_DATA_REF (stmt_info))
1280 return false;
1282 vec_mode = TYPE_MODE (vectype);
1283 /* FORNOW. In some cases can vectorize even if data-type not supported
1284 (e.g. - data copies). */
1285 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1286 return false;
1288 if (!vec_stmt) /* transformation not required. */
1290 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1291 return true;
1294 /** Transform. **/
1296 if (vect_debug_details (NULL))
1297 fprintf (dump_file, "transform load.");
1299 /* Handle def. */
1300 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1302 /* Handle use. */
1303 op = TREE_OPERAND (stmt, 1);
1304 data_ref = vect_create_data_ref (stmt, bsi);
1306 /* Arguments are ready. Create the new vector stmt. */
1307 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1308 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1309 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1310 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1312 return true;
1316 /* Function vect_transform_stmt.
1318 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1320 static bool
1321 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1323 bool is_store = false;
1324 tree vec_stmt = NULL_TREE;
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 bool done;
1328 switch (STMT_VINFO_TYPE (stmt_info))
1330 case op_vec_info_type:
1331 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1332 gcc_assert (done);
1333 break;
1335 case assignment_vec_info_type:
1336 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1337 gcc_assert (done);
1338 break;
1340 case load_vec_info_type:
1341 done = vectorizable_load (stmt, bsi, &vec_stmt);
1342 gcc_assert (done);
1343 break;
1345 case store_vec_info_type:
1346 done = vectorizable_store (stmt, bsi, &vec_stmt);
1347 gcc_assert (done);
1348 is_store = true;
1349 break;
1350 default:
1351 if (vect_debug_details (NULL))
1352 fprintf (dump_file, "stmt not supported.");
1353 gcc_unreachable ();
1356 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1358 return is_store;
1362 /* Function vect_transform_loop_bound.
1364 Create a new exit condition for the loop. */
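
/* For example (hypothetical numbers): if the original loop ran 16
   iterations and the vectorization factor is 8, a new counter starting at
   0 and incremented by 1 is created, and the exit test becomes
   'if (counter >= 2)' (or 'counter < 2' to loop back, depending on which
   edge of the original COND_EXPR exits the loop).  */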
1366 static void
1367 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1369 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1370 edge exit_edge = loop->single_exit;
1371 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1372 tree indx_before_incr, indx_after_incr;
1373 tree orig_cond_expr;
1374 HOST_WIDE_INT old_N = 0;
1375 int vf;
1376 tree cond_stmt;
1377 tree new_loop_bound;
1378 tree cond;
1379 tree lb_type;
1381 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1382 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1383 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1385 /* FORNOW:
1386 assuming the number of iterations is divisible by the vectorization factor. */
1387 gcc_assert (!(old_N % vf));
1389 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1390 gcc_assert (orig_cond_expr);
1391 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
1393 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1394 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1396 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1397 to point to the exit condition. */
1398 bsi_next (&loop_exit_bsi);
1399 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1401 /* new loop exit test: */
1402 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1403 new_loop_bound = build_int_cst (lb_type, old_N/vf);
1405 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1406 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1407 else /* 'then' edge loops back. */
1408 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1410 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1411 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1413 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1415 /* remove old loop exit test: */
1416 bsi_remove (&loop_exit_bsi);
1418 if (vect_debug_details (NULL))
1419 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1423 /* Function vect_transform_loop.
1425 The analysis phase has determined that the loop is vectorizable.
1426 Vectorize the loop - create vectorized stmts to replace the scalar
1427 stmts in the loop, and update the loop exit condition. */
1429 static void
1430 vect_transform_loop (loop_vec_info loop_vinfo,
1431 struct loops *loops ATTRIBUTE_UNUSED)
1433 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1434 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1435 int nbbs = loop->num_nodes;
1436 block_stmt_iterator si;
1437 int i;
1438 #ifdef ENABLE_CHECKING
1439 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1440 #endif
1442 if (vect_debug_details (NULL))
1443 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
1445 /* 1) Make sure the loop header has exactly two entries
1446 2) Make sure we have a preheader basic block. */
1448 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1450 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1453 /* FORNOW: the vectorizer supports only loops whose body consists
1454 of one basic block (header + empty latch). When the vectorizer
1455 supports more involved loop forms, the order in which the BBs are
1456 traversed will need to be reconsidered. */
1458 for (i = 0; i < nbbs; i++)
1460 basic_block bb = bbs[i];
1462 for (si = bsi_start (bb); !bsi_end_p (si);)
1464 tree stmt = bsi_stmt (si);
1465 stmt_vec_info stmt_info;
1466 bool is_store;
1467 #ifdef ENABLE_CHECKING
1468 tree vectype;
1469 #endif
1471 if (vect_debug_details (NULL))
1473 fprintf (dump_file, "------>vectorizing statement: ");
1474 print_generic_expr (dump_file, stmt, TDF_SLIM);
1476 stmt_info = vinfo_for_stmt (stmt);
1477 gcc_assert (stmt_info);
1478 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1480 bsi_next (&si);
1481 continue;
1483 #ifdef ENABLE_CHECKING
1484 /* FORNOW: Verify that all stmts operate on the same number of
1485 units and no inner unrolling is necessary. */
1486 vectype = STMT_VINFO_VECTYPE (stmt_info);
1487 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
1488 == vectorization_factor);
1489 #endif
1490 /* -------- vectorize statement ------------ */
1491 if (vect_debug_details (NULL))
1492 fprintf (dump_file, "transform statement.");
1494 is_store = vect_transform_stmt (stmt, &si);
1495 if (is_store)
1497 /* free the attached stmt_vec_info and remove the stmt. */
1498 stmt_ann_t ann = stmt_ann (stmt);
1499 free (stmt_info);
1500 set_stmt_info (ann, NULL);
1501 bsi_remove (&si);
1502 continue;
1505 bsi_next (&si);
1506 } /* stmts in BB */
1507 } /* BBs in loop */
1509 vect_transform_loop_bound (loop_vinfo);
1511 if (vect_debug_details (loop))
1512 fprintf (dump_file,"Success! loop vectorized.");
1513 if (vect_debug_stats (loop))
1514 fprintf (dump_file, "LOOP VECTORIZED.");
1518 /* Function vect_is_simple_use.
1520 Input:
1521 LOOP - the loop that is being vectorized.
1522 OPERAND - operand of a stmt in LOOP.
1523 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1525 Returns whether a stmt with OPERAND can be vectorized.
1526 Supportable operands are constants, loop invariants, and operands that are
1527 defined by the current iteration of the loop. Unsupportable operands are
1528 those that are defined by a previous iteration of the loop (as is the case
1529 in reduction/induction computations). */
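
/* For example (hypothetical stmts): in 'a = b', the operand 'b' is simple
   if it is defined in the same iteration, e.g. by 'b = x[i]'.  In
   'sum = sum + x[i]', the operand 'sum' is defined by a loop-header
   PHI node (a reduction), so it is not considered simple here.  */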
1531 static bool
1532 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1534 tree def_stmt;
1535 basic_block bb;
1537 if (def)
1538 *def = NULL_TREE;
1540 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1541 return true;
1543 if (TREE_CODE (operand) != SSA_NAME)
1544 return false;
1546 def_stmt = SSA_NAME_DEF_STMT (operand);
1547 if (def_stmt == NULL_TREE )
1549 if (vect_debug_details (NULL))
1550 fprintf (dump_file, "no def_stmt.");
1551 return false;
1554 /* An empty stmt is expected only in the case of a function argument.
1555 (Otherwise - we expect a phi_node or a modify_expr). */
1556 if (IS_EMPTY_STMT (def_stmt))
1558 tree arg = TREE_OPERAND (def_stmt, 0);
1559 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1560 return true;
1561 if (vect_debug_details (NULL))
1563 fprintf (dump_file, "Unexpected empty stmt: ");
1564 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1566 return false;
1569 /* phi_node inside the loop indicates an induction/reduction pattern.
1570 This is not supported yet. */
1571 bb = bb_for_stmt (def_stmt);
1572 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1574 if (vect_debug_details (NULL))
1575 fprintf (dump_file, "reduction/induction - unsupported.");
1576 return false; /* FORNOW: not supported yet. */
1579 /* Expecting a modify_expr or a phi_node. */
1580 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1581 || TREE_CODE (def_stmt) == PHI_NODE)
1583 if (def)
1584 *def = def_stmt;
1585 return true;
1588 return false;
1592 /* Function vect_analyze_operations.
1594 Scan the loop stmts and make sure they are all vectorizable. */
1596 static bool
1597 vect_analyze_operations (loop_vec_info loop_vinfo)
1599 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1600 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1601 int nbbs = loop->num_nodes;
1602 block_stmt_iterator si;
1603 int vectorization_factor = 0;
1604 int i;
1605 bool ok;
1606 tree scalar_type;
1608 if (vect_debug_details (NULL))
1609 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
1611 for (i = 0; i < nbbs; i++)
1613 basic_block bb = bbs[i];
1615 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1617 tree stmt = bsi_stmt (si);
1618 int nunits;
1619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1620 tree vectype;
1622 if (vect_debug_details (NULL))
1624 fprintf (dump_file, "==> examining statement: ");
1625 print_generic_expr (dump_file, stmt, TDF_SLIM);
1628 gcc_assert (stmt_info);
1630 /* Skip stmts that do not need to be vectorized.
1631 This is expected to include:
1632 - the COND_EXPR which is the loop exit condition
1633 - any LABEL_EXPRs in the loop
1634 - computations that are used only for array indexing or loop
1635 control */
1637 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1639 if (vect_debug_details (NULL))
1640 fprintf (dump_file, "irrelevant.");
1641 continue;
1644 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
1646 if (vect_debug_stats (loop) || vect_debug_details (loop))
1648 fprintf (dump_file, "not vectorized: vector stmt in loop:");
1649 print_generic_expr (dump_file, stmt, TDF_SLIM);
1651 return false;
1654 if (STMT_VINFO_DATA_REF (stmt_info))
1655 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
1656 else if (TREE_CODE (stmt) == MODIFY_EXPR)
1657 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
1658 else
1659 scalar_type = TREE_TYPE (stmt);
1661 if (vect_debug_details (NULL))
1663 fprintf (dump_file, "get vectype for scalar type: ");
1664 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1667 vectype = get_vectype_for_scalar_type (scalar_type);
1668 if (!vectype)
1670 if (vect_debug_stats (loop) || vect_debug_details (loop))
1672 fprintf (dump_file, "not vectorized: unsupported data-type ");
1673 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1675 return false;
1678 if (vect_debug_details (NULL))
1680 fprintf (dump_file, "vectype: ");
1681 print_generic_expr (dump_file, vectype, TDF_SLIM);
1683 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1685 ok = (vectorizable_operation (stmt, NULL, NULL)
1686 || vectorizable_assignment (stmt, NULL, NULL)
1687 || vectorizable_load (stmt, NULL, NULL)
1688 || vectorizable_store (stmt, NULL, NULL));
1690 if (!ok)
1692 if (vect_debug_stats (loop) || vect_debug_details (loop))
1694 fprintf (dump_file, "not vectorized: stmt not supported: ");
1695 print_generic_expr (dump_file, stmt, TDF_SLIM);
1697 return false;
1700 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1701 if (vect_debug_details (NULL))
1702 fprintf (dump_file, "nunits = %d", nunits);
1704 if (vectorization_factor)
1706 /* FORNOW: don't allow mixed units.
1707 This restriction will be relaxed in the future. */
1708 if (nunits != vectorization_factor)
1710 if (vect_debug_stats (loop) || vect_debug_details (loop))
1711 fprintf (dump_file, "not vectorized: mixed data-types");
1712 return false;
1715 else
1716 vectorization_factor = nunits;
1720 /* TODO: Analyze cost. Decide if worth while to vectorize. */
1721 if (!vectorization_factor)
1723 if (vect_debug_stats (loop) || vect_debug_details (loop))
1724 fprintf (dump_file, "not vectorized: unsupported data-type");
1725 return false;
1727 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1729 /* FORNOW: handle only cases where the loop bound is divisible by the
1730 vectorization factor. */
1732 if (vect_debug_details (NULL))
1733 fprintf (dump_file,
1734 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1735 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
1737 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1739 if (vect_debug_stats (loop) || vect_debug_details (loop))
1740 fprintf (dump_file, "not vectorized: Unknown loop bound.");
1741 return false;
1744 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1745 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
1747 if (vect_debug_stats (loop) || vect_debug_details (loop))
1748 fprintf (dump_file, "not vectorized: loop bound isn't divisible by %d.",
1749 vectorization_factor);
1750 return false;
1753 return true;
1757 /* Function exist_non_indexing_operands_for_use_p.
1759 USE is one of the uses attached to STMT. Check if USE is
1760 used in STMT for anything other than indexing an array. */
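
/* For example (hypothetical stmt): in 'x[i_2] = b_3', the use 'b_3' is the
   stored value, so the function returns true for it; the use 'i_2' only
   indexes the array, so the function returns false for it.  */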
1762 static bool
1763 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
1765 tree operand;
1766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1768 /* USE corresponds to some operand in STMT. If there is no data
1769 reference in STMT, then any operand that corresponds to USE
1770 is not indexing an array. */
1771 if (!STMT_VINFO_DATA_REF (stmt_info))
1772 return true;
1774 /* STMT has a data_ref. FORNOW this means that it is of one of
1775 the following forms:
1776 -1- ARRAY_REF = var
1777 -2- var = ARRAY_REF
1778 (This should have been verified in analyze_data_refs).
1780 'var' in the second case corresponds to a def, not a use,
1781 so USE cannot correspond to any operands that are not used
1782 for array indexing.
1784 Therefore, all we need to check is if STMT falls into the
1785 first case, and whether var corresponds to USE. */
1787 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
1788 return false;
1790 operand = TREE_OPERAND (stmt, 1);
1792 if (TREE_CODE (operand) != SSA_NAME)
1793 return false;
1795 if (operand == use)
1796 return true;
1798 return false;
1802 /* Function vect_is_simple_iv_evolution.
1804 FORNOW: A simple evolution of an induction variable in the loop is
1805 considered a polynomial evolution with constant step. */
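
/* For example (hypothetical access functions): {0, +, 1}_1 has INIT 0 and
   STEP 1 and is accepted; {0, +, 4}_1 still has a constant step and is
   accepted when STRICT is false, but is rejected when STRICT is true
   because the step is not 1.  */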
1807 static bool
1808 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1809 tree * step, bool strict)
1811 tree init_expr;
1812 tree step_expr;
1814 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1816 /* When there is no evolution in this loop, the evolution function
1817 is not "simple". */
1818 if (evolution_part == NULL_TREE)
1819 return false;
1821 /* When the evolution is a polynomial of degree >= 2
1822 the evolution function is not "simple". */
1823 if (tree_is_chrec (evolution_part))
1824 return false;
1826 step_expr = evolution_part;
1827 init_expr = initial_condition (access_fn);
1829 if (vect_debug_details (NULL))
1831 fprintf (dump_file, "step: ");
1832 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1833 fprintf (dump_file, ", init: ");
1834 print_generic_expr (dump_file, init_expr, TDF_SLIM);
1837 *init = init_expr;
1838 *step = step_expr;
1840 if (TREE_CODE (step_expr) != INTEGER_CST)
1842 if (vect_debug_details (NULL))
1843 fprintf (dump_file, "step unknown.");
1844 return false;
1847 if (strict)
1848 if (!integer_onep (step_expr))
1850 if (vect_debug_details (NULL))
1851 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1852 return false;
1855 return true;
1859 /* Function vect_analyze_scalar_cycles.
1861 Examine the cross iteration def-use cycles of scalar variables, by
1862 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
1863 cycles that they represent do not impede vectorization.
1865 FORNOW: Reduction, as in the following loop, is not supported yet:
1866 loop1:
1867 for (i=0; i<N; i++)
1868 sum += a[i];
1869 The cross-iteration cycle corresponding to variable 'sum' will be
1870 considered too complicated and will impede vectorization.
1872 FORNOW: Induction, as in the following loop, is not supported yet:
1873 loop2:
1874 for (i=0; i<N; i++)
1875 a[i] = i;
1877 However, the following loop *is* vectorizable:
1878 loop3:
1879 for (i=0; i<N; i++)
1880 a[i] = b[i];
1882 In both loops there exists a def-use cycle for the variable i:
1883 loop: i_2 = PHI (i_0, i_1)
1884 a[i_2] = ...;
1885 i_1 = i_2 + 1;
1886 GOTO loop;
1888 The evolution of the above cycle is considered simple enough,
1889 however, we also check that the cycle does not need to be
1890 vectorized, i.e - we check that the variable that this cycle
1891 defines is only used for array indexing or in stmts that do not
1892 need to be vectorized. This is not the case in loop2, but it
1893 *is* the case in loop3. */
1895 static bool
1896 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
1898 tree phi;
1899 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1900 basic_block bb = loop->header;
1901 tree dummy;
1903 if (vect_debug_details (NULL))
1904 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
1906 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1908 tree access_fn = NULL;
1910 if (vect_debug_details (NULL))
1912 fprintf (dump_file, "Analyze phi: ");
1913 print_generic_expr (dump_file, phi, TDF_SLIM);
1916 /* Skip virtual phi's. The data dependences that are associated with
1917 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1919 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1921 if (vect_debug_details (NULL))
1922 fprintf (dump_file, "virtual phi. skip.");
1923 continue;
1926 /* Analyze the evolution function. */
1928 /* FORNOW: The only scalar cross-iteration cycles that we allow are
1929 those of loop induction variables; This property is verified here.
1931 Furthermore, if that induction variable is used in an operation
1932 that needs to be vectorized (i.e, is not solely used to index
1933 arrays and check the exit condition) - we do not support its
1934 vectorization yet. This property is verified in vect_is_simple_use,
1935 during vect_analyze_operations. */
1937 access_fn = instantiate_parameters
1938 (loop,
1939 analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1941 if (!access_fn)
1943 if (vect_debug_stats (loop) || vect_debug_details (loop))
1944 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
1945 return false;
1948 if (vect_debug_details (NULL))
1950 fprintf (dump_file, "Access function of PHI: ");
1951 print_generic_expr (dump_file, access_fn, TDF_SLIM);
1954 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
1955 &dummy, false))
1957 if (vect_debug_stats (loop) || vect_debug_details (loop))
1958 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
1959 return false;
1963 return true;
1967 /* Function vect_analyze_data_ref_dependence.
1969 Return TRUE if there (might) exist a dependence between a memory-reference
1970 DRA and a memory-reference DRB. */
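/* For illustration, consider a loop such as

      for (i=0; i<N; i++)
        a[i+1] = a[i] + b[i];

   The write a[i+1] and the read a[i] access the same array in
   overlapping iterations, so the dependence cannot be ruled out;
   this function then returns TRUE and the loop is rejected.  */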
1972 static bool
1973 vect_analyze_data_ref_dependence (struct data_reference *dra,
1974 struct data_reference *drb,
1975 struct loop *loop)
1977 bool differ_p;
1978 struct data_dependence_relation *ddr;
1980 if (!array_base_name_differ_p (dra, drb, &differ_p))
1982 if (vect_debug_stats (loop) || vect_debug_details (loop))
1984 fprintf (dump_file,
1985 "not vectorized: can't determine dependence between: ");
1986 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
1987 fprintf (dump_file, " and ");
1988 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
1990 return true;
1993 if (differ_p)
1994 return false;
1996 ddr = initialize_data_dependence_relation (dra, drb);
1997 compute_affine_dependence (ddr);
1999 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2000 return false;
2002 if (vect_debug_stats (loop) || vect_debug_details (loop))
2004 fprintf (dump_file,
2005 "not vectorized: possible dependence between data-refs ");
2006 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2007 fprintf (dump_file, " and ");
2008 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2011 return true;
2015 /* Function vect_analyze_data_ref_dependences.
2017 Examine all the data references in the loop, and make sure there do not
2018 exist any data dependences between them.
2020 TODO: dependences whose distance is greater than the vectorization factor
2021 can be ignored. */
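/* For illustration (assuming a vectorization factor of 4), the
   dependence in

      for (i=8; i<N; i++)
        a[i] = a[i-8] + b[i];

   has distance 8: every vector iteration only reads values that were
   stored at least two vector iterations earlier, so it could safely
   be ignored.  FORNOW it still causes the loop to be rejected.  */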
2023 static bool
2024 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2026 unsigned int i, j;
2027 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2028 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2029 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2031 /* Examine store-store (output) dependences. */
2033 if (vect_debug_details (NULL))
2034 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2036 if (vect_debug_details (NULL))
2037 fprintf (dump_file, "compare all store-store pairs.");
2039 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2041 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2043 struct data_reference *dra =
2044 VARRAY_GENERIC_PTR (loop_write_refs, i);
2045 struct data_reference *drb =
2046 VARRAY_GENERIC_PTR (loop_write_refs, j);
2047 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2048 return false;
2052 /* Examine load-store (true/anti) dependences. */
2054 if (vect_debug_details (NULL))
2055 fprintf (dump_file, "compare all load-store pairs.");
2057 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2059 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2061 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2062 struct data_reference *drb =
2063 VARRAY_GENERIC_PTR (loop_write_refs, j);
2064 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2065 return false;
2069 return true;
2073 /* Function vect_get_first_index.
2075 REF is a data reference.
2076 If it is an ARRAY_REF: if its lower bound is simple enough,
2077 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2078 If it is not an ARRAY_REF: REF has no "first index";
2079 ARRAY_FIRST_INDEX is zero, and the function returns TRUE. */
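/* For example, for a C array 'int a[N]' the lower bound of the
   ARRAY_REF 'a[i]' is 0, so ARRAY_FIRST_INDEX is set to 0; for a
   front end whose arrays may start at a different index (e.g. a
   Fortran array declared over 1:N) it would be 1.  */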
2081 static bool
2082 vect_get_first_index (tree ref, tree *array_first_index)
2084 tree array_start;
2086 if (TREE_CODE (ref) != ARRAY_REF)
2087 *array_first_index = size_zero_node;
2088 else
2090 array_start = array_ref_low_bound (ref);
2091 if (!host_integerp (array_start,0))
2093 if (vect_debug_details (NULL))
2095 fprintf (dump_file, "array min val not simple integer cst.");
2096 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2098 return false;
2100 *array_first_index = array_start;
2103 return true;
2107 /* Function vect_compute_data_ref_alignment
2109 Compute the misalignment of the data reference DR.
2111 FOR NOW: No analysis is actually performed. Misalignment is calculated
2112 only for trivial cases. TODO. */
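/* For illustration, assume a store 'a[i+2] = ...' where 'a' is
   declared 'short a[N]' and is (or can be forced to be) 16-byte
   aligned, and assume 16-byte vectors.  Then offset = 0, init = 2,
   array_first_index = 0, nunits = 2 and alignment = 16, so the
   computation below gives

      misalign = (0 + (2 - 0) * 2) % 16 = 4 bytes.  */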
2114 static void
2115 vect_compute_data_ref_alignment (struct data_reference *dr,
2116 loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2118 tree stmt = DR_STMT (dr);
2119 tree ref = DR_REF (dr);
2120 tree vectype;
2121 tree access_fn = DR_ACCESS_FN (dr, 0); /* FORNOW: single access_fn. */
2122 tree init;
2123 tree scalar_type;
2124 tree misalign;
2125 tree array_first_index;
2126 tree array_base = DR_BASE_NAME (dr);
2127 tree base_decl = NULL_TREE;
2128 tree bit_offset = size_zero_node;
2129 tree offset = size_zero_node;
2130 tree unit_bits = build_int_cst (unsigned_type_node, BITS_PER_UNIT);
2131 tree nunits;
2132 tree alignment;
2134 if (vect_debug_details (NULL))
2135 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2137 /* Initialize misalignment to unknown. */
2138 DR_MISALIGNMENT (dr) = -1;
2140 scalar_type = TREE_TYPE (ref);
2141 vectype = get_vectype_for_scalar_type (scalar_type);
2142 if (!vectype)
2144 if (vect_debug_details (NULL))
2146 fprintf (dump_file, "no vectype for stmt: ");
2147 print_generic_expr (dump_file, stmt, TDF_SLIM);
2148 fprintf (dump_file, "scalar_type: ");
2149 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2151 return;
2154 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base))) < TYPE_ALIGN (vectype))
2156 base_decl = vect_get_base_decl_and_bit_offset (array_base, &bit_offset);
2157 if (!base_decl)
2159 if (vect_debug_details (NULL))
2160 fprintf (dump_file, "Unknown alignment for access");
2161 return;
2164 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2165 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2166 if (!integer_zerop (bit_offset))
2168 if (vect_debug_details (NULL))
2170 fprintf (dump_file, "bit offset alignment: ");
2171 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2173 return;
2176 if (!base_decl ||
2177 (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype)
2178 && !vect_can_force_dr_alignment_p (base_decl, TYPE_ALIGN (vectype))))
2180 if (vect_debug_details (NULL))
2182 fprintf (dump_file, "can't force alignment of ref: ");
2183 print_generic_expr (dump_file, array_base, TDF_SLIM);
2185 return;
2188 if (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype))
2190 /* Force the alignment of the decl.
2191 NOTE: This is the only change to the code we make during
2192 the analysis phase, before deciding to vectorize the loop. */
2193 if (vect_debug_details (NULL))
2194 fprintf (dump_file, "force alignment");
2195 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2196 DECL_USER_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2200 /* The misalignment is:
2201 (base_alignment + offset + index_access_fn_init) % alignment.
2202 At this point we already guaranteed that base_alignment == 0,
2203 and computed the offset.
2204 It remains to check the first index accessed. */
2206 if (!vect_get_first_index (ref, &array_first_index))
2208 if (vect_debug_details (NULL))
2209 fprintf (dump_file, "no first_index for array.");
2210 return;
2213 /* Check the index of the array_ref. */
2215 init = initial_condition (access_fn);
2217 /* FORNOW: In order to simplify the handling of alignment, we make sure
2218 that the first location at which the array is accessed ('init') is on an
2219 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2220 This is too conservative, since we require that
2221 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2222 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2223 This should be relaxed in the future. */
2225 if (!init || !host_integerp (init,0))
2227 if (vect_debug_details (NULL))
2228 fprintf (dump_file, "init not simple INTEGER_CST.");
2229 return;
2232 /* alignment required, in bytes: */
2233 alignment = build_int_cst (unsigned_type_node,
2234 TYPE_ALIGN (vectype)/BITS_PER_UNIT);
2235 /* bytes per scalar element: */
2236 nunits = build_int_cst (unsigned_type_node,
2237 GET_MODE_SIZE (TYPE_MODE (scalar_type)));
2239 /* misalign = (offset + (init-array_first_index)*nunits) % alignment */
2240 if (vect_debug_details (NULL))
2242 fprintf (dump_file, "misalign = ( offset <");
2243 print_generic_expr (dump_file, offset, TDF_SLIM);
2244 fprintf (dump_file, "> + (init <");
2245 print_generic_expr (dump_file, init, TDF_SLIM);
2246 fprintf (dump_file, "> - first_indx <");
2247 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
2248 fprintf (dump_file, ">) * nunits <");
2249 print_generic_expr (dump_file, nunits, TDF_SLIM);
2250 fprintf (dump_file, ">) mod alignment <");
2251 print_generic_expr (dump_file, alignment, TDF_SLIM);
2252 fprintf (dump_file, ">");
2255 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2256 misalign = int_const_binop (MULT_EXPR, misalign, nunits, 0);
2257 misalign = int_const_binop (PLUS_EXPR, misalign, offset, 0);
2258 misalign = int_const_binop (TRUNC_MOD_EXPR, misalign, alignment, 0);
2260 if (vect_debug_details (NULL))
2262 fprintf (dump_file, "misalign = ");
2263 print_generic_expr (dump_file, misalign, TDF_SLIM);
2266 if (!host_integerp (misalign,1) || TREE_OVERFLOW (misalign))
2268 if (vect_debug_details (NULL))
2269 fprintf (dump_file, "unexpected misalign value");
2270 return;
2273 DR_MISALIGNMENT (dr) = tree_low_cst (misalign,1);
2275 if (vect_debug_details (NULL))
2276 fprintf (dump_file, "misalign = %d",DR_MISALIGNMENT (dr));
2280 /* Function vect_compute_data_refs_alignment
2282 Compute the misalignment of data references in the loop.
2283 This pass may take place at function granularity instead of at loop
2284 granularity.
2286 FOR NOW: No analysis is actually performed. Misalignment is calculated
2287 only for trivial cases. TODO. */
2289 static void
2290 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2292 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2293 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2294 unsigned int i;
2296 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2298 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2299 vect_compute_data_ref_alignment (dr, loop_vinfo);
2302 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2304 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2305 vect_compute_data_ref_alignment (dr, loop_vinfo);
2310 /* Function vect_enhance_data_refs_alignment
2312 This pass will use loop versioning and loop peeling in order to enhance
2313 the alignment of data references in the loop.
2315 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2316 original loop is to be vectorized; any other loops that are created by
2317 the transformations performed in this pass are not supposed to be
2318 vectorized. This restriction will be relaxed.
2320 FOR NOW: No transformation is actually performed. TODO. */
2322 static void
2323 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2326 This pass will require a cost model to guide it on whether to apply peeling
2327 or versioning, or a combination of the two. For example, the scheme that
2328 Intel uses when given a loop with several memory accesses is as follows:
2329 choose one memory access ('p') whose alignment you want to force by doing
2330 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2331 other accesses are not necessarily aligned, or (2) use loop versioning to
2332 generate one loop in which all accesses are aligned, and another loop in
2333 which only 'p' is necessarily aligned.
2335 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2336 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2337 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2339 Devising a cost model is the most critical aspect of this work. It will
2340 guide us on which access to peel for, whether to use loop versioning, how
2341 many versions to create, etc. The cost model will probably consist of
2342 generic considerations as well as target specific considerations (on
2343 powerpc for example, misaligned stores are more painful than misaligned
2344 loads).
2346 Here are the general steps involved in alignment enhancements:
2348 -- original loop, before alignment analysis:
2349 for (i=0; i<N; i++){
2350 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2351 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2354 -- After vect_compute_data_refs_alignment:
2355 for (i=0; i<N; i++){
2356 x = q[i]; # DR_MISALIGNMENT(q) = 3
2357 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2360 -- Possibility 1: we do loop versioning:
2361 if (p is aligned) {
2362 for (i=0; i<N; i++){ # loop 1A
2363 x = q[i]; # DR_MISALIGNMENT(q) = 3
2364 p[i] = y; # DR_MISALIGNMENT(p) = 0
2367 else {
2368 for (i=0; i<N; i++){ # loop 1B
2369 x = q[i]; # DR_MISALIGNMENT(q) = 3
2370 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2374 -- Possibility 2: we do loop peeling:
2375 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2376 x = q[i];
2377 p[i] = y;
2379 for (i = 3; i < N; i++){ # loop 2A
2380 x = q[i]; # DR_MISALIGNMENT(q) = 0
2381 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2384 -- Possibility 3: combination of loop peeling and versioning:
2385 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2386 x = q[i];
2387 p[i] = y;
2389 if (p is aligned) {
2390 for (i = 3; i<N; i++){ # loop 3A
2391 x = q[i]; # DR_MISALIGNMENT(q) = 0
2392 p[i] = y; # DR_MISALIGNMENT(p) = 0
2395 else {
2396 for (i = 3; i<N; i++){ # loop 3B
2397 x = q[i]; # DR_MISALIGNMENT(q) = 0
2398 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2402 These loops are later passed to loop_transform to be vectorized. The
2403 vectorizer will use the alignment information to guide the transformation
2404 (whether to generate regular loads/stores, or with special handling for
2405 misalignment).
2410 /* Function vect_analyze_data_refs_alignment
2412 Analyze the alignment of the data-references in the loop.
2413 FOR NOW: Until support for misaligned accesses is in place, only if all
2414 accesses are aligned can the loop be vectorized. This restriction will be
2415 relaxed. */
2417 static bool
2418 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2420 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2421 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2422 unsigned int i;
2424 if (vect_debug_details (NULL))
2425 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
2428 /* This pass may take place at function granularity instead of at loop
2429 granularity. */
2431 vect_compute_data_refs_alignment (loop_vinfo);
2434 /* This pass will use loop versioning and loop peeling in order to enhance
2435 the alignment of data references in the loop.
2436 FOR NOW: we assume that whatever versioning/peeling took place, the
2437 original loop is to be vectorized. Any other loops that were created by
2438 the transformations performed in this pass are not supposed to be
2439 vectorized. This restriction will be relaxed. */
2441 vect_enhance_data_refs_alignment (loop_vinfo);
2444 /* Finally, check that loop can be vectorized.
2445 FOR NOW: Until support for misaligned accesses is in place, only if all
2446 accesses are aligned can the loop be vectorized. This restriction will be
2447 relaxed. */
2449 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2451 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2452 if (!aligned_access_p (dr))
2454 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2455 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2456 fprintf (dump_file, "not vectorized: unaligned store.");
2457 return false;
2461 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2463 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2464 if (!aligned_access_p (dr))
2466 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2467 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2468 fprintf (dump_file, "not vectorized: unaligned load.");
2469 return false;
2473 return true;
2477 /* Function vect_analyze_data_ref_access.
2479 Analyze the access pattern of the data-reference DR. For now, a data access
2480 has to be consecutive and aligned to be considered vectorizable. */
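/* For example, the access a[i], whose access function is {0, +, 1},
   is accepted, whereas a strided access such as a[2*i], whose access
   function is {0, +, 2}, fails the strict (step == 1) check in
   vect_is_simple_iv_evolution and is rejected.  */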
2482 static bool
2483 vect_analyze_data_ref_access (struct data_reference *dr)
2485 varray_type access_fns = DR_ACCESS_FNS (dr);
2486 tree access_fn;
2487 tree init, step;
2489 /* FORNOW: handle only one dimensional arrays.
2490 This restriction will be relaxed in the future. */
2491 if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
2493 if (vect_debug_details (NULL))
2494 fprintf (dump_file, "multi dimensional array reference.");
2495 return false;
2497 access_fn = DR_ACCESS_FN (dr, 0);
2499 if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
2500 access_fn, &init, &step, true))
2502 if (vect_debug_details (NULL))
2504 fprintf (dump_file, "too complicated access function.");
2505 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2507 return false;
2510 return true;
2514 /* Function vect_analyze_data_ref_accesses.
2516 Analyze the access pattern of all the data references in the loop.
2518 FORNOW: the only access pattern that is considered vectorizable is a
2519 simple step 1 (consecutive) access.
2521 FORNOW: handle only one dimensional arrays, and pointer accesses. */
2523 static bool
2524 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2526 unsigned int i;
2527 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2528 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2530 if (vect_debug_details (NULL))
2531 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
2533 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2535 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2536 bool ok = vect_analyze_data_ref_access (dr);
2537 if (!ok)
2539 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2540 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2541 fprintf (dump_file, "not vectorized: complicated access pattern.");
2542 return false;
2546 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2548 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2549 bool ok = vect_analyze_data_ref_access (dr);
2550 if (!ok)
2552 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2553 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2554 fprintf (dump_file, "not vectorized: complicated access pattern.");
2555 return false;
2559 return true;
2563 /* Function vect_analyze_pointer_ref_access.
2565 Input:
2566 STMT - a stmt that contains a data-ref
2567 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
2569 If the data-ref access is vectorizable, return a data_reference structure
2570 that represents it (DR). Otherwise - return NULL. */
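/* For illustration, consider a store '*p = x' where 'p' is a
   'short *' that advances by one element per iteration; the scalar
   evolution of 'p' is then {p_0, +, 2} (2 being the size in bytes of
   a short, and p_0 the SSA name of the initial value).  Since the
   step equals the element size, the access is consecutive and a
   data_reference with index access function {0, +, 1} is built for
   it.  */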
2572 static struct data_reference *
2573 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
2575 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2576 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2577 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
2578 tree init, step;
2579 int step_val;
2580 tree reftype, innertype;
2581 enum machine_mode innermode;
2582 tree indx_access_fn;
2583 int loopnum = loop->num;
2584 struct data_reference *dr;
2586 if (!access_fn)
2588 if (vect_debug_stats (loop) || vect_debug_details (loop))
2589 fprintf (dump_file, "not vectorized: complicated pointer access.");
2590 return NULL;
2593 if (vect_debug_details (NULL))
2595 fprintf (dump_file, "Access function of ptr: ");
2596 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2599 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
2601 if (vect_debug_stats (loop) || vect_debug_details (loop))
2602 fprintf (dump_file, "not vectorized: pointer access is not simple.");
2603 return NULL;
2606 if (TREE_CODE (init) != SSA_NAME /* FORNOW */
2607 || !host_integerp (step,0))
2609 if (vect_debug_stats (loop) || vect_debug_details (loop))
2610 fprintf (dump_file,
2611 "not vectorized: non constant init/step for pointer access.");
2612 return NULL;
2615 step_val = TREE_INT_CST_LOW (step);
2617 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
2618 if (TREE_CODE (reftype) != POINTER_TYPE)
2620 if (vect_debug_stats (loop) || vect_debug_details (loop))
2621 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2622 return NULL;
2625 reftype = TREE_TYPE (init);
2626 if (TREE_CODE (reftype) != POINTER_TYPE)
2628 if (vect_debug_stats (loop) || vect_debug_details (loop))
2629 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2630 return NULL;
2633 innertype = TREE_TYPE (reftype);
2634 innermode = TYPE_MODE (innertype);
2635 if (GET_MODE_SIZE (innermode) != step_val)
2637 /* FORNOW: support only consecutive access */
2638 if (vect_debug_stats (loop) || vect_debug_details (loop))
2639 fprintf (dump_file, "not vectorized: non consecutive access.");
2640 return NULL;
2643 indx_access_fn =
2644 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
2645 if (vect_debug_details (NULL))
2647 fprintf (dump_file, "Access function of ptr indx: ");
2648 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
2650 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
2651 return dr;
2655 /* Function vect_analyze_data_refs.
2657 Find all the data references in the loop.
2659 FORNOW: Handle aligned INDIRECT_REFs and one dimensional ARRAY_REFs
2660 whose base is really an array (not a pointer) and whose alignment
2661 can be forced. This restriction will be relaxed. */
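/* For example, 'a[i] = x' (a one-dimensional ARRAY_REF whose base is
   a DECL) and '*p = x' (an INDIRECT_REF through a pointer with a
   simple evolution) are handled, whereas a multi-dimensional
   reference such as 'a[i][j] = x' causes the loop to be rejected
   (FORNOW).  */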
2663 static bool
2664 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2666 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2667 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2668 int nbbs = loop->num_nodes;
2669 block_stmt_iterator si;
2670 int j;
2671 struct data_reference *dr;
2673 if (vect_debug_details (NULL))
2674 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
2676 for (j = 0; j < nbbs; j++)
2678 basic_block bb = bbs[j];
2679 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2681 bool is_read = false;
2682 tree stmt = bsi_stmt (si);
2683 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2684 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2685 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2686 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2687 varray_type *datarefs = NULL;
2688 int nvuses, nv_may_defs, nv_must_defs;
2689 tree memref = NULL;
2690 tree array_base;
2691 tree symbl;
2693 /* Assumption: there exists a data-ref in stmt if and only if
2694 it has vuses/vdefs. */
2696 if (!vuses && !v_may_defs && !v_must_defs)
2697 continue;
2699 nvuses = NUM_VUSES (vuses);
2700 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2701 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2703 if (nvuses && (nv_may_defs || nv_must_defs))
2705 if (vect_debug_details (NULL))
2707 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
2708 print_generic_expr (dump_file, stmt, TDF_SLIM);
2710 return false;
2713 if (TREE_CODE (stmt) != MODIFY_EXPR)
2715 if (vect_debug_details (NULL))
2717 fprintf (dump_file, "unexpected vops in stmt: ");
2718 print_generic_expr (dump_file, stmt, TDF_SLIM);
2720 return false;
2723 if (vuses)
2725 memref = TREE_OPERAND (stmt, 1);
2726 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2727 is_read = true;
2729 else /* vdefs */
2731 memref = TREE_OPERAND (stmt, 0);
2732 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2733 is_read = false;
2736 if (TREE_CODE (memref) == INDIRECT_REF)
2738 dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
2739 if (! dr)
2740 return false;
2741 symbl = DR_BASE_NAME (dr);
2743 else if (TREE_CODE (memref) == ARRAY_REF)
2745 tree base;
2746 tree offset = size_zero_node;
2747 array_base = TREE_OPERAND (memref, 0);
2749 /* FORNOW: make sure that the array is one dimensional.
2750 This restriction will be relaxed in the future. */
2751 if (TREE_CODE (array_base) == ARRAY_REF)
2753 if (vect_debug_stats (loop) || vect_debug_details (loop))
2755 fprintf (dump_file,
2756 "not vectorized: multi-dimensional array.");
2757 print_generic_expr (dump_file, stmt, TDF_SLIM);
2759 return false;
2762 dr = analyze_array (stmt, memref, is_read);
2764 /* Find the relevant symbol for aliasing purposes. */
2765 base = DR_BASE_NAME (dr);
2766 switch (TREE_CODE (base))
2768 case VAR_DECL:
2769 symbl = base;
2770 break;
2771 /* FORNOW: Disabled.
2772 case INDIRECT_REF:
2773 symbl = TREE_OPERAND (base, 0);
2774 break;
2776 case COMPONENT_REF:
2777 /* CHECKME: could have recorded more accurate information -
2778 i.e., the actual FIELD_DECL that is being referenced -
2779 but later passes expect VAR_DECL as the nmt. */
2780 symbl = vect_get_base_decl_and_bit_offset (base, &offset);
2781 if (symbl)
2782 break;
2783 /* fall through */
2784 default:
2785 if (vect_debug_stats (loop) || vect_debug_details (loop))
2787 fprintf (dump_file,
2788 "not vectorized: unhandled struct/class field access ");
2789 print_generic_expr (dump_file, stmt, TDF_SLIM);
2791 return false;
2792 } /* switch */
2794 else
2796 if (vect_debug_stats (loop) || vect_debug_details (loop))
2798 fprintf (dump_file, "not vectorized: unhandled data ref: ");
2799 print_generic_expr (dump_file, stmt, TDF_SLIM);
2801 return false;
2804 /* Find and record the memtag assigned to this data-ref. */
2805 if (TREE_CODE (symbl) == VAR_DECL)
2806 STMT_VINFO_MEMTAG (stmt_info) = symbl;
2807 else if (TREE_CODE (symbl) == SSA_NAME)
2809 tree tag;
2810 symbl = SSA_NAME_VAR (symbl);
2811 tag = get_var_ann (symbl)->type_mem_tag;
2812 if (!tag)
2814 tree ptr = TREE_OPERAND (memref, 0);
2815 if (TREE_CODE (ptr) == SSA_NAME)
2816 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
2818 if (!tag)
2820 if (vect_debug_stats (loop) || vect_debug_details (loop))
2821 fprintf (dump_file, "not vectorized: no memtag for ref.");
2822 return false;
2824 STMT_VINFO_MEMTAG (stmt_info) = tag;
2826 else
2828 if (vect_debug_stats (loop) || vect_debug_details (loop))
2830 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
2831 print_generic_expr (dump_file, memref, TDF_SLIM);
2833 return false;
2836 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2837 STMT_VINFO_DATA_REF (stmt_info) = dr;
2841 return true;
2845 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2847 /* Function vect_mark_relevant.
2849 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2851 static void
2852 vect_mark_relevant (varray_type worklist, tree stmt)
2854 stmt_vec_info stmt_info;
2856 if (vect_debug_details (NULL))
2857 fprintf (dump_file, "mark relevant.");
2859 if (TREE_CODE (stmt) == PHI_NODE)
2861 VARRAY_PUSH_TREE (worklist, stmt);
2862 return;
2865 stmt_info = vinfo_for_stmt (stmt);
2867 if (!stmt_info)
2869 if (vect_debug_details (NULL))
2871 fprintf (dump_file, "mark relevant: no stmt info!!.");
2872 print_generic_expr (dump_file, stmt, TDF_SLIM);
2874 return;
2877 if (STMT_VINFO_RELEVANT_P (stmt_info))
2879 if (vect_debug_details (NULL))
2880 fprintf (dump_file, "already marked relevant.");
2881 return;
2884 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2885 VARRAY_PUSH_TREE (worklist, stmt);
2889 /* Function vect_stmt_relevant_p.
2891 Return true if STMT in loop that is represented by LOOP_VINFO is
2892 "relevant for vectorization".
2894 A stmt is considered "relevant for vectorization" if:
2895 - it has uses outside the loop.
2896 - it has vdefs (it alters memory).
2897 - it is a control stmt in the loop (other than the exit condition).
2899 CHECKME: what other side effects would the vectorizer allow? */
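/* For example, in

      for (i=0; i<N; i++)
        a[i] = b[i] + c[i];

   the store to a[i] is relevant because it has vdefs, whereas the
   increment of 'i' is not relevant by itself: it has no vdefs and
   its only uses inside the loop are the array indexing and the exit
   test.  */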
2901 static bool
2902 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2904 v_may_def_optype v_may_defs;
2905 v_must_def_optype v_must_defs;
2906 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2907 int i;
2908 dataflow_t df;
2909 int num_uses;
2911 /* cond stmt other than loop exit cond. */
2912 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2913 return true;
2915 /* changing memory. */
2916 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2917 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2918 if (v_may_defs || v_must_defs)
2920 if (vect_debug_details (NULL))
2921 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
2922 return true;
2925 /* uses outside the loop. */
2926 df = get_immediate_uses (stmt);
2927 num_uses = num_immediate_uses (df);
2928 for (i = 0; i < num_uses; i++)
2930 tree use = immediate_use (df, i);
2931 basic_block bb = bb_for_stmt (use);
2932 if (!flow_bb_inside_loop_p (loop, bb))
2934 if (vect_debug_details (NULL))
2935 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
2936 return true;
2940 return false;
2944 /* Function vect_mark_stmts_to_be_vectorized.
2946 Not all stmts in the loop need to be vectorized. For example:
2948 for i...
2949 for j...
2950 1. T0 = i + j
2951 2. T1 = a[T0]
2953 3. j = j + 1
2955 Stmts 1 and 3 do not need to be vectorized, because loop control and
2956 addressing of vectorized data-refs are handled differently.
2958 This pass detects such stmts. */
2960 static bool
2961 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2963 varray_type worklist;
2964 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2965 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2966 unsigned int nbbs = loop->num_nodes;
2967 block_stmt_iterator si;
2968 tree stmt;
2969 stmt_ann_t ann;
2970 unsigned int i;
2971 int j;
2972 use_optype use_ops;
2973 stmt_vec_info stmt_info;
2975 if (vect_debug_details (NULL))
2976 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
2978 VARRAY_TREE_INIT (worklist, 64, "work list");
2980 /* 1. Init worklist. */
2982 for (i = 0; i < nbbs; i++)
2984 basic_block bb = bbs[i];
2985 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2987 stmt = bsi_stmt (si);
2989 if (vect_debug_details (NULL))
2991 fprintf (dump_file, "init: stmt relevant? ");
2992 print_generic_expr (dump_file, stmt, TDF_SLIM);
2995 stmt_info = vinfo_for_stmt (stmt);
2996 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2998 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2999 vect_mark_relevant (worklist, stmt);
3004 /* 2. Process_worklist */
3006 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3008 stmt = VARRAY_TOP_TREE (worklist);
3009 VARRAY_POP (worklist);
3011 if (vect_debug_details (NULL))
3013 fprintf (dump_file, "worklist: examine stmt: ");
3014 print_generic_expr (dump_file, stmt, TDF_SLIM);
3017 /* Examine the USES in this statement. Mark all the statements which
3018 feed this statement's uses as "relevant", unless the USE is used as
3019 an array index. */
3021 if (TREE_CODE (stmt) == PHI_NODE)
3023 /* follow the def-use chain inside the loop. */
3024 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3026 tree arg = PHI_ARG_DEF (stmt, j);
3027 tree def_stmt = NULL_TREE;
3028 basic_block bb;
3029 if (!vect_is_simple_use (arg, loop, &def_stmt))
3031 if (vect_debug_details (NULL))
3032 fprintf (dump_file, "worklist: unsupported use.");
3033 varray_clear (worklist);
3034 return false;
3036 if (!def_stmt)
3037 continue;
3039 if (vect_debug_details (NULL))
3041 fprintf (dump_file, "worklist: def_stmt: ");
3042 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3045 bb = bb_for_stmt (def_stmt);
3046 if (flow_bb_inside_loop_p (loop, bb))
3047 vect_mark_relevant (worklist, def_stmt);
3051 ann = stmt_ann (stmt);
3052 use_ops = USE_OPS (ann);
3054 for (i = 0; i < NUM_USES (use_ops); i++)
3056 tree use = USE_OP (use_ops, i);
3058 /* We are only interested in uses that need to be vectorized. Uses
3059 that are used for address computation are not considered relevant.  */
3061 if (exist_non_indexing_operands_for_use_p (use, stmt))
3063 tree def_stmt = NULL_TREE;
3064 basic_block bb;
3065 if (!vect_is_simple_use (use, loop, &def_stmt))
3067 if (vect_debug_details (NULL))
3068 fprintf (dump_file, "worklist: unsupported use.");
3069 varray_clear (worklist);
3070 return false;
3073 if (!def_stmt)
3074 continue;
3076 if (vect_debug_details (NULL))
3078 fprintf (dump_file, "worklist: examine use %d: ", i);
3079 print_generic_expr (dump_file, use, TDF_SLIM);
3082 bb = bb_for_stmt (def_stmt);
3083 if (flow_bb_inside_loop_p (loop, bb))
3084 vect_mark_relevant (worklist, def_stmt);
3087 } /* while worklist */
3089 varray_clear (worklist);
3090 return true;
3094 /* Function vect_get_loop_niters.
3096 Determine how many iterations the loop executes. */
3098 static tree
3099 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3101 tree niters;
3103 if (vect_debug_details (NULL))
3104 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3106 niters = number_of_iterations_in_loop (loop);
3108 if (niters != NULL_TREE
3109 && niters != chrec_dont_know
3110 && host_integerp (niters,0))
3112 *number_of_iterations = TREE_INT_CST_LOW (niters);
3114 if (vect_debug_details (NULL))
3115 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3116 *number_of_iterations);
3119 return get_loop_exit_condition (loop);
3123 /* Function vect_analyze_loop_form.
3125 Verify the following restrictions (some may be relaxed in the future):
3126 - it's an inner-most loop
3127 - number of BBs = 2 (which are the loop header and the latch)
3128 - the loop has a pre-header
3129 - the loop has a single entry and exit
3130 - the loop exit condition is simple enough, and the number of iterations
3131 can be analyzed (a countable loop). */
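/* For illustration, a loop already in the expected do-while shape,
   such as

      i = 0;
      do {
        a[i] = b[i];
        i = i + 1;
      } while (i < 16);

   satisfies these restrictions: it is inner-most, consists of two
   basic blocks (a header holding all the executable stmts and an
   empty latch), has a single entry and exit, and has a simple exit
   condition with a known iteration count.  */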
3133 static loop_vec_info
3134 vect_analyze_loop_form (struct loop *loop)
3136 loop_vec_info loop_vinfo;
3137 tree loop_cond;
3138 HOST_WIDE_INT number_of_iterations = -1;
3140 if (vect_debug_details (loop))
3141 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3143 if (loop->inner
3144 || !loop->single_exit
3145 || loop->num_nodes != 2)
3147 if (vect_debug_stats (loop) || vect_debug_details (loop))
3149 fprintf (dump_file, "not vectorized: bad loop form. ");
3150 if (loop->inner)
3151 fprintf (dump_file, "nested loop.");
3152 else if (!loop->single_exit)
3153 fprintf (dump_file, "multiple exits.");
3154 else if (loop->num_nodes != 2)
3155 fprintf (dump_file, "too many BBs in loop.");
3158 return NULL;
3161 /* We assume that the loop exit condition is at the end of the loop, i.e.,
3162 that the loop is represented as a do-while (with a proper if-guard
3163 before the loop if needed), where the loop header contains all the
3164 executable statements, and the latch is empty. */
3165 if (!empty_block_p (loop->latch))
3167 if (vect_debug_stats (loop) || vect_debug_details (loop))
3168 fprintf (dump_file, "not vectorized: unexpected loop form.");
3169 return NULL;
3172 if (empty_block_p (loop->header))
3174 if (vect_debug_stats (loop) || vect_debug_details (loop))
3175 fprintf (dump_file, "not vectorized: empty loop.");
3176 return NULL;
3179 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3180 if (!loop_cond)
3182 if (vect_debug_stats (loop) || vect_debug_details (loop))
3183 fprintf (dump_file, "not vectorized: complicated exit condition.");
3184 return NULL;
3187 if (number_of_iterations < 0)
3189 if (vect_debug_stats (loop) || vect_debug_details (loop))
3190 fprintf (dump_file, "not vectorized: unknown loop bound.");
3191 return NULL;
3194 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3196 if (vect_debug_stats (loop) || vect_debug_details (loop))
3197 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3198 return NULL;
3201 loop_vinfo = new_loop_vec_info (loop);
3202 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3203 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3205 return loop_vinfo;
3209 /* Function vect_analyze_loop.
3211 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3212 for it. The different analyses will record information in the
3213 loop_vec_info struct. */
3215 static loop_vec_info
3216 vect_analyze_loop (struct loop *loop)
3218 bool ok;
3219 loop_vec_info loop_vinfo;
3221 if (vect_debug_details (NULL))
3222 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3224 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
3226 loop_vinfo = vect_analyze_loop_form (loop);
3227 if (!loop_vinfo)
3229 if (vect_debug_details (loop))
3230 fprintf (dump_file, "bad loop form.");
3231 return NULL;
3234 /* Find all data references in the loop (which correspond to vdefs/vuses)
3235 and analyze their evolution in the loop.
3237 FORNOW: Handle only simple, one-dimensional array references, whose
3238 alignment can be forced, and aligned pointer-references. */
3240 ok = vect_analyze_data_refs (loop_vinfo);
3241 if (!ok)
3243 if (vect_debug_details (loop))
3244 fprintf (dump_file, "bad data references.");
3245 destroy_loop_vec_info (loop_vinfo);
3246 return NULL;
3250 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3252 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3253 if (!ok)
3255 if (vect_debug_details (loop))
3256 fprintf (dump_file, "unexpected pattern.");
3257 if (vect_debug_details (loop))
3258 fprintf (dump_file, "not vectorized: unexpected pattern.");
3259 destroy_loop_vec_info (loop_vinfo);
3260 return NULL;
3264 /* Check that all cross-iteration scalar data-flow cycles are OK.
3265 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3267 ok = vect_analyze_scalar_cycles (loop_vinfo);
3268 if (!ok)
3270 if (vect_debug_details (loop))
3271 fprintf (dump_file, "bad scalar cycle.");
3272 destroy_loop_vec_info (loop_vinfo);
3273 return NULL;
3277 /* Analyze data dependences between the data-refs in the loop.
3278 FORNOW: fail at the first data dependence that we encounter. */
3280 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3281 if (!ok)
3283 if (vect_debug_details (loop))
3284 fprintf (dump_file, "bad data dependence.");
3285 destroy_loop_vec_info (loop_vinfo);
3286 return NULL;
3290 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3291 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3293 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3294 if (!ok)
3296 if (vect_debug_details (loop))
3297 fprintf (dump_file, "bad data access.");
3298 destroy_loop_vec_info (loop_vinfo);
3299 return NULL;
3303 /* Analyze the alignment of the data-refs in the loop.
3304 FORNOW: Only aligned accesses are handled. */
3306 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3307 if (!ok)
3309 if (vect_debug_details (loop))
3310 fprintf (dump_file, "bad data alignment.");
3311 destroy_loop_vec_info (loop_vinfo);
3312 return NULL;
3316 /* Scan all the operations in the loop and make sure they are
3317 vectorizable. */
3319 ok = vect_analyze_operations (loop_vinfo);
3320 if (!ok)
3322 if (vect_debug_details (loop))
3323 fprintf (dump_file, "bad operation or unsupported loop bound.");
3324 destroy_loop_vec_info (loop_vinfo);
3325 return NULL;
3328 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3330 return loop_vinfo;
3334 /* Function need_imm_uses_for.
3336 Return whether we ought to include information for 'var'
3337 when calculating immediate uses. For this pass we only want use
3338 information for non-virtual variables. */
3340 static bool
3341 need_imm_uses_for (tree var)
3343 return is_gimple_reg (var);
3347 /* Function vectorize_loops.
3349 Entry Point to loop vectorization phase. */
3351 void
3352 vectorize_loops (struct loops *loops)
3354 unsigned int i, loops_num;
3355 unsigned int num_vectorized_loops = 0;
3357 /* Does the target support SIMD? */
3358 /* FORNOW: until more sophisticated machine modelling is in place. */
3359 if (!UNITS_PER_SIMD_WORD)
3361 if (vect_debug_details (NULL))
3362 fprintf (dump_file, "vectorizer: target vector size is not defined.");
3363 return;
3366 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
3368 /* ----------- Analyze loops. ----------- */
3370 /* If some loop was duplicated, it gets a bigger number
3371 than all previously defined loops. This fact allows us to run
3372 only over initial loops skipping newly generated ones. */
3373 loops_num = loops->num;
3374 for (i = 1; i < loops_num; i++)
3376 loop_vec_info loop_vinfo;
3377 struct loop *loop = loops->parray[i];
3379 if (!loop)
3380 continue;
3382 loop_vinfo = vect_analyze_loop (loop);
3383 loop->aux = loop_vinfo;
3385 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
3386 continue;
3388 vect_transform_loop (loop_vinfo, loops);
3389 num_vectorized_loops++;
3392 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
3393 fprintf (dump_file, "\nvectorized %u loops in function.\n",
3394 num_vectorized_loops);
3396 /* ----------- Finalize. ----------- */
3398 free_df ();
3399 for (i = 1; i < loops_num; i++)
3401 struct loop *loop = loops->parray[i];
3402 loop_vec_info loop_vinfo = loop->aux;
3403 if (!loop)
3404 continue;
3405 destroy_loop_vec_info (loop_vinfo);
3406 loop->aux = NULL;
3409 loop_commit_inserts ();
3410 rewrite_into_ssa (false);
3411 if (bitmap_first_set_bit (vars_to_rename) >= 0)
3413 /* The rewrite of ssa names may cause violation of loop closed ssa
3414 form invariants. TODO -- avoid these rewrites completely.
3415 Information in virtual phi nodes is sufficient for it. */
3416 rewrite_into_loop_closed_ssa ();
3418 bitmap_clear (vars_to_rename);