1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT, etc.).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
37 as if it had been manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS of one-dimensional
61 arrays whose base is an array DECL (not a pointer), and INDIRECT_REFS
62 through pointers; both array and pointer accesses are required to have a
63 simple (consecutive) access pattern.
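For illustration (assumed examples): an access like 'a[i]' or '*(p+i)'
that advances by one element per iteration has a simple consecutive
pattern and is handled; a strided access such as 'a[2*i]', or a
multidimensional access such as 'a[i][j]', is not supported by this
first implementation.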
65 Analysis phase:
66 ===============
67 The driver for the analysis phase is vect_analyze_loop_nest().
68 It applies a set of analyses, some of which rely on the scalar evolution
69 analyzer (scev) developed by Sebastian Pop.
71 During the analysis phase the vectorizer records some information
72 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
73 loop, as well as general information about the loop as a whole, which is
74 recorded in a "loop_vec_info" struct attached to each loop.
76 Transformation phase:
77 =====================
78 The loop transformation phase scans all the stmts in the loop, and
79 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
80 the loop that needs to be vectorized. It inserts the vector code sequence
81 just before the scalar stmt S, and records a pointer to the vector code
82 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
83 attached to S). This pointer will be used for the vectorization of following
84 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
85 otherwise, we rely on dead code elimination for removing it.
87 For example, say stmt S1 was vectorized into stmt VS1:
89 VS1: vb = px[i];
90 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
91 S2: a = b;
93 To vectorize stmt S2, the vectorizer first finds the stmt that defines
94 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
95 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
96 resulting sequence would be:
98 VS1: vb = px[i];
99 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 VS2: va = vb;
101 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
103 Operands that are not SSA_NAMEs are data-refs that appear in
104 load/store operations (like 'x[i]' in S1), and are handled differently.
106 Target modeling:
107 =================
108 Currently the only target specific information that is used is the
109 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
110 support different sizes of vectors will, for now, need to specify one value
111 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
113 Since we only vectorize operations whose vector form can be
114 expressed using existing tree codes, to verify that an operation is
115 supported, the vectorizer checks the relevant optab at the relevant
116 machine_mode (e.g., add_optab->handlers[(int) V8HImode].insn_code). If
117 the value found is CODE_FOR_nothing, then there's no target support, and
118 we can't vectorize the stmt.
120 For additional information on this project see:
121 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
122 */
124 #include "config.h"
125 #include "system.h"
126 #include "coretypes.h"
127 #include "tm.h"
128 #include "errors.h"
129 #include "ggc.h"
130 #include "tree.h"
131 #include "target.h"
133 #include "rtl.h"
134 #include "basic-block.h"
135 #include "diagnostic.h"
136 #include "tree-flow.h"
137 #include "tree-dump.h"
138 #include "timevar.h"
139 #include "cfgloop.h"
140 #include "cfglayout.h"
141 #include "expr.h"
142 #include "optabs.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static void vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static void vect_align_data_ref (tree);
169 static void vect_enhance_data_refs_alignment (loop_vec_info);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree, struct loop *, tree *);
173 static bool exist_non_indexing_operands_for_use_p (tree, tree);
174 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175 static void vect_mark_relevant (varray_type, tree);
176 static bool vect_stmt_relevant_p (tree, loop_vec_info);
177 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
178 static void vect_compute_data_ref_alignment
179 (struct data_reference *, loop_vec_info);
180 static bool vect_analyze_data_ref_access (struct data_reference *);
181 static bool vect_get_first_index (tree, tree *);
182 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
183 static tree vect_get_base_decl_and_bit_offset (tree, tree *);
184 static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool);
186 /* Utility functions for the code transformation. */
187 static tree vect_create_destination_var (tree, tree);
188 static tree vect_create_data_ref (tree, block_stmt_iterator *);
189 static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *);
190 static tree get_vectype_for_scalar_type (tree);
191 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
192 static tree vect_get_vec_def_for_operand (tree, tree);
193 static tree vect_init_vector (tree, tree);
194 static void vect_finish_stmt_generation
195 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
197 /* Utilities for creation and deletion of vec_info structs. */
198 loop_vec_info new_loop_vec_info (struct loop *loop);
199 void destroy_loop_vec_info (loop_vec_info);
200 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
202 static bool vect_debug_stats (struct loop *loop);
203 static bool vect_debug_details (struct loop *loop);
206 /* Function new_stmt_vec_info.
208 Create and initialize a new stmt_vec_info struct for STMT. */
210 stmt_vec_info
211 new_stmt_vec_info (tree stmt, struct loop *loop)
213 stmt_vec_info res;
214 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
216 STMT_VINFO_TYPE (res) = undef_vec_info_type;
217 STMT_VINFO_STMT (res) = stmt;
218 STMT_VINFO_LOOP (res) = loop;
219 STMT_VINFO_RELEVANT_P (res) = 0;
220 STMT_VINFO_VECTYPE (res) = NULL;
221 STMT_VINFO_VEC_STMT (res) = NULL;
222 STMT_VINFO_DATA_REF (res) = NULL;
223 STMT_VINFO_MEMTAG (res) = NULL;
225 return res;
229 /* Function new_loop_vec_info.
231 Create and initialize a new loop_vec_info struct for LOOP, as well as
232 stmt_vec_info structs for all the stmts in LOOP. */
234 loop_vec_info
235 new_loop_vec_info (struct loop *loop)
237 loop_vec_info res;
238 basic_block *bbs;
239 block_stmt_iterator si;
240 unsigned int i;
242 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
244 bbs = get_loop_body (loop);
246 /* Create stmt_info for all stmts in the loop. */
247 for (i = 0; i < loop->num_nodes; i++)
249 basic_block bb = bbs[i];
250 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
252 tree stmt = bsi_stmt (si);
253 stmt_ann_t ann;
255 get_stmt_operands (stmt);
256 ann = stmt_ann (stmt);
257 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
261 LOOP_VINFO_LOOP (res) = loop;
262 LOOP_VINFO_BBS (res) = bbs;
263 LOOP_VINFO_EXIT_COND (res) = NULL;
264 LOOP_VINFO_NITERS (res) = -1;
265 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
266 LOOP_VINFO_VECT_FACTOR (res) = 0;
267 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
268 "loop_write_datarefs");
269 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
270 "loop_read_datarefs");
271 return res;
275 /* Function destroy_loop_vec_info.
277 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
278 stmts in the loop. */
280 void
281 destroy_loop_vec_info (loop_vec_info loop_vinfo)
283 struct loop *loop;
284 basic_block *bbs;
285 int nbbs;
286 block_stmt_iterator si;
287 int j;
289 if (!loop_vinfo)
290 return;
292 loop = LOOP_VINFO_LOOP (loop_vinfo);
294 bbs = LOOP_VINFO_BBS (loop_vinfo);
295 nbbs = loop->num_nodes;
297 for (j = 0; j < nbbs; j++)
299 basic_block bb = bbs[j];
300 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
302 tree stmt = bsi_stmt (si);
303 stmt_ann_t ann = stmt_ann (stmt);
304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
305 free (stmt_info);
306 set_stmt_info (ann, NULL);
310 free (LOOP_VINFO_BBS (loop_vinfo));
311 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
312 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
314 free (loop_vinfo);
318 /* Function vect_debug_stats.
320 For vectorization statistics dumps. */
322 static bool
323 vect_debug_stats (struct loop *loop)
325 basic_block bb;
326 block_stmt_iterator si;
327 tree node = NULL_TREE;
329 if (!dump_file || !(dump_flags & TDF_STATS))
330 return false;
332 if (!loop)
334 fprintf (dump_file, "\n");
335 return true;
338 if (!loop->header)
339 return false;
341 bb = loop->header;
343 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
345 node = bsi_stmt (si);
346 if (node && EXPR_P (node) && EXPR_LOCUS (node))
347 break;
350 if (node && EXPR_P (node) && EXPR_LOCUS (node)
351 && EXPR_FILENAME (node) && EXPR_LINENO (node))
353 fprintf (dump_file, "\nloop at %s:%d: ",
354 EXPR_FILENAME (node), EXPR_LINENO (node));
355 return true;
358 return false;
362 /* Function vect_debug_details.
364 For vectorization debug dumps. */
366 static bool
367 vect_debug_details (struct loop *loop)
369 basic_block bb;
370 block_stmt_iterator si;
371 tree node = NULL_TREE;
373 if (!dump_file || !(dump_flags & TDF_DETAILS))
374 return false;
376 if (!loop)
378 fprintf (dump_file, "\n");
379 return true;
382 if (!loop->header)
383 return false;
385 bb = loop->header;
387 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
389 node = bsi_stmt (si);
390 if (node && EXPR_P (node) && EXPR_LOCUS (node))
391 break;
394 if (node && EXPR_P (node) && EXPR_LOCUS (node)
395 && EXPR_FILENAME (node) && EXPR_LINENO (node))
397 fprintf (dump_file, "\nloop at %s:%d: ",
398 EXPR_FILENAME (node), EXPR_LINENO (node));
399 return true;
402 return false;
405 /* Function vect_get_base_decl_and_bit_offset.
407 Get the decl on which the data reference REF is based,
408 and compute the OFFSET from it in bits on the way.
409 FORNOW: Handle only component-refs that consist of
410 VAR_DECLs (no ARRAY_REF or INDIRECT_REF). */
412 static tree
413 vect_get_base_decl_and_bit_offset (tree ref, tree *offset)
415 tree decl;
416 if (TREE_CODE (ref) == VAR_DECL)
417 return ref;
419 if (TREE_CODE (ref) == COMPONENT_REF)
421 tree this_offset;
422 tree oprnd0 = TREE_OPERAND (ref, 0);
423 tree oprnd1 = TREE_OPERAND (ref, 1);
425 this_offset = bit_position (oprnd1);
426 if (!host_integerp (this_offset,1))
427 return NULL_TREE;
429 decl = vect_get_base_decl_and_bit_offset (oprnd0, offset);
431 if (decl)
433 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
435 if (!host_integerp (*offset,1) || TREE_OVERFLOW (*offset))
436 return NULL_TREE;
438 if (vect_debug_details (NULL))
440 print_generic_expr (dump_file, ref, TDF_SLIM);
441 fprintf (dump_file, " --> total offset for ref: ");
442 print_generic_expr (dump_file, *offset, TDF_SLIM);
446 return decl;
449 /* TODO: extend to handle more cases. */
450 return NULL_TREE;
454 /* Function vect_can_force_dr_alignment_p.
456 Returns whether the alignment of DECL can be forced to an
457 ALIGNMENT-bit boundary. */
459 static bool
460 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
462 if (TREE_CODE (decl) != VAR_DECL)
463 return false;
465 if (DECL_EXTERNAL (decl))
466 return false;
468 if (TREE_STATIC (decl))
469 return (alignment <= MAX_OFILE_ALIGNMENT);
470 else
471 /* This is not 100% correct. The absolute correct stack alignment
472 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
473 PREFERRED_STACK_BOUNDARY is honored by all translation units.
474 However, until someone implements forced stack alignment, SSE
475 isn't really usable without this. */
476 return (alignment <= PREFERRED_STACK_BOUNDARY);
480 /* Function vect_get_new_vect_var.
482 Returns a name for a new variable. The current naming scheme prepends the
483 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
484 NAME (when NAME is provided), to form the name of the new
485 vectorizer-generated variable. */
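/* For example (illustrative, based on the prefixes used below): for NAME "b"
this yields a temporary named "vect_b" when VAR_KIND is vect_simple_var,
or "vect_pb" when VAR_KIND is vect_pointer_var; with no NAME, the bare
prefix alone is used. */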
487 static tree
488 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
490 const char *prefix;
491 int prefix_len;
492 tree new_vect_var;
494 if (var_kind == vect_simple_var)
495 prefix = "vect_";
496 else
497 prefix = "vect_p";
499 prefix_len = strlen (prefix);
501 if (name)
502 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
503 else
504 new_vect_var = create_tmp_var (type, prefix);
506 return new_vect_var;
510 /* Function vect_create_index_for_array_ref.
512 Create (and return) an index variable, along with its update chain in the
513 loop. This variable will be used to access a memory location in a vector
514 operation.
516 Input:
517 STMT: The stmt that contains a memory data-ref.
518 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
519 function can be added here, or in the loop pre-header.
521 FORNOW: We are only handling array accesses with step 1. */
523 static tree
524 vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi)
526 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
527 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
528 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
529 tree expr = DR_REF (dr);
530 tree access_fn;
531 tree init, step;
532 loop_vec_info loop_info = loop->aux;
533 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info);
534 tree vf;
535 tree array_first_index;
536 tree indx_before_incr, indx_after_incr;
537 int loopnum = loop->num;
538 bool ok;
539 #ifdef ENABLE_CHECKING
540 varray_type access_fns = DR_ACCESS_FNS (dr);
542 /* FORNOW: handling only one dimensional arrays. */
543 if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
544 abort ();
546 if (!vectorization_factor)
547 abort ();
548 #endif
550 access_fn = DR_ACCESS_FN (dr, 0);
551 ok = vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, true)
552 && vect_get_first_index (expr, &array_first_index);
554 #ifdef ENABLE_CHECKING
555 if (!ok)
556 abort ();
558 /* FORNOW: Handling only constant 'init'. */
559 if (TREE_CODE (init) != INTEGER_CST)
560 abort ();
561 #endif
563 vf = build_int_cst (unsigned_type_node, vectorization_factor);
565 if (vect_debug_details (NULL))
567 fprintf (dump_file, "int vf = %d",vectorization_factor);
568 fprintf (dump_file, ", vf:");
569 print_generic_expr (dump_file, vf, TDF_SLIM);
570 fprintf (dump_file, ", init:");
571 print_generic_expr (dump_file, init, TDF_SLIM);
572 fprintf (dump_file, ", array_first_index:");
573 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
576 /* Calculate the 'init' of the new index.
577 init = (init - array_first_index) / vectorization_factor */
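/* For example (illustrative): if the scalar access starts at a[8], the
array's first index is 0, and the vectorization factor is 4, the new
vector index starts at (8 - 0) / 4 = 2. */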
578 init = int_const_binop (TRUNC_DIV_EXPR,
579 int_const_binop (MINUS_EXPR, init, array_first_index, 1),
580 vf, 1);
582 /* Calculate the 'step' of the new index. FORNOW: always 1. */
583 step = size_one_node;
585 if (vect_debug_details (NULL))
587 fprintf (dump_file, "create iv for (");
588 print_generic_expr (dump_file, init, TDF_SLIM);
589 fprintf (dump_file, ", + ,");
590 print_generic_expr (dump_file, step, TDF_SLIM);
591 fprintf (dump_file, ")");
594 create_iv (init, step, NULL_TREE, loop, bsi, false,
595 &indx_before_incr, &indx_after_incr);
597 return indx_before_incr;
601 /* Function get_vectype_for_scalar_type.
603 Returns the vector type corresponding to SCALAR_TYPE as supported
604 by the target. */
606 static tree
607 get_vectype_for_scalar_type (tree scalar_type)
609 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
610 int nbytes = GET_MODE_SIZE (inner_mode);
611 int nunits;
613 if (nbytes == 0)
614 return NULL_TREE;
616 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
617 is expected. */
618 nunits = UNITS_PER_SIMD_WORD / nbytes;
620 return build_vector_type (scalar_type, nunits);
624 /* Function vect_align_data_ref.
626 Handle misalignment of a memory access.
628 FORNOW: Can't handle misaligned accesses.
629 Make sure that the dataref is aligned. */
631 static void
632 vect_align_data_ref (tree stmt)
634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
635 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
637 /* FORNOW: can't handle misaligned accesses;
638 all accesses expected to be aligned. */
639 if (!aligned_access_p (dr))
640 abort ();
644 /* Function vect_create_data_ref.
646 Create a memory reference expression for vector access, to be used in a
647 vector load/store stmt.
649 Input:
650 STMT: a stmt that references memory. Expected to be of the form
651 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
652 BSI: block_stmt_iterator where new stmts can be added.
654 Output:
655 1. Declare a new ptr to vector_type, and have it point to the array base.
656 For example, for vector of type V8HI:
657 v8hi *p0;
658 p0 = (v8hi *)&a;
659 2. Create a data-reference based on the new vector pointer p0, and using
660 a new index variable 'idx'. Return the expression '(*p0)[idx]'.
662 FORNOW: handle only aligned and consecutive accesses. */
664 static tree
665 vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
667 tree new_base;
668 tree data_ref;
669 tree idx;
670 tree vec_stmt;
671 tree new_temp;
672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
674 tree vect_ptr_type;
675 tree vect_ptr;
676 tree addr_ref;
677 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
678 tree array_type;
679 tree base_addr = NULL_TREE;
680 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
681 edge pe;
682 tree tag;
683 tree addr_expr;
684 tree scalar_ptr_type;
685 tree use;
686 ssa_op_iter iter;
688 /* FORNOW: make sure the data reference is aligned. */
689 vect_align_data_ref (stmt);
691 addr_ref = DR_BASE_NAME (dr);
693 array_type = build_array_type (vectype, 0);
694 TYPE_ALIGN (array_type) = TYPE_ALIGN (TREE_TYPE (addr_ref));
695 vect_ptr_type = build_pointer_type (array_type);
696 scalar_ptr_type = build_pointer_type (TREE_TYPE (addr_ref));
698 if (vect_debug_details (NULL))
700 fprintf (dump_file, "create array_ref of type: ");
701 print_generic_expr (dump_file, vectype, TDF_SLIM);
704 /*** create: vectype_array *p; ***/
705 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
706 get_name (addr_ref));
707 add_referenced_tmp_var (vect_ptr);
709 #ifdef ENABLE_CHECKING
710 if (TREE_CODE (addr_ref) != VAR_DECL
711 && TREE_CODE (addr_ref) != COMPONENT_REF
712 && TREE_CODE (addr_ref) != SSA_NAME)
713 abort ();
714 #endif
716 if (vect_debug_details (NULL))
718 if (TREE_CODE (addr_ref) == VAR_DECL)
719 fprintf (dump_file, "vectorizing an array ref: ");
720 else if (TREE_CODE (addr_ref) == SSA_NAME)
721 fprintf (dump_file, "vectorizing a pointer ref: ");
722 else if (TREE_CODE (addr_ref) == COMPONENT_REF)
723 fprintf (dump_file, "vectorizing a record ref: ");
724 print_generic_expr (dump_file, addr_ref, TDF_SLIM);
727 /* Get base address: */
728 if (TREE_CODE (addr_ref) == SSA_NAME)
729 base_addr = addr_ref;
730 else
731 base_addr = build_fold_addr_expr (addr_ref);
733 /* Handle aliasing: */
734 tag = STMT_VINFO_MEMTAG (stmt_info);
735 #ifdef ENABLE_CHECKING
736 if (!tag)
737 abort ();
738 #endif
739 get_var_ann (vect_ptr)->type_mem_tag = tag;
741 /* Mark for renaming all aliased variables
742 (i.e., the may-aliases of the type-mem-tag). */
743 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter,
744 (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))
746 if (TREE_CODE (use) == SSA_NAME)
747 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
750 pe = loop_preheader_edge (loop);
752 /*** create: p = (vectype *)&a; ***/
754 /* addr_expr = &a */
755 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
756 get_name (addr_ref));
757 add_referenced_tmp_var (addr_expr);
758 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, base_addr);
759 new_temp = make_ssa_name (addr_expr, vec_stmt);
760 TREE_OPERAND (vec_stmt, 0) = new_temp;
761 bsi_insert_on_edge (pe, vec_stmt);
763 /* vect_ptr = (vectype_array *)&a; */
764 vec_stmt = fold_convert (vect_ptr_type, new_temp);
765 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
766 new_temp = make_ssa_name (vect_ptr, vec_stmt);
767 TREE_OPERAND (vec_stmt, 0) = new_temp;
768 bsi_insert_on_edge (pe, vec_stmt);
770 /*** create data ref: '(*p)[idx]' ***/
772 idx = vect_create_index_for_array_ref (stmt, bsi);
774 new_base = build_fold_indirect_ref (new_temp);
775 data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);
777 if (vect_debug_details (NULL))
779 fprintf (dump_file, "created new data-ref: ");
780 print_generic_expr (dump_file, data_ref, TDF_SLIM);
783 return data_ref;
787 /* Function vect_create_destination_var.
789 Create a new temporary of type VECTYPE. */
791 static tree
792 vect_create_destination_var (tree scalar_dest, tree vectype)
794 tree vec_dest;
795 const char *new_name;
797 #ifdef ENABLE_CHECKING
798 if (TREE_CODE (scalar_dest) != SSA_NAME)
799 abort ();
800 #endif
802 new_name = get_name (scalar_dest);
803 if (!new_name)
804 new_name = "var_";
805 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
806 add_referenced_tmp_var (vec_dest);
808 return vec_dest;
812 /* Function vect_init_vector.
814 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
815 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
816 used in the vectorization of STMT. */
818 static tree
819 vect_init_vector (tree stmt, tree vector_var)
821 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
822 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
823 tree new_var;
824 tree init_stmt;
825 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
826 tree vec_oprnd;
827 edge pe;
828 tree new_temp;
830 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
831 add_referenced_tmp_var (new_var);
833 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
834 new_temp = make_ssa_name (new_var, init_stmt);
835 TREE_OPERAND (init_stmt, 0) = new_temp;
837 pe = loop_preheader_edge (loop);
838 bsi_insert_on_edge (pe, init_stmt);
840 if (vect_debug_details (NULL))
842 fprintf (dump_file, "created new init_stmt: ");
843 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
846 vec_oprnd = TREE_OPERAND (init_stmt, 0);
847 return vec_oprnd;
851 /* Function vect_get_vec_def_for_operand.
853 OP is an operand in STMT. This function returns a (vector) def that will be
854 used in the vectorized stmt for STMT.
856 In the case that OP is an SSA_NAME which is defined in the loop, then
857 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
859 In case OP is an invariant or constant, a new stmt that creates a vector def
860 needs to be introduced. */
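/* For example (illustrative): for the constant operand 5 in a stmt whose
vectype is V8HI, a preheader stmt of the form
'vect_cst_ = {5,5,5,5,5,5,5,5}' is created and its def is returned. */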
862 static tree
863 vect_get_vec_def_for_operand (tree op, tree stmt)
865 tree vec_oprnd;
866 tree vec_stmt;
867 tree def_stmt;
868 stmt_vec_info def_stmt_info = NULL;
869 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
870 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
871 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
872 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
873 basic_block bb;
874 tree vec_inv;
875 tree t = NULL_TREE;
876 tree def;
877 int i;
879 if (vect_debug_details (NULL))
881 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
882 print_generic_expr (dump_file, op, TDF_SLIM);
885 /** ===> Case 1: operand is a constant. **/
887 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
889 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
891 tree vec_cst;
892 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
893 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
894 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
895 tree t = NULL_TREE;
896 int i;
898 /* Build a tree with vector elements. */
899 if (vect_debug_details (NULL))
900 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
902 for (i = nunits - 1; i >= 0; --i)
904 t = tree_cons (NULL_TREE, op, t);
906 vec_cst = build_vector (vectype, t);
907 return vect_init_vector (stmt, vec_cst);
910 #ifdef ENABLE_CHECKING
911 if (TREE_CODE (op) != SSA_NAME)
912 abort ();
913 #endif
915 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
917 def_stmt = SSA_NAME_DEF_STMT (op);
918 def_stmt_info = vinfo_for_stmt (def_stmt);
920 if (vect_debug_details (NULL))
922 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
923 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
927 /** ==> Case 2.1: operand is defined inside the loop. **/
929 if (def_stmt_info)
931 /* Get the def from the vectorized stmt. */
933 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
934 #ifdef ENABLE_CHECKING
935 if (!vec_stmt)
936 abort ();
937 #endif
938 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
939 return vec_oprnd;
943 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
944 it is a reduction/induction. **/
946 bb = bb_for_stmt (def_stmt);
947 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
949 if (vect_debug_details (NULL))
950 fprintf (dump_file, "reduction/induction - unsupported.");
951 abort (); /* FORNOW no support for reduction/induction. */
955 /** ==> Case 2.3: operand is defined outside the loop -
956 it is a loop invariant. */
958 switch (TREE_CODE (def_stmt))
960 case PHI_NODE:
961 def = PHI_RESULT (def_stmt);
962 break;
963 case MODIFY_EXPR:
964 def = TREE_OPERAND (def_stmt, 0);
965 break;
966 case NOP_EXPR:
967 def = TREE_OPERAND (def_stmt, 0);
968 #ifdef ENABLE_CHECKING
969 if (!IS_EMPTY_STMT (def_stmt))
970 abort ();
971 #endif
972 def = op;
973 break;
974 default:
975 if (vect_debug_details (NULL))
977 fprintf (dump_file, "unsupported defining stmt: ");
978 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
980 abort ();
983 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
985 if (vect_debug_details (NULL))
986 fprintf (dump_file, "Create vector_inv.");
988 for (i = nunits - 1; i >= 0; --i)
990 t = tree_cons (NULL_TREE, def, t);
993 vec_inv = build_constructor (vectype, t);
994 return vect_init_vector (stmt, vec_inv);
998 /* Function vect_finish_stmt_generation.
1000 Insert a new stmt. */
1002 static void
1003 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
1005 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1007 if (vect_debug_details (NULL))
1009 fprintf (dump_file, "add new stmt: ");
1010 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1013 /* Make sure bsi points to the stmt that is being vectorized. */
1015 /* Assumption: any stmts created for the vectorization of stmt S are
1016 inserted before S. BSI may point to S or some new stmt before it. */
1018 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1019 bsi_next (bsi);
1020 #ifdef ENABLE_CHECKING
1021 if (stmt != bsi_stmt (*bsi))
1022 abort ();
1023 #endif
1027 /* Function vectorizable_assignment.
1029 Check if STMT performs an assignment (copy) that can be vectorized.
1030 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1031 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1032 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1034 static bool
1035 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1037 tree vec_dest;
1038 tree scalar_dest;
1039 tree op;
1040 tree vec_oprnd;
1041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1042 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1043 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1044 tree new_temp;
1046 /* Is vectorizable assignment? */
1048 if (TREE_CODE (stmt) != MODIFY_EXPR)
1049 return false;
1051 scalar_dest = TREE_OPERAND (stmt, 0);
1052 if (TREE_CODE (scalar_dest) != SSA_NAME)
1053 return false;
1055 op = TREE_OPERAND (stmt, 1);
1056 if (!vect_is_simple_use (op, loop, NULL))
1058 if (vect_debug_details (NULL))
1059 fprintf (dump_file, "use not simple.");
1060 return false;
1063 if (!vec_stmt) /* transformation not required. */
1065 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1066 return true;
1069 /** Transform. **/
1070 if (vect_debug_details (NULL))
1071 fprintf (dump_file, "transform assignment.");
1073 /* Handle def. */
1074 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1076 /* Handle use. */
1077 op = TREE_OPERAND (stmt, 1);
1078 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1080 /* Arguments are ready. Create the new vector stmt. */
1081 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1082 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1083 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1084 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1086 return true;
1090 /* Function vectorizable_operation.
1092 Check if STMT performs a binary or unary operation that can be vectorized.
1093 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1094 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1095 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1097 static bool
1098 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1100 tree vec_dest;
1101 tree scalar_dest;
1102 tree operation;
1103 tree op0, op1 = NULL;
1104 tree vec_oprnd0, vec_oprnd1=NULL;
1105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1106 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1107 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1108 int i;
1109 enum tree_code code;
1110 enum machine_mode vec_mode;
1111 tree new_temp;
1112 int op_type;
1113 tree op;
1114 optab optab;
1116 /* Is STMT a vectorizable binary/unary operation? */
1117 if (TREE_CODE (stmt) != MODIFY_EXPR)
1118 return false;
1120 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1121 return false;
1123 operation = TREE_OPERAND (stmt, 1);
1124 code = TREE_CODE (operation);
1125 optab = optab_for_tree_code (code, vectype);
1127 /* Support only unary or binary operations. */
1128 op_type = TREE_CODE_LENGTH (code);
1129 if (op_type != unary_op && op_type != binary_op)
1131 if (vect_debug_details (NULL))
1132 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1133 return false;
1136 for (i = 0; i < op_type; i++)
1138 op = TREE_OPERAND (operation, i);
1139 if (!vect_is_simple_use (op, loop, NULL))
1141 if (vect_debug_details (NULL))
1142 fprintf (dump_file, "use not simple.");
1143 return false;
1147 /* Supportable by target? */
1148 if (!optab)
1150 if (vect_debug_details (NULL))
1151 fprintf (dump_file, "no optab.");
1152 return false;
1154 vec_mode = TYPE_MODE (vectype);
1155 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1157 if (vect_debug_details (NULL))
1158 fprintf (dump_file, "op not supported by target.");
1159 return false;
1162 if (!vec_stmt) /* transformation not required. */
1164 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1165 return true;
1168 /** Transform. **/
1170 if (vect_debug_details (NULL))
1171 fprintf (dump_file, "transform binary/unary operation.");
1173 /* Handle def. */
1174 scalar_dest = TREE_OPERAND (stmt, 0);
1175 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1177 /* Handle uses. */
1178 op0 = TREE_OPERAND (operation, 0);
1179 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1181 if (op_type == binary_op)
1183 op1 = TREE_OPERAND (operation, 1);
1184 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1187 /* Arguments are ready. Create the new vector stmt. */
1189 if (op_type == binary_op)
1190 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1191 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1192 else
1193 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1194 build1 (code, vectype, vec_oprnd0));
1195 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1196 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1197 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1199 return true;
1203 /* Function vectorizable_store.
1205 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
1206 can be vectorized.
1207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1208 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1211 static bool
1212 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1214 tree scalar_dest;
1215 tree data_ref;
1216 tree op;
1217 tree vec_oprnd1;
1218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1219 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1220 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1221 enum machine_mode vec_mode;
1223 /* Is vectorizable store? */
1225 if (TREE_CODE (stmt) != MODIFY_EXPR)
1226 return false;
1228 scalar_dest = TREE_OPERAND (stmt, 0);
1229 if (TREE_CODE (scalar_dest) != ARRAY_REF
1230 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1231 return false;
1233 op = TREE_OPERAND (stmt, 1);
1234 if (!vect_is_simple_use (op, loop, NULL))
1236 if (vect_debug_details (NULL))
1237 fprintf (dump_file, "use not simple.");
1238 return false;
1241 vec_mode = TYPE_MODE (vectype);
1242 /* FORNOW. In some cases can vectorize even if data-type not supported
1243 (e.g. - array initialization with 0). */
1244 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1245 return false;
1247 if (!STMT_VINFO_DATA_REF (stmt_info))
1248 return false;
1250 if (!vec_stmt) /* transformation not required. */
1252 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1253 return true;
1256 /** Transform. **/
1258 if (vect_debug_details (NULL))
1259 fprintf (dump_file, "transform store");
1261 /* Handle use - get the vectorized def from the defining stmt. */
1262 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1264 /* Handle def. */
1265 data_ref = vect_create_data_ref (stmt, bsi);
1267 /* Arguments are ready. Create the new vector stmt. */
1268 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1269 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1271 return true;
1275 /* Function vectorizable_load.
1277 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
1278 can be vectorized.
1279 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1280 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1281 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1283 static bool
1284 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1286 tree scalar_dest;
1287 tree vec_dest = NULL;
1288 tree data_ref = NULL;
1289 tree op;
1290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1291 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1292 tree new_temp;
1293 enum machine_mode vec_mode;
1295 /* Is vectorizable load? */
1297 if (TREE_CODE (stmt) != MODIFY_EXPR)
1298 return false;
1300 scalar_dest = TREE_OPERAND (stmt, 0);
1301 if (TREE_CODE (scalar_dest) != SSA_NAME)
1302 return false;
1304 op = TREE_OPERAND (stmt, 1);
1305 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1306 return false;
1308 if (!STMT_VINFO_DATA_REF (stmt_info))
1309 return false;
1311 vec_mode = TYPE_MODE (vectype);
1312 /* FORNOW. In some cases can vectorize even if data-type not supported
1313 (e.g. - data copies). */
1314 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1315 return false;
1317 if (!vec_stmt) /* transformation not required. */
1319 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1320 return true;
1323 /** Transform. **/
1325 if (vect_debug_details (NULL))
1326 fprintf (dump_file, "transform load.");
1328 /* Handle def. */
1329 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1331 /* Handle use. */
1332 op = TREE_OPERAND (stmt, 1);
1333 data_ref = vect_create_data_ref (stmt, bsi);
1335 /* Arguments are ready. Create the new vector stmt. */
1336 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1337 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1338 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1339 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1341 return true;
1345 /* Function vect_transform_stmt.
1347 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1349 static bool
1350 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1352 bool is_store = false;
1353 tree vec_stmt = NULL_TREE;
1354 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1356 switch (STMT_VINFO_TYPE (stmt_info))
1358 case op_vec_info_type:
1359 if (!vectorizable_operation (stmt, bsi, &vec_stmt))
1360 abort ();
1361 break;
1363 case assignment_vec_info_type:
1364 if (!vectorizable_assignment (stmt, bsi, &vec_stmt))
1365 abort ();
1366 break;
1368 case load_vec_info_type:
1369 if (!vectorizable_load (stmt, bsi, &vec_stmt))
1370 abort ();
1371 break;
1373 case store_vec_info_type:
1374 if (!vectorizable_store (stmt, bsi, &vec_stmt))
1375 abort ();
1376 is_store = true;
1377 break;
1378 default:
1379 if (vect_debug_details (NULL))
1380 fprintf (dump_file, "stmt not supported.");
1381 abort ();
1384 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1386 return is_store;
1390 /* Function vect_transform_loop_bound.
1392 Create a new exit condition for the loop. */
1394 static void
1395 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1398 edge exit_edge = loop->single_exit;
1399 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1400 tree indx_before_incr, indx_after_incr;
1401 tree orig_cond_expr;
1402 HOST_WIDE_INT old_N = 0;
1403 int vf;
1404 tree cond_stmt;
1405 tree new_loop_bound;
1406 tree cond;
1407 tree lb_type;
1409 #ifdef ENABLE_CHECKING
1410 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1411 abort ();
1412 #endif
1413 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1414 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1416 #ifdef ENABLE_CHECKING
1417 /* FORNOW:
1418 assuming the number of iterations is divisible by the vectorization factor. */
1419 if (old_N % vf)
1420 abort ();
1421 #endif
1423 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1424 #ifdef ENABLE_CHECKING
1425 if (!orig_cond_expr)
1426 abort ();
1427 #endif
1428 if (orig_cond_expr != bsi_stmt (loop_exit_bsi))
1429 abort ();
1431 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1432 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1434 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1435 to point to the exit condition. */
1436 bsi_next (&loop_exit_bsi);
1437 if (bsi_stmt (loop_exit_bsi) != orig_cond_expr)
1438 abort ();
1440 /* new loop exit test: */
1441 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1442 new_loop_bound = build_int_cst (lb_type, old_N/vf);
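/* For example (illustrative): if the scalar loop ran old_N = 16 iterations
and vf = 4, then new_loop_bound = 16/4 = 4 and the vectorized loop exits
once the new iv reaches 4. */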
1444 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1445 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1446 else /* 'then' edge loops back. */
1447 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1449 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1450 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1452 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1454 /* remove old loop exit test: */
1455 bsi_remove (&loop_exit_bsi);
1457 if (vect_debug_details (NULL))
1458 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1462 /* Function vect_transform_loop.
1464 The analysis phase has determined that the loop is vectorizable.
1465 Vectorize the loop - create vectorized stmts to replace the scalar
1466 stmts in the loop, and update the loop exit condition. */
1468 static void
1469 vect_transform_loop (loop_vec_info loop_vinfo,
1470 struct loops *loops ATTRIBUTE_UNUSED)
1472 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1473 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1474 int nbbs = loop->num_nodes;
1475 block_stmt_iterator si;
1476 int i;
1477 #ifdef ENABLE_CHECKING
1478 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1479 #endif
1481 if (vect_debug_details (NULL))
1482 fprintf (dump_file, "\n<<vect_transform_loop>>\n");
1484 /* 1) Make sure the loop header has exactly two entries
1485 2) Make sure we have a preheader basic block. */
1487 if (!loop->header->pred->pred_next
1488 || loop->header->pred->pred_next->pred_next)
1489 abort ();
1491 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1494 /* FORNOW: the vectorizer supports only loops whose body consists
1495 of one basic block (header + empty latch). When the vectorizer
1496 supports more involved loop forms, the order in which the BBs are
1497 traversed will need to be reconsidered. */
1499 for (i = 0; i < nbbs; i++)
1501 basic_block bb = bbs[i];
1503 for (si = bsi_start (bb); !bsi_end_p (si);)
1505 tree stmt = bsi_stmt (si);
1506 stmt_vec_info stmt_info;
1507 bool is_store;
1508 #ifdef ENABLE_CHECKING
1509 tree vectype;
1510 #endif
1512 if (vect_debug_details (NULL))
1514 fprintf (dump_file, "------>vectorizing statement: ");
1515 print_generic_expr (dump_file, stmt, TDF_SLIM);
1517 stmt_info = vinfo_for_stmt (stmt);
1518 #ifdef ENABLE_CHECKING
1519 if (!stmt_info)
1520 abort ();
1521 #endif
1522 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1524 bsi_next (&si);
1525 continue;
1527 #ifdef ENABLE_CHECKING
1528 /* FORNOW: Verify that all stmts operate on the same number of
1529 units and no inner unrolling is necessary. */
1530 vectype = STMT_VINFO_VECTYPE (stmt_info);
1531 if (GET_MODE_NUNITS (TYPE_MODE (vectype)) != vectorization_factor)
1532 abort ();
1533 #endif
1534 /* -------- vectorize statement ------------ */
1535 if (vect_debug_details (NULL))
1536 fprintf (dump_file, "transform statement.");
1538 is_store = vect_transform_stmt (stmt, &si);
1539 if (is_store)
1541 /* free the attached stmt_vec_info and remove the stmt. */
1542 stmt_ann_t ann = stmt_ann (stmt);
1543 free (stmt_info);
1544 set_stmt_info (ann, NULL);
1545 bsi_remove (&si);
1546 continue;
1549 bsi_next (&si);
1550 } /* stmts in BB */
1551 } /* BBs in loop */
1553 vect_transform_loop_bound (loop_vinfo);
1555 if (vect_debug_details (loop))
1556 fprintf (dump_file,"Success! loop vectorized.");
1557 if (vect_debug_stats (loop))
1558 fprintf (dump_file, "LOOP VECTORIZED.");
1562 /* Function vect_is_simple_use.
1564 Input:
1565 LOOP - the loop that is being vectorized.
1566 OPERAND - operand of a stmt in LOOP.
1567 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1569 Returns whether a stmt with OPERAND can be vectorized.
1570 Supportable operands are constants, loop invariants, and operands that are
1571 defined by the current iteration of the loop. Unsupportable operands are
1572 those that are defined by a previous iteration of the loop (as is the case
1573 in reduction/induction computations). */
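/* For example (illustrative): in 'sum_2 = sum_1 + x_3', where sum_1 is
defined by a loop-header PHI (a reduction), sum_1 is not a simple use;
constants, defs from outside the loop, and defs computed in the current
iteration are. */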
1575 static bool
1576 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1578 tree def_stmt;
1579 basic_block bb;
1581 if (def)
1582 *def = NULL_TREE;
1584 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1585 return true;
1587 if (TREE_CODE (operand) != SSA_NAME)
1588 return false;
1590 def_stmt = SSA_NAME_DEF_STMT (operand);
1591 if (def_stmt == NULL_TREE )
1593 if (vect_debug_details (NULL))
1594 fprintf (dump_file, "no def_stmt.");
1595 return false;
1598 /* An empty stmt is expected only in the case of a function argument.
1599 (Otherwise we expect a phi_node or a modify_expr). */
1600 if (IS_EMPTY_STMT (def_stmt))
1602 tree arg = TREE_OPERAND (def_stmt, 0);
1603 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1604 return true;
1605 if (vect_debug_details (NULL))
1607 fprintf (dump_file, "Unexpected empty stmt: ");
1608 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1610 return false;
1613 /* phi_node inside the loop indicates an induction/reduction pattern.
1614 This is not supported yet. */
1615 bb = bb_for_stmt (def_stmt);
1616 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1618 if (vect_debug_details (NULL))
1619 fprintf (dump_file, "reduction/induction - unsupported.");
1620 return false; /* FORNOW: not supported yet. */
1623 /* Expecting a modify_expr or a phi_node. */
1624 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1625 || TREE_CODE (def_stmt) == PHI_NODE)
1627 if (def)
1628 *def = def_stmt;
1629 return true;
1632 return false;
1636 /* Function vect_analyze_operations.
1638 Scan the loop stmts and make sure they are all vectorizable. */
1640 static bool
1641 vect_analyze_operations (loop_vec_info loop_vinfo)
1643 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1645 int nbbs = loop->num_nodes;
1646 block_stmt_iterator si;
1647 int vectorization_factor = 0;
1648 int i;
1649 bool ok;
1650 tree scalar_type;
1652 if (vect_debug_details (NULL))
1653 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
1655 for (i = 0; i < nbbs; i++)
1657 basic_block bb = bbs[i];
1659 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1661 tree stmt = bsi_stmt (si);
1662 int nunits;
1663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1664 tree vectype;
1666 if (vect_debug_details (NULL))
1668 fprintf (dump_file, "==> examining statement: ");
1669 print_generic_expr (dump_file, stmt, TDF_SLIM);
1671 #ifdef ENABLE_CHECKING
1672 if (!stmt_info)
1673 abort ();
1674 #endif
1675 /* Skip stmts which do not need to be vectorized.
1676 This is expected to include:
1677 - the COND_EXPR which is the loop exit condition
1678 - any LABEL_EXPRs in the loop
1679 - computations that are used only for array indexing or loop
1680 control */
1682 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1684 if (vect_debug_details (NULL))
1685 fprintf (dump_file, "irrelevant.");
1686 continue;
1689 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
1691 if (vect_debug_stats (loop) || vect_debug_details (loop))
1693 fprintf (dump_file, "not vectorized: vector stmt in loop:");
1694 print_generic_expr (dump_file, stmt, TDF_SLIM);
1696 return false;
1699 if (STMT_VINFO_DATA_REF (stmt_info))
1700 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
1701 else if (TREE_CODE (stmt) == MODIFY_EXPR)
1702 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
1703 else
1704 scalar_type = TREE_TYPE (stmt);
1706 if (vect_debug_details (NULL))
1708 fprintf (dump_file, "get vectype for scalar type: ");
1709 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1712 vectype = get_vectype_for_scalar_type (scalar_type);
1713 if (!vectype)
1715 if (vect_debug_stats (loop) || vect_debug_details (loop))
1717 fprintf (dump_file, "not vectorized: unsupported data-type ");
1718 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1720 return false;
1723 if (vect_debug_details (NULL))
1725 fprintf (dump_file, "vectype: ");
1726 print_generic_expr (dump_file, vectype, TDF_SLIM);
1728 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1730 ok = (vectorizable_operation (stmt, NULL, NULL)
1731 || vectorizable_assignment (stmt, NULL, NULL)
1732 || vectorizable_load (stmt, NULL, NULL)
1733 || vectorizable_store (stmt, NULL, NULL));
1735 if (!ok)
1737 if (vect_debug_stats (loop) || vect_debug_details (loop))
1739 fprintf (dump_file, "not vectorized: stmt not supported: ");
1740 print_generic_expr (dump_file, stmt, TDF_SLIM);
1742 return false;
1745 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1746 if (vect_debug_details (NULL))
1747 fprintf (dump_file, "nunits = %d", nunits);
1749 if (vectorization_factor)
1751 /* FORNOW: don't allow mixed units.
1752 This restriction will be relaxed in the future. */
1753 if (nunits != vectorization_factor)
1755 if (vect_debug_stats (loop) || vect_debug_details (loop))
1756 fprintf (dump_file, "not vectorized: mixed data-types");
1757 return false;
1760 else
1761 vectorization_factor = nunits;
1765 /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
1766 if (!vectorization_factor)
1768 if (vect_debug_stats (loop) || vect_debug_details (loop))
1769 fprintf (dump_file, "not vectorized: unsupported data-type");
1770 return false;
1772 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1774 /* FORNOW: handle only cases where the loop bound is divisible by the
1775 vectorization factor. */
1777 if (vect_debug_details (NULL))
1778 fprintf (dump_file,
1779 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1780 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
1782 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1784 if (vect_debug_stats (loop) || vect_debug_details (loop))
1785 fprintf (dump_file, "not vectorized: Unknown loop bound.");
1786 return false;
1789 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1790 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
1792 if (vect_debug_stats (loop) || vect_debug_details (loop))
1793 fprintf (dump_file, "not vectorized: loop bound not divisible by %d.",
1794 vectorization_factor);
1795 return false;
1798 return true;
1802 /* Function exist_non_indexing_operands_for_use_p
1804 USE is one of the uses attached to STMT. Check if USE is
1805 used in STMT for anything other than indexing an array. */
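/* For example (illustrative): in 'a[i_1] = x_2', the use x_2 is a real
(non-indexing) operand, while i_1 is used only to index the array; in
'x_2 = a[i_1]', x_2 is a def rather than a use, so no use of that stmt
is a non-indexing operand. */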
1807 static bool
1808 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
1810 tree operand;
1811 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1813 /* USE corresponds to some operand in STMT. If there is no data
1814 reference in STMT, then any operand that corresponds to USE
1815 is not indexing an array. */
1816 if (!STMT_VINFO_DATA_REF (stmt_info))
1817 return true;
1819 /* STMT has a data_ref. FORNOW this means that it's of one of
1820 the following forms:
1821 -1- ARRAY_REF = var
1822 -2- var = ARRAY_REF
1823 (This should have been verified in analyze_data_refs).
1825 'var' in the second case corresponds to a def, not a use,
1826 so USE cannot correspond to any operands that are not used
1827 for array indexing.
1829 Therefore, all we need to check is if STMT falls into the
1830 first case, and whether var corresponds to USE. */
1832 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
1833 return false;
1835 operand = TREE_OPERAND (stmt, 1);
1837 if (TREE_CODE (operand) != SSA_NAME)
1838 return false;
1840 if (operand == use)
1841 return true;
1843 return false;
1847 /* Function vect_is_simple_iv_evolution.
1849 FORNOW: A simple evolution of an induction variable in the loop is
1850 considered a polynomial evolution with constant step. */
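/* For example (illustrative, using the scev chrec notation): an access
function {0, +, 1}_1 (initial value 0, step 1 in loop 1) is a simple
evolution; a chrec whose step is itself a chrec, such as
{0, +, {1, +, 1}_1}_1, is not. */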
1852 static bool
1853 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1854 tree * step, bool strict)
1856 tree init_expr;
1857 tree step_expr;
1859 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1861 /* When there is no evolution in this loop, the evolution function
1862 is not "simple". */
1863 if (evolution_part == NULL_TREE)
1864 return false;
1866 /* When the evolution is a polynomial of degree >= 2
1867 the evolution function is not "simple". */
1868 if (tree_is_chrec (evolution_part))
1869 return false;
1871 step_expr = evolution_part;
1872 init_expr = initial_condition (access_fn);
1874 if (vect_debug_details (NULL))
1876 fprintf (dump_file, "step: ");
1877 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1878 fprintf (dump_file, ", init: ");
1879 print_generic_expr (dump_file, init_expr, TDF_SLIM);
1882 *init = init_expr;
1883 *step = step_expr;
1885 if (TREE_CODE (step_expr) != INTEGER_CST)
1887 if (vect_debug_details (NULL))
1888 fprintf (dump_file, "step unknown.");
1889 return false;
1892 if (strict)
1893 if (!integer_onep (step_expr))
1895 if (vect_debug_details (NULL))
1896 print_generic_expr (dump_file, step_expr, TDF_SLIM);
1897 return false;
1900 return true;
1904 /* Function vect_analyze_scalar_cycles.
1906 Examine the cross iteration def-use cycles of scalar variables, by
1907 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
1908 cycles that they represent do not impede vectorization.
1910 FORNOW: Reduction as in the following loop, is not supported yet:
1911 loop1:
1912 for (i=0; i<N; i++)
1913 sum += a[i];
1914 The cross-iteration cycle corresponding to variable 'sum' will be
1915 considered too complicated and will impede vectorization.
1917 FORNOW: Induction as in the following loop, is not supported yet:
1918 loop2:
1919 for (i=0; i<N; i++)
1920 a[i] = i;
1922 However, the following loop *is* vectorizable:
1923 loop3:
1924 for (i=0; i<N; i++)
1925 a[i] = b[i];
1927 In both loops there exists a def-use cycle for the variable i:
1928 loop: i_2 = PHI (i_0, i_1)
1929 a[i_2] = ...;
1930 i_1 = i_2 + 1;
1931 GOTO loop;
1933 The evolution of the above cycle is considered simple enough,
1934 however, we also check that the cycle does not need to be
1935 vectorized, i.e., we check that the variable that this cycle
1936 defines is only used for array indexing or in stmts that do not
1937 need to be vectorized. This is not the case in loop2, but it
1938 *is* the case in loop3. */
1940 static bool
1941 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
1943 tree phi;
1944 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1945 basic_block bb = loop->header;
1946 tree dummy;
1948 if (vect_debug_details (NULL))
1949 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
1951 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1953 tree access_fn = NULL;
1955 if (vect_debug_details (NULL))
1957 fprintf (dump_file, "Analyze phi: ");
1958 print_generic_expr (dump_file, phi, TDF_SLIM);
1961 /* Skip virtual phi's. The data dependences that are associated with
1962 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1964 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1966 if (vect_debug_details (NULL))
1967 fprintf (dump_file, "virtual phi. skip.");
1968 continue;
1971 /* Analyze the evolution function. */
1973 /* FORNOW: The only scalar cross-iteration cycles that we allow are
1974 those of loop induction variables; this property is verified here.
1976 Furthermore, if that induction variable is used in an operation
1977 that needs to be vectorized (i.e., is not solely used to index
1978 arrays and check the exit condition), we do not support its
1979 vectorization yet. This property is verified in vect_is_simple_use,
1980 during vect_analyze_operations. */
1982 access_fn = instantiate_parameters
1983 (loop,
1984 analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1986 if (!access_fn)
1988 if (vect_debug_stats (loop) || vect_debug_details (loop))
1989 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
1990 return false;
1993 if (vect_debug_details (NULL))
1995 fprintf (dump_file, "Access function of PHI: ");
1996 print_generic_expr (dump_file, access_fn, TDF_SLIM);
1999 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2000 &dummy, false))
2002 if (vect_debug_stats (loop) || vect_debug_details (loop))
2003 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2004 return false;
2008 return true;
2012 /* Function vect_analyze_data_ref_dependence.
2014 Return TRUE if there (might) exist a dependence between a memory-reference
2015 DRA and a memory-reference DRB. */
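/* For example (for illustration only): in

     for (i = 0; i < N; i++)
       a[i] = b[i];

   the base names 'a' and 'b' are known to differ, so FALSE (no dependence)
   is returned for this pair; whereas in

     for (i = 0; i < N; i++)
       a[i+1] = a[i] + 1;

   there is a cross-iteration dependence between the write a[i+1] and the
   read a[i], so TRUE is returned and the loop is not vectorized (FORNOW,
   any detected dependence fails the analysis). */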
2017 static bool
2018 vect_analyze_data_ref_dependence (struct data_reference *dra,
2019 struct data_reference *drb,
2020 struct loop *loop)
2022 bool differ_p;
2023 struct data_dependence_relation *ddr;
2025 if (!array_base_name_differ_p (dra, drb, &differ_p))
2027 if (vect_debug_stats (loop) || vect_debug_details (loop))
2029 fprintf (dump_file,
2030 "not vectorized: can't determine dependence between: ");
2031 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2032 fprintf (dump_file, " and ");
2033 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2035 return true;
2038 if (differ_p)
2039 return false;
2041 ddr = initialize_data_dependence_relation (dra, drb);
2042 compute_affine_dependence (ddr);
2044 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2045 return false;
2047 if (vect_debug_stats (loop) || vect_debug_details (loop))
2049 fprintf (dump_file,
2050 "not vectorized: possible dependence between data-refs ");
2051 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2052 fprintf (dump_file, " and ");
2053 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2056 return true;
2060 /* Function vect_analyze_data_ref_dependences.
2062 Examine all the data references in the loop, and make sure there do not
2063 exist any data dependences between them.
2065 TODO: dependences whose distance is greater than the vectorization factor
2066 can be ignored. */
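/* For example (for illustration only; assuming a vectorization factor
   of 4): in

     for (i = 0; i < N; i++)
       a[i+8] = a[i] + 1;

   the dependence distance is 8 >= 4, so vectorizing 4 iterations at a
   time would be safe; FORNOW such a dependence still causes the analysis
   to fail. */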
2068 static bool
2069 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2071 unsigned int i, j;
2072 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2073 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2074 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2076 /* Examine store-store (output) dependences. */
2078 if (vect_debug_details (NULL))
2079 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2081 if (vect_debug_details (NULL))
2082 fprintf (dump_file, "compare all store-store pairs.");
2084 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2086 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2088 struct data_reference *dra =
2089 VARRAY_GENERIC_PTR (loop_write_refs, i);
2090 struct data_reference *drb =
2091 VARRAY_GENERIC_PTR (loop_write_refs, j);
2092 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2093 return false;
2097 /* Examine load-store (true/anti) dependences. */
2099 if (vect_debug_details (NULL))
2100 fprintf (dump_file, "compare all load-store pairs.");
2102 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2104 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2106 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2107 struct data_reference *drb =
2108 VARRAY_GENERIC_PTR (loop_write_refs, j);
2109 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2110 return false;
2114 return true;
2118 /* Function vect_get_first_index.
2120 REF is a data reference.
2121 If it is an ARRAY_REF: if its lower bound is simple enough,
2122 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise return FALSE.
2123 If it is not an ARRAY_REF: REF has no "first index";
2124 ARRAY_FIRST_INDEX is set to zero, and the function returns TRUE. */
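/* For example: for a C array ('int a[N]; ... a[i] ...') the lower bound
   of the array domain is the constant 0, so ARRAY_FIRST_INDEX is 0.
   Front ends such as Fortran can produce arrays whose lower bound is a
   different constant (e.g. 1), which is equally acceptable; only a
   non-constant lower bound causes the function to return FALSE. */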
2126 static bool
2127 vect_get_first_index (tree ref, tree *array_first_index)
2129 tree array_start;
2131 if (TREE_CODE (ref) != ARRAY_REF)
2132 *array_first_index = size_zero_node;
2133 else
2135 array_start = array_ref_low_bound (ref);
2136 if (!host_integerp (array_start,0))
2138 if (vect_debug_details (NULL))
2140 fprintf (dump_file, "array min val not simple integer cst.");
2141 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2143 return false;
2145 *array_first_index = array_start;
2148 return true;
2152 /* Function vect_compute_data_ref_alignment
2154 Compute the misalignment of the data reference DR.
2156 FOR NOW: No analysis is actually performed. Misalignment is calculated
2157 only for trivial cases. TODO. */
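/* For example (the numbers are illustrative; the vector size depends on
   the target): for a target with 16-byte (128-bit) vectors and

     short a[N];
     ...
     a[i + 2] = ...;

   assuming the base 'a' is (or can be forced to be) 16-byte aligned,
   we get offset = 0, init = 2, array_first_index = 0, and
   nunits = sizeof (short) = 2, so the misalignment is
   (0 + (2 - 0) * 2) % 16 = 4 bytes. */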
2159 static void
2160 vect_compute_data_ref_alignment (struct data_reference *dr,
2161 loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2163 tree stmt = DR_STMT (dr);
2164 tree ref = DR_REF (dr);
2165 tree vectype;
2166 tree access_fn = DR_ACCESS_FN (dr, 0); /* FORNOW: single access_fn. */
2167 tree init;
2168 tree scalar_type;
2169 tree misalign;
2170 tree array_first_index;
2171 tree array_base = DR_BASE_NAME (dr);
2172 tree base_decl = NULL_TREE;
2173 tree bit_offset = size_zero_node;
2174 tree offset = size_zero_node;
2175 tree unit_bits = build_int_cst (unsigned_type_node, BITS_PER_UNIT);
2176 tree nunits;
2177 tree alignment;
2179 if (vect_debug_details (NULL))
2180 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2182 /* Initialize misalignment to unknown. */
2183 DR_MISALIGNMENT (dr) = -1;
2185 scalar_type = TREE_TYPE (ref);
2186 vectype = get_vectype_for_scalar_type (scalar_type);
2187 if (!vectype)
2189 if (vect_debug_details (NULL))
2191 fprintf (dump_file, "no vectype for stmt: ");
2192 print_generic_expr (dump_file, stmt, TDF_SLIM);
2193 fprintf (dump_file, "scalar_type: ");
2194 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2196 return;
2199 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base))) < TYPE_ALIGN (vectype))
2201 base_decl = vect_get_base_decl_and_bit_offset (array_base, &bit_offset);
2202 if (!base_decl)
2204 if (vect_debug_details (NULL))
2205 fprintf (dump_file, "Unknown alignment for access");
2206 return;
2209 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2210 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2211 if (!integer_zerop (bit_offset))
2213 if (vect_debug_details (NULL))
2215 fprintf (dump_file, "bit offset alignment: ");
2216 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2218 return;
2221 if (!base_decl ||
2222 (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype)
2223 && !vect_can_force_dr_alignment_p (base_decl, TYPE_ALIGN (vectype))))
2225 if (vect_debug_details (NULL))
2227 fprintf (dump_file, "can't force alignment of ref: ");
2228 print_generic_expr (dump_file, array_base, TDF_SLIM);
2230 return;
2233 if (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype))
2235 /* Force the alignment of the decl.
2236 NOTE: This is the only change to the code we make during
2237 the analysis phase, before deciding to vectorize the loop. */
2238 if (vect_debug_details (NULL))
2239 fprintf (dump_file, "force alignment");
2240 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2241 DECL_USER_ALIGN (base_decl) = TYPE_ALIGN (vectype);
2245 /* The misalignment is:
2246 (base_alignment + offset + index_access_fn_init) % alignment.
2247 At this point we already guaranteed that base_alignment == 0,
2248 and computed the offset.
2249 It remains to check the first index accessed. */
2251 if (!vect_get_first_index (ref, &array_first_index))
2253 if (vect_debug_details (NULL))
2254 fprintf (dump_file, "no first_index for array.");
2255 return;
2258 /* Check the index of the array_ref. */
2260 init = initial_condition (access_fn);
2262 /* FORNOW: In order to simplify the handling of alignment, we make sure
2263 that the first location at which the array is accessed ('init') is on an
2264 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2265 This is too conservative, since we require that
2266 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2267 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2268 This should be relaxed in the future. */
2270 if (!init || !host_integerp (init,0))
2272 if (vect_debug_details (NULL))
2273 fprintf (dump_file, "init not simple INTEGER_CST.");
2274 return;
2277 /* alignment required, in bytes: */
2278 alignment = build_int_cst (unsigned_type_node,
2279 TYPE_ALIGN (vectype)/BITS_PER_UNIT);
2280 /* bytes per scalar element: */
2281 nunits = build_int_cst (unsigned_type_node,
2282 GET_MODE_SIZE (TYPE_MODE (scalar_type)));
2284 /* misalign = (offset + (init-array_first_index)*nunits) % alignment */
2285 if (vect_debug_details (NULL))
2287 fprintf (dump_file, "misalign = ( offset <");
2288 print_generic_expr (dump_file, offset, TDF_SLIM);
2289 fprintf (dump_file, "> + (init <");
2290 print_generic_expr (dump_file, init, TDF_SLIM);
2291 fprintf (dump_file, "> - first_indx <");
2292 print_generic_expr (dump_file, array_first_index, TDF_SLIM);
2293 fprintf (dump_file, ">) * nunits <");
2294 print_generic_expr (dump_file, nunits, TDF_SLIM);
2295 fprintf (dump_file, ">) mod alignment <");
2296 print_generic_expr (dump_file, alignment, TDF_SLIM);
2297 fprintf (dump_file, ">");
2300 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2301 misalign = int_const_binop (MULT_EXPR, misalign, nunits, 0);
2302 misalign = int_const_binop (PLUS_EXPR, misalign, offset, 0);
2303 misalign = int_const_binop (TRUNC_MOD_EXPR, misalign, alignment, 0);
2305 if (vect_debug_details (NULL))
2307 fprintf (dump_file, "misalign = ");
2308 print_generic_expr (dump_file, misalign, TDF_SLIM);
2311 if (!host_integerp (misalign,1) || TREE_OVERFLOW (misalign))
2313 if (vect_debug_details (NULL))
2314 fprintf (dump_file, "unexpected misalign value");
2315 return;
2318 DR_MISALIGNMENT (dr) = tree_low_cst (misalign,1);
2320 if (vect_debug_details (NULL))
2321 fprintf (dump_file, "misalign = %d",DR_MISALIGNMENT (dr));
2325 /* Function vect_compute_data_refs_alignment
2327 Compute the misalignment of data references in the loop.
2328 This pass may take place at function granularity instead of at loop
2329 granularity.
2331 FOR NOW: No analysis is actually performed. Misalignment is calculated
2332 only for trivial cases. TODO. */
2334 static void
2335 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2337 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2338 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2339 unsigned int i;
2341 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2343 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2344 vect_compute_data_ref_alignment (dr, loop_vinfo);
2347 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2349 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2350 vect_compute_data_ref_alignment (dr, loop_vinfo);
2355 /* Function vect_enhance_data_refs_alignment
2357 This pass will use loop versioning and loop peeling in order to enhance
2358 the alignment of data references in the loop.
2360 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2361 original loop is to be vectorized; any other loops that are created by
2362 the transformations performed in this pass are not supposed to be
2363 vectorized. This restriction will be relaxed.
2365 FOR NOW: No transformation is actually performed. TODO. */
2367 static void
2368 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
2370 /*
2371 This pass will require a cost model to guide it in deciding whether to apply
2372 peeling or versioning or a combination of the two. For example, the scheme that
2373 Intel uses when given a loop with several memory accesses is as follows:
2374 choose one memory access ('p') whose alignment you want to force by doing
2375 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2376 other accesses are not necessarily aligned, or (2) use loop versioning to
2377 generate one loop in which all accesses are aligned, and another loop in
2378 which only 'p' is necessarily aligned.
2380 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2381 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2382 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2384 Devising a cost model is the most critical aspect of this work. It will
2385 guide us on which access to peel for, whether to use loop versioning, how
2386 many versions to create, etc. The cost model will probably consist of
2387 generic considerations as well as target specific considerations (on
2388 powerpc for example, misaligned stores are more painful than misaligned
2389 loads).
2391 Here are the general steps involved in alignment enhancement:
2393 -- original loop, before alignment analysis:
2394 for (i=0; i<N; i++){
2395 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2396 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2399 -- After vect_compute_data_refs_alignment:
2400 for (i=0; i<N; i++){
2401 x = q[i]; # DR_MISALIGNMENT(q) = 3
2402 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2405 -- Possibility 1: we do loop versioning:
2406 if (p is aligned) {
2407 for (i=0; i<N; i++){ # loop 1A
2408 x = q[i]; # DR_MISALIGNMENT(q) = 3
2409 p[i] = y; # DR_MISALIGNMENT(p) = 0
2412 else {
2413 for (i=0; i<N; i++){ # loop 1B
2414 x = q[i]; # DR_MISALIGNMENT(q) = 3
2415 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2419 -- Possibility 2: we do loop peeling:
2420 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2421 x = q[i];
2422 p[i] = y;
2424 for (i = 3; i < N; i++){ # loop 2A
2425 x = q[i]; # DR_MISALIGNMENT(q) = 0
2426 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2429 -- Possibility 3: combination of loop peeling and versioning:
2430 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2431 x = q[i];
2432 p[i] = y;
2434 if (p is aligned) {
2435 for (i = 3; i<N; i++){ # loop 3A
2436 x = q[i]; # DR_MISALIGNMENT(q) = 0
2437 p[i] = y; # DR_MISALIGNMENT(p) = 0
2440 else {
2441 for (i = 3; i<N; i++){ # loop 3B
2442 x = q[i]; # DR_MISALIGNMENT(q) = 0
2443 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2447 These loops are later passed to loop_transform to be vectorized. The
2448 vectorizer will use the alignment information to guide the transformation
2449 (whether to generate regular loads/stores, or with special handling for
2450 misalignment).
2451 */
2455 /* Function vect_analyze_data_refs_alignment
2457 Analyze the alignment of the data-references in the loop.
2458 FOR NOW: Until support for misaligned accesses is in place, only if all
2459 accesses are aligned can the loop be vectorized. This restriction will be
2460 relaxed. */
2462 static bool
2463 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2465 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2466 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2467 unsigned int i;
2469 if (vect_debug_details (NULL))
2470 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
2473 /* This pass may take place at function granularity instead of at loop
2474 granularity. */
2476 vect_compute_data_refs_alignment (loop_vinfo);
2479 /* This pass will use loop versioning and loop peeling in order to enhance
2480 the alignment of data references in the loop.
2481 FOR NOW: we assume that whatever versioning/peeling took place, the
2482 original loop is to be vectorized. Any other loops that were created by
2483 the transformations performed in this pass are not supposed to be
2484 vectorized. This restriction will be relaxed. */
2486 vect_enhance_data_refs_alignment (loop_vinfo);
2489 /* Finally, check that loop can be vectorized.
2490 FOR NOW: Until support for misaligned accesses is in place, only if all
2491 accesses are aligned can the loop be vectorized. This restriction will be
2492 relaxed. */
2494 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2496 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2497 if (!aligned_access_p (dr))
2499 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2500 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2501 fprintf (dump_file, "not vectorized: unaligned store.");
2502 return false;
2506 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2508 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2509 if (!aligned_access_p (dr))
2511 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2512 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2513 fprintf (dump_file, "not vectorized: unaligned load.");
2514 return false;
2518 return true;
2522 /* Function vect_analyze_data_ref_access.
2524 Analyze the access pattern of the data-reference DR. For now, a data access
2525 has to be consecutive and aligned to be considered vectorizable. */
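/* For example (for illustration only):

     for (i = 0; i < N; i++)
       a[i] = b[i];      # access functions {0, +, 1}_1: step 1, OK.

     for (i = 0; i < N; i++)
       a[2*i] = b[i];    # access function of 'a' is {0, +, 2}_1: the step
                         # is not 1, so the access is rejected. */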
2527 static bool
2528 vect_analyze_data_ref_access (struct data_reference *dr)
2530 varray_type access_fns = DR_ACCESS_FNS (dr);
2531 tree access_fn;
2532 tree init, step;
2534 /* FORNOW: handle only one dimensional arrays.
2535 This restriction will be relaxed in the future. */
2536 if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
2538 if (vect_debug_details (NULL))
2539 fprintf (dump_file, "multi dimensional array reference.");
2540 return false;
2542 access_fn = DR_ACCESS_FN (dr, 0);
2544 if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
2545 access_fn, &init, &step, true))
2547 if (vect_debug_details (NULL))
2549 fprintf (dump_file, "too complicated access function.");
2550 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2552 return false;
2555 return true;
2559 /* Function vect_analyze_data_ref_accesses.
2561 Analyze the access pattern of all the data references in the loop.
2563 FORNOW: the only access pattern that is considered vectorizable is a
2564 simple step 1 (consecutive) access.
2566 FORNOW: handle only one dimensional arrays, and pointer accesses. */
2568 static bool
2569 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2571 unsigned int i;
2572 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2573 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2575 if (vect_debug_details (NULL))
2576 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
2578 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2580 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2581 bool ok = vect_analyze_data_ref_access (dr);
2582 if (!ok)
2584 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2585 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2586 fprintf (dump_file, "not vectorized: complicated access pattern.");
2587 return false;
2591 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2593 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2594 bool ok = vect_analyze_data_ref_access (dr);
2595 if (!ok)
2597 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2598 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2599 fprintf (dump_file, "not vectorized: complicated access pattern.");
2600 return false;
2604 return true;
2608 /* Function vect_analyze_pointer_ref_access.
2610 Input:
2611 STMT - a stmt that contains a data-ref
2612 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
2614 If the data-ref access is vectorizable, return a data_reference structure
2615 that represents it (DR). Otherwise, return NULL. */
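/* For example (for illustration only): in

     short *p = q;
     for (i = 0; i < N; i++){
       *p = x;
       p = p + 1;
     }

   the evolution of the pointer in the loop is {p_0, +, 2}_1 (p_0 being its
   value before the loop): the initial value is an SSA_NAME and the step (2)
   equals the size of the accessed type, so the access is consecutive; a
   data_reference is created for it with the index access function
   {0, +, 1}_1. A step that differs from the type size (a strided pointer
   walk) is rejected. */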
2617 static struct data_reference *
2618 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
2620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2621 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2622 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
2623 tree init, step;
2624 int step_val;
2625 tree reftype, innertype;
2626 enum machine_mode innermode;
2627 tree indx_access_fn;
2628 int loopnum = loop->num;
2629 struct data_reference *dr;
2631 if (!access_fn)
2633 if (vect_debug_stats (loop) || vect_debug_details (loop))
2634 fprintf (dump_file, "not vectorized: complicated pointer access.");
2635 return NULL;
2638 if (vect_debug_details (NULL))
2640 fprintf (dump_file, "Access function of ptr: ");
2641 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2644 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
2646 if (vect_debug_stats (loop) || vect_debug_details (loop))
2647 fprintf (dump_file, "not vectorized: pointer access is not simple.");
2648 return NULL;
2651 if (TREE_CODE (init) != SSA_NAME /* FORNOW */
2652 || !host_integerp (step,0))
2654 if (vect_debug_stats (loop) || vect_debug_details (loop))
2655 fprintf (dump_file,
2656 "not vectorized: non constant init/step for pointer access.");
2657 return NULL;
2660 step_val = TREE_INT_CST_LOW (step);
2662 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
2663 if (TREE_CODE (reftype) != POINTER_TYPE)
2665 if (vect_debug_stats (loop) || vect_debug_details (loop))
2666 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2667 return NULL;
2670 reftype = TREE_TYPE (init);
2671 if (TREE_CODE (reftype) != POINTER_TYPE)
2673 if (vect_debug_stats (loop) || vect_debug_details (loop))
2674 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
2675 return NULL;
2678 innertype = TREE_TYPE (reftype);
2679 innermode = TYPE_MODE (innertype);
2680 if (GET_MODE_SIZE (innermode) != step_val)
2682 /* FORNOW: support only consecutive access */
2683 if (vect_debug_stats (loop) || vect_debug_details (loop))
2684 fprintf (dump_file, "not vectorized: non consecutive access.");
2685 return NULL;
2688 indx_access_fn =
2689 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
2690 if (vect_debug_details (NULL))
2692 fprintf (dump_file, "Access function of ptr indx: ");
2693 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
2695 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
2696 return dr;
2700 /* Function vect_analyze_data_refs.
2702 Find all the data references in the loop.
2704 FORNOW: Handle aligned INDIRECT_REFs and one-dimensional ARRAY_REFs
2705 whose base is really an array (not a pointer) and whose alignment
2706 can be forced. This restriction will be relaxed. */
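/* For example (for illustration only):

     a[i] = x;      # one-dimensional ARRAY_REF with a VAR_DECL base: handled.
     *p = x;        # INDIRECT_REF through a pointer with a simple
                    # (consecutive) evolution: handled by
                    # vect_analyze_pointer_ref_access.
     a[i][j] = x;   # multi-dimensional ARRAY_REF: rejected (FORNOW).
     x = s.f;       # other forms of data-refs: rejected. */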
2708 static bool
2709 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2711 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2712 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2713 int nbbs = loop->num_nodes;
2714 block_stmt_iterator si;
2715 int j;
2716 struct data_reference *dr;
2718 if (vect_debug_details (NULL))
2719 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
2721 for (j = 0; j < nbbs; j++)
2723 basic_block bb = bbs[j];
2724 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2726 bool is_read = false;
2727 tree stmt = bsi_stmt (si);
2728 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2729 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2730 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2731 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2732 varray_type *datarefs = NULL;
2733 int nvuses, nv_may_defs, nv_must_defs;
2734 tree memref = NULL;
2735 tree array_base;
2736 tree symbl;
2738 /* Assumption: there exists a data-ref in the stmt if and only if
2739 it has vuses/vdefs. */
2741 if (!vuses && !v_may_defs && !v_must_defs)
2742 continue;
2744 nvuses = NUM_VUSES (vuses);
2745 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2746 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2748 if (nvuses && (nv_may_defs || nv_must_defs))
2750 if (vect_debug_details (NULL))
2752 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
2753 print_generic_expr (dump_file, stmt, TDF_SLIM);
2755 return false;
2758 if (TREE_CODE (stmt) != MODIFY_EXPR)
2760 if (vect_debug_details (NULL))
2762 fprintf (dump_file, "unexpected vops in stmt: ");
2763 print_generic_expr (dump_file, stmt, TDF_SLIM);
2765 return false;
2768 if (vuses)
2770 memref = TREE_OPERAND (stmt, 1);
2771 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
2772 is_read = true;
2774 else /* vdefs */
2776 memref = TREE_OPERAND (stmt, 0);
2777 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2778 is_read = false;
2781 if (TREE_CODE (memref) == INDIRECT_REF)
2783 dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
2784 if (! dr)
2785 return false;
2786 symbl = DR_BASE_NAME (dr);
2788 else if (TREE_CODE (memref) == ARRAY_REF)
2790 tree base;
2791 tree offset = size_zero_node;
2792 array_base = TREE_OPERAND (memref, 0);
2794 /* FORNOW: make sure that the array is one dimensional.
2795 This restriction will be relaxed in the future. */
2796 if (TREE_CODE (array_base) == ARRAY_REF)
2798 if (vect_debug_stats (loop) || vect_debug_details (loop))
2800 fprintf (dump_file,
2801 "not vectorized: multi-dimensional array.");
2802 print_generic_expr (dump_file, stmt, TDF_SLIM);
2804 return false;
2807 dr = analyze_array (stmt, memref, is_read);
2809 /* Find the relevant symbol for aliasing purposes. */
2810 base = DR_BASE_NAME (dr);
2811 switch (TREE_CODE (base))
2813 case VAR_DECL:
2814 symbl = base;
2815 break;
2816 /* FORNOW: Disabled.
2817 case INDIRECT_REF:
2818 symbl = TREE_OPERAND (base, 0);
2819 break;
2820 */
2821 case COMPONENT_REF:
2822 /* CHECKME: could have recorded more accurate information,
2823 i.e., the actual FIELD_DECL that is being referenced,
2824 but later passes expect VAR_DECL as the nmt. */
2825 symbl = vect_get_base_decl_and_bit_offset (base, &offset);
2826 if (symbl)
2827 break;
2828 /* fall through */
2829 default:
2830 if (vect_debug_stats (loop) || vect_debug_details (loop))
2832 fprintf (dump_file,
2833 "not vectorized: unhandled struct/class field access ");
2834 print_generic_expr (dump_file, stmt, TDF_SLIM);
2836 return false;
2837 } /* switch */
2839 else
2841 if (vect_debug_stats (loop) || vect_debug_details (loop))
2843 fprintf (dump_file, "not vectorized: unhandled data ref: ");
2844 print_generic_expr (dump_file, stmt, TDF_SLIM);
2846 return false;
2849 /* Find and record the memtag assigned to this data-ref. */
2850 if (TREE_CODE (symbl) == VAR_DECL)
2851 STMT_VINFO_MEMTAG (stmt_info) = symbl;
2852 else if (TREE_CODE (symbl) == SSA_NAME)
2854 tree tag;
2855 symbl = SSA_NAME_VAR (symbl);
2856 tag = get_var_ann (symbl)->type_mem_tag;
2857 if (!tag)
2859 tree ptr = TREE_OPERAND (memref, 0);
2860 if (TREE_CODE (ptr) == SSA_NAME)
2861 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
2863 if (!tag)
2865 if (vect_debug_stats (loop) || vect_debug_details (loop))
2866 fprintf (dump_file, "not vectorized: no memtag for ref.");
2867 return false;
2869 STMT_VINFO_MEMTAG (stmt_info) = tag;
2871 else
2873 if (vect_debug_stats (loop) || vect_debug_details (loop))
2875 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
2876 print_generic_expr (dump_file, memref, TDF_SLIM);
2878 return false;
2881 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2882 STMT_VINFO_DATA_REF (stmt_info) = dr;
2886 return true;
2890 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2892 /* Function vect_mark_relevant.
2894 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2896 static void
2897 vect_mark_relevant (varray_type worklist, tree stmt)
2899 stmt_vec_info stmt_info;
2901 if (vect_debug_details (NULL))
2902 fprintf (dump_file, "mark relevant.");
2904 if (TREE_CODE (stmt) == PHI_NODE)
2906 VARRAY_PUSH_TREE (worklist, stmt);
2907 return;
2910 stmt_info = vinfo_for_stmt (stmt);
2912 if (!stmt_info)
2914 if (vect_debug_details (NULL))
2916 fprintf (dump_file, "mark relevant: no stmt info!!.");
2917 print_generic_expr (dump_file, stmt, TDF_SLIM);
2919 return;
2922 if (STMT_VINFO_RELEVANT_P (stmt_info))
2924 if (vect_debug_details (NULL))
2925 fprintf (dump_file, "already marked relevant.");
2926 return;
2929 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2930 VARRAY_PUSH_TREE (worklist, stmt);
2934 /* Function vect_stmt_relevant_p.
2936 Return true if STMT in loop that is represented by LOOP_VINFO is
2937 "relevant for vectorization".
2939 A stmt is considered "relevant for vectorization" if:
2940 - it has uses outside the loop.
2941 - it has vdefs (it alters memory).
2942 - it is a control stmt in the loop (except for the exit condition).
2944 CHECKME: what other side effects would the vectorizer allow? */
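/* For example (for illustration only): in

     for (i = 0; i < N; i++){
       a[i] = b[i];     # relevant: it has a vdef (it stores to memory).
       t_1 = b[i] + 1;  # not relevant by itself.
     }
     ... = t_1;         # this use outside the loop makes the def of t_1
                        # relevant as well. */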
2946 static bool
2947 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2949 v_may_def_optype v_may_defs;
2950 v_must_def_optype v_must_defs;
2951 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2952 int i;
2953 dataflow_t df;
2954 int num_uses;
2956 /* cond stmt other than loop exit cond. */
2957 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2958 return true;
2960 /* changing memory. */
2961 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2962 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2963 if (v_may_defs || v_must_defs)
2965 if (vect_debug_details (NULL))
2966 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
2967 return true;
2970 /* uses outside the loop. */
2971 df = get_immediate_uses (stmt);
2972 num_uses = num_immediate_uses (df);
2973 for (i = 0; i < num_uses; i++)
2975 tree use = immediate_use (df, i);
2976 basic_block bb = bb_for_stmt (use);
2977 if (!flow_bb_inside_loop_p (loop, bb))
2979 if (vect_debug_details (NULL))
2980 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
2981 return true;
2985 return false;
2989 /* Function vect_mark_stmts_to_be_vectorized.
2991 Not all stmts in the loop need to be vectorized. For example:
2993 for i...
2994 for j...
2995 1. T0 = i + j
2996 2. T1 = a[T0]
2998 3. j = j + 1
3000 Stmts 1 and 3 do not need to be vectorized, because loop control and
3001 addressing of vectorized data-refs are handled differently.
3003 This pass detects such stmts. */
3005 static bool
3006 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3008 varray_type worklist;
3009 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3010 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3011 unsigned int nbbs = loop->num_nodes;
3012 block_stmt_iterator si;
3013 tree stmt;
3014 stmt_ann_t ann;
3015 unsigned int i;
3016 int j;
3017 use_optype use_ops;
3018 stmt_vec_info stmt_info;
3020 if (vect_debug_details (NULL))
3021 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3023 VARRAY_TREE_INIT (worklist, 64, "work list");
3025 /* 1. Init worklist. */
3027 for (i = 0; i < nbbs; i++)
3029 basic_block bb = bbs[i];
3030 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3032 stmt = bsi_stmt (si);
3034 if (vect_debug_details (NULL))
3036 fprintf (dump_file, "init: stmt relevant? ");
3037 print_generic_expr (dump_file, stmt, TDF_SLIM);
3040 stmt_info = vinfo_for_stmt (stmt);
3041 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
3043 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3044 vect_mark_relevant (worklist, stmt);
3049 /* 2. Process_worklist */
3051 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3053 stmt = VARRAY_TOP_TREE (worklist);
3054 VARRAY_POP (worklist);
3056 if (vect_debug_details (NULL))
3058 fprintf (dump_file, "worklist: examine stmt: ");
3059 print_generic_expr (dump_file, stmt, TDF_SLIM);
3062 /* Examine the USES in this statement. Mark all the statements which
3063 feed this statement's uses as "relevant", unless the USE is used as
3064 an array index. */
3066 if (TREE_CODE (stmt) == PHI_NODE)
3068 /* follow the def-use chain inside the loop. */
3069 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3071 tree arg = PHI_ARG_DEF (stmt, j);
3072 tree def_stmt = NULL_TREE;
3073 basic_block bb;
3074 if (!vect_is_simple_use (arg, loop, &def_stmt))
3076 if (vect_debug_details (NULL))
3077 fprintf (dump_file, "worklist: unsupported use.");
3078 varray_clear (worklist);
3079 return false;
3081 if (!def_stmt)
3082 continue;
3084 if (vect_debug_details (NULL))
3086 fprintf (dump_file, "worklist: def_stmt: ");
3087 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3090 bb = bb_for_stmt (def_stmt);
3091 if (flow_bb_inside_loop_p (loop, bb))
3092 vect_mark_relevant (worklist, def_stmt);
3096 ann = stmt_ann (stmt);
3097 use_ops = USE_OPS (ann);
3099 for (i = 0; i < NUM_USES (use_ops); i++)
3101 tree use = USE_OP (use_ops, i);
3103 /* We are only interested in uses that need to be vectorized. Uses
3104 that are used for address computation are not considered relevant.
3105 */
3106 if (exist_non_indexing_operands_for_use_p (use, stmt))
3108 tree def_stmt = NULL_TREE;
3109 basic_block bb;
3110 if (!vect_is_simple_use (use, loop, &def_stmt))
3112 if (vect_debug_details (NULL))
3113 fprintf (dump_file, "worklist: unsupported use.");
3114 varray_clear (worklist);
3115 return false;
3118 if (!def_stmt)
3119 continue;
3121 if (vect_debug_details (NULL))
3123 fprintf (dump_file, "worklist: examine use %d: ", i);
3124 print_generic_expr (dump_file, use, TDF_SLIM);
3127 bb = bb_for_stmt (def_stmt);
3128 if (flow_bb_inside_loop_p (loop, bb))
3129 vect_mark_relevant (worklist, def_stmt);
3132 } /* while worklist */
3134 varray_clear (worklist);
3135 return true;
3139 /* Function vect_get_loop_niters.
3141 Determine how many iterations the loop is executed. */
3143 static tree
3144 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3146 tree niters;
3148 if (vect_debug_details (NULL))
3149 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3151 niters = number_of_iterations_in_loop (loop);
3153 if (niters != NULL_TREE
3154 && niters != chrec_dont_know
3155 && host_integerp (niters,0))
3157 *number_of_iterations = TREE_INT_CST_LOW (niters);
3159 if (vect_debug_details (NULL))
3160 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3161 *number_of_iterations);
3164 return get_loop_exit_condition (loop);
3168 /* Function vect_analyze_loop_form.
3170 Verify the following restrictions (some may be relaxed in the future):
3171 - it's an inner-most loop
3172 - number of BBs = 2 (which are the loop header and the latch)
3173 - the loop has a pre-header
3174 - the loop has a single entry and exit
3175 - the loop exit condition is simple enough, and the number of iterations
3176 can be analyzed (a countable loop). */
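/* For example (for illustration only), a countable do-while style loop
   such as

     loop:
       i_1 = PHI (0, i_2);
       a[i_1] = b[i_1];
       i_2 = i_1 + 1;
       if (i_2 < 256) goto loop;   # exit condition at the end; empty latch.

   is accepted (the number of iterations, 256, is known); a loop whose
   latch contains statements, a loop with more than two basic blocks
   (e.g. because of control flow in the body), or a loop whose number of
   iterations cannot be determined, is rejected. */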
3178 static loop_vec_info
3179 vect_analyze_loop_form (struct loop *loop)
3181 loop_vec_info loop_vinfo;
3182 tree loop_cond;
3183 HOST_WIDE_INT number_of_iterations = -1;
3185 if (vect_debug_details (loop))
3186 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3188 if (loop->inner
3189 || !loop->single_exit
3190 || loop->num_nodes != 2)
3192 if (vect_debug_stats (loop) || vect_debug_details (loop))
3194 fprintf (dump_file, "not vectorized: bad loop form. ");
3195 if (loop->inner)
3196 fprintf (dump_file, "nested loop.");
3197 else if (!loop->single_exit)
3198 fprintf (dump_file, "multiple exits.");
3199 else if (loop->num_nodes != 2)
3200 fprintf (dump_file, "too many BBs in loop.");
3203 return NULL;
3206 /* We assume that the loop exit condition is at the end of the loop, i.e.,
3207 that the loop is represented as a do-while (with a proper if-guard
3208 before the loop if needed), where the loop header contains all the
3209 executable statements, and the latch is empty. */
3210 if (!empty_block_p (loop->latch))
3212 if (vect_debug_stats (loop) || vect_debug_details (loop))
3213 fprintf (dump_file, "not vectorized: unexpected loop form.");
3214 return NULL;
3217 if (empty_block_p (loop->header))
3219 if (vect_debug_stats (loop) || vect_debug_details (loop))
3220 fprintf (dump_file, "not vectorized: empty loop.");
3221 return NULL;
3224 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3225 if (!loop_cond)
3227 if (vect_debug_stats (loop) || vect_debug_details (loop))
3228 fprintf (dump_file, "not vectorized: complicated exit condition.");
3229 return NULL;
3232 if (number_of_iterations < 0)
3234 if (vect_debug_stats (loop) || vect_debug_details (loop))
3235 fprintf (dump_file, "not vectorized: unknown loop bound.");
3236 return NULL;
3239 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3241 if (vect_debug_stats (loop) || vect_debug_details (loop))
3242 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3243 return NULL;
3246 loop_vinfo = new_loop_vec_info (loop);
3247 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3248 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3250 return loop_vinfo;
3254 /* Function vect_analyze_loop.
3256 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3257 for it. The different analyses will record information in the
3258 loop_vec_info struct. */
3260 static loop_vec_info
3261 vect_analyze_loop (struct loop *loop)
3263 bool ok;
3264 loop_vec_info loop_vinfo;
3266 if (vect_debug_details (NULL))
3267 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3269 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3271 loop_vinfo = vect_analyze_loop_form (loop);
3272 if (!loop_vinfo)
3274 if (vect_debug_details (loop))
3275 fprintf (dump_file, "bad loop form.");
3276 return NULL;
3279 /* Find all data references in the loop (which correspond to vdefs/vuses)
3280 and analyze their evolution in the loop.
3282 FORNOW: Handle only simple, one-dimensional array references whose
3283 alignment can be forced, and aligned pointer-references. */
3285 ok = vect_analyze_data_refs (loop_vinfo);
3286 if (!ok)
3288 if (vect_debug_details (loop))
3289 fprintf (dump_file, "bad data references.");
3290 destroy_loop_vec_info (loop_vinfo);
3291 return NULL;
3295 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3297 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3298 if (!ok)
3302 if (vect_debug_details (loop))
3303 fprintf (dump_file, "not vectorized: unexpected pattern.");
3304 destroy_loop_vec_info (loop_vinfo);
3305 return NULL;
3309 /* Check that all cross-iteration scalar data-flow cycles are OK.
3310 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3312 ok = vect_analyze_scalar_cycles (loop_vinfo);
3313 if (!ok)
3315 if (vect_debug_details (loop))
3316 fprintf (dump_file, "bad scalar cycle.");
3317 destroy_loop_vec_info (loop_vinfo);
3318 return NULL;
3322 /* Analyze data dependences between the data-refs in the loop.
3323 FORNOW: fail at the first data dependence that we encounter. */
3325 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3326 if (!ok)
3328 if (vect_debug_details (loop))
3329 fprintf (dump_file, "bad data dependence.");
3330 destroy_loop_vec_info (loop_vinfo);
3331 return NULL;
3335 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3336 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3338 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3339 if (!ok)
3341 if (vect_debug_details (loop))
3342 fprintf (dump_file, "bad data access.");
3343 destroy_loop_vec_info (loop_vinfo);
3344 return NULL;
3348 /* Analyze the alignment of the data-refs in the loop.
3349 FORNOW: Only aligned accesses are handled. */
3351 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3352 if (!ok)
3354 if (vect_debug_details (loop))
3355 fprintf (dump_file, "bad data alignment.");
3356 destroy_loop_vec_info (loop_vinfo);
3357 return NULL;
3361 /* Scan all the operations in the loop and make sure they are
3362 vectorizable. */
3364 ok = vect_analyze_operations (loop_vinfo);
3365 if (!ok)
3367 if (vect_debug_details (loop))
3368 fprintf (dump_file, "bad operation or unsupported loop bound.");
3369 destroy_loop_vec_info (loop_vinfo);
3370 return NULL;
3373 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3375 return loop_vinfo;
3379 /* Function need_imm_uses_for.
3381 Return whether we ought to include information for 'var'
3382 when calculating immediate uses. For this pass we only want use
3383 information for non-virtual variables. */
3385 static bool
3386 need_imm_uses_for (tree var)
3388 return is_gimple_reg (var);
3392 /* Function vectorize_loops.
3394 Entry Point to loop vectorization phase. */
3396 void
3397 vectorize_loops (struct loops *loops)
3399 unsigned int i, loops_num;
3400 unsigned int num_vectorized_loops = 0;
3402 /* Does the target support SIMD? */
3403 /* FORNOW: until more sophisticated machine modelling is in place. */
3404 if (!UNITS_PER_SIMD_WORD)
3406 if (vect_debug_details (NULL))
3407 fprintf (dump_file, "vectorizer: target vector size is not defined.");
3408 return;
3411 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
3413 /* ----------- Analyze loops. ----------- */
3415 /* If some loop was duplicated, it gets a bigger number
3416 than all previously defined loops. This fact allows us to run
3417 only over the initial loops, skipping newly generated ones. */
3418 loops_num = loops->num;
3419 for (i = 1; i < loops_num; i++)
3421 loop_vec_info loop_vinfo;
3422 struct loop *loop = loops->parray[i];
3424 if (!loop)
3425 continue;
3427 loop_vinfo = vect_analyze_loop (loop);
3428 loop->aux = loop_vinfo;
3430 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
3431 continue;
3433 vect_transform_loop (loop_vinfo, loops);
3434 num_vectorized_loops++;
3437 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
3438 fprintf (dump_file, "\nvectorized %u loops in function.\n",
3439 num_vectorized_loops);
3441 /* ----------- Finalize. ----------- */
3443 free_df ();
3444 for (i = 1; i < loops_num; i++)
3446 struct loop *loop = loops->parray[i];
3447 loop_vec_info loop_vinfo = loop ? loop->aux : NULL;
3448 if (!loop)
3449 continue;
3450 destroy_loop_vec_info (loop_vinfo);
3451 loop->aux = NULL;
3454 loop_commit_inserts ();
3455 rewrite_into_ssa (false);
3456 if (bitmap_first_set_bit (vars_to_rename) >= 0)
3458 /* The rewrite of ssa names may cause violation of loop closed ssa
3459 form invariants. TODO -- avoid these rewrites completely.
3460 Information in virtual phi nodes is sufficient for it. */
3461 rewrite_into_loop_closed_ssa ();
3463 bitmap_clear (vars_to_rename);