* trans-stmt.c (gfc_trans_simple_do): New function.
[official-gcc.git] / gcc / tree-vectorizer.c
blobe4de78637d3ac34dc1e22b9e95628f66fae36bda
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
148 /* Main analysis functions. */
149 static loop_vec_info vect_analyze_loop (struct loop *);
150 static loop_vec_info vect_analyze_loop_form (struct loop *);
151 static bool vect_analyze_data_refs (loop_vec_info);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153 static bool vect_analyze_scalar_cycles (loop_vec_info);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info);
156 static void vect_compute_data_refs_alignment (loop_vec_info);
157 static bool vect_analyze_operations (loop_vec_info);
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info, struct loops *);
161 static void vect_transform_loop_bound (loop_vec_info);
162 static bool vect_transform_stmt (tree, block_stmt_iterator *);
163 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167 static void vect_align_data_ref (tree);
168 static void vect_enhance_data_refs_alignment (loop_vec_info);
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree , struct loop *, tree *);
172 static bool exist_non_indexing_operands_for_use_p (tree, tree);
173 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174 static void vect_mark_relevant (varray_type, tree);
175 static bool vect_stmt_relevant_p (tree, loop_vec_info);
176 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference *, loop_vec_info);
179 static bool vect_analyze_data_ref_access (struct data_reference *);
180 static bool vect_get_first_index (tree, tree *);
181 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
182 static struct data_reference * vect_analyze_pointer_ref_access
183 (tree, tree, bool);
184 static tree vect_get_base_and_bit_offset
185 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
186 static struct data_reference * vect_analyze_pointer_ref_access
187 (tree, tree, bool);
188 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
189 static tree vect_compute_array_ref_alignment
190 (struct data_reference *, loop_vec_info, tree, tree *);
191 static tree vect_get_ptr_offset (tree, tree, tree *);
192 static tree vect_get_symbl_and_dr
193 (tree, tree, bool, loop_vec_info, struct data_reference **);
195 /* Utility functions for the code transformation. */
196 static tree vect_create_destination_var (tree, tree);
197 static tree vect_create_data_ref_ptr
198 (tree, block_stmt_iterator *, tree, tree *, bool);
199 static tree vect_create_index_for_vector_ref
200 (struct loop *, block_stmt_iterator *);
201 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
202 static tree get_vectype_for_scalar_type (tree);
203 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
204 static tree vect_get_vec_def_for_operand (tree, tree);
205 static tree vect_init_vector (tree, tree);
206 static void vect_finish_stmt_generation
207 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
209 /* Utilities for creation and deletion of vec_info structs. */
210 loop_vec_info new_loop_vec_info (struct loop *loop);
211 void destroy_loop_vec_info (loop_vec_info);
212 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
214 static bool vect_debug_stats (struct loop *loop);
215 static bool vect_debug_details (struct loop *loop);
218 /* Function new_stmt_vec_info.
220 Create and initialize a new stmt_vec_info struct for STMT. */
222 stmt_vec_info
223 new_stmt_vec_info (tree stmt, struct loop *loop)
225 stmt_vec_info res;
226 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
228 STMT_VINFO_TYPE (res) = undef_vec_info_type;
229 STMT_VINFO_STMT (res) = stmt;
230 STMT_VINFO_LOOP (res) = loop;
231 STMT_VINFO_RELEVANT_P (res) = 0;
232 STMT_VINFO_VECTYPE (res) = NULL;
233 STMT_VINFO_VEC_STMT (res) = NULL;
234 STMT_VINFO_DATA_REF (res) = NULL;
235 STMT_VINFO_MEMTAG (res) = NULL;
236 STMT_VINFO_VECT_DR_BASE (res) = NULL;
238 return res;
242 /* Function new_loop_vec_info.
244 Create and initialize a new loop_vec_info struct for LOOP, as well as
245 stmt_vec_info structs for all the stmts in LOOP. */
247 loop_vec_info
248 new_loop_vec_info (struct loop *loop)
250 loop_vec_info res;
251 basic_block *bbs;
252 block_stmt_iterator si;
253 unsigned int i;
255 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
257 bbs = get_loop_body (loop);
259 /* Create stmt_info for all stmts in the loop. */
260 for (i = 0; i < loop->num_nodes; i++)
262 basic_block bb = bbs[i];
263 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
265 tree stmt = bsi_stmt (si);
266 stmt_ann_t ann;
268 get_stmt_operands (stmt);
269 ann = stmt_ann (stmt);
270 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
274 LOOP_VINFO_LOOP (res) = loop;
275 LOOP_VINFO_BBS (res) = bbs;
276 LOOP_VINFO_EXIT_COND (res) = NULL;
277 LOOP_VINFO_NITERS (res) = -1;
278 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
279 LOOP_VINFO_VECT_FACTOR (res) = 0;
280 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
281 "loop_write_datarefs");
282 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
283 "loop_read_datarefs");
284 return res;
288 /* Function destroy_loop_vec_info.
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
293 void
294 destroy_loop_vec_info (loop_vec_info loop_vinfo)
296 struct loop *loop;
297 basic_block *bbs;
298 int nbbs;
299 block_stmt_iterator si;
300 int j;
302 if (!loop_vinfo)
303 return;
305 loop = LOOP_VINFO_LOOP (loop_vinfo);
307 bbs = LOOP_VINFO_BBS (loop_vinfo);
308 nbbs = loop->num_nodes;
310 for (j = 0; j < nbbs; j++)
312 basic_block bb = bbs[j];
313 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
315 tree stmt = bsi_stmt (si);
316 stmt_ann_t ann = stmt_ann (stmt);
317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
318 free (stmt_info);
319 set_stmt_info (ann, NULL);
323 free (LOOP_VINFO_BBS (loop_vinfo));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
327 free (loop_vinfo);
331 /* Function debug_loop_stats.
333 For vectorization statistics dumps. */
335 static bool
336 vect_debug_stats (struct loop *loop)
338 basic_block bb;
339 block_stmt_iterator si;
340 tree node = NULL_TREE;
342 if (!dump_file || !(dump_flags & TDF_STATS))
343 return false;
345 if (!loop)
347 fprintf (dump_file, "\n");
348 return true;
351 if (!loop->header)
352 return false;
354 bb = loop->header;
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 node = bsi_stmt (si);
359 if (node && EXPR_P (node) && EXPR_LOCUS (node))
360 break;
363 if (node && EXPR_P (node) && EXPR_LOCUS (node)
364 && EXPR_FILENAME (node) && EXPR_LINENO (node))
366 fprintf (dump_file, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node), EXPR_LINENO (node));
368 return true;
371 return false;
375 /* Function debug_loop_details.
377 For vectorization debug dumps. */
379 static bool
380 vect_debug_details (struct loop *loop)
382 basic_block bb;
383 block_stmt_iterator si;
384 tree node = NULL_TREE;
386 if (!dump_file || !(dump_flags & TDF_DETAILS))
387 return false;
389 if (!loop)
391 fprintf (dump_file, "\n");
392 return true;
395 if (!loop->header)
396 return false;
398 bb = loop->header;
400 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
402 node = bsi_stmt (si);
403 if (node && EXPR_P (node) && EXPR_LOCUS (node))
404 break;
407 if (node && EXPR_P (node) && EXPR_LOCUS (node)
408 && EXPR_FILENAME (node) && EXPR_LINENO (node))
410 fprintf (dump_file, "\nloop at %s:%d: ",
411 EXPR_FILENAME (node), EXPR_LINENO (node));
412 return true;
415 return false;
419 /* Function vect_get_ptr_offset
421 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
423 static tree
424 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
425 tree vectype ATTRIBUTE_UNUSED,
426 tree *offset ATTRIBUTE_UNUSED)
428 /* TODO: Use alignment information. */
429 return NULL_TREE;
433 /* Function vect_get_base_and_bit_offset
435 Return the BASE of the data reference EXPR.
436 If VECTYPE is given, also compute the OFFSET from BASE in bits.
437 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
438 bits of 'a.b[i] + 4B' from a.
440 Input:
441 EXPR - the memory reference that is being analyzed
442 DR - the data_reference struct of the _original_ memory reference
443 (Note: DR_REF (DR) is not necessarily EXPR)
444 VECTYPE - the type that defines the alignment (i.e, we compute
445 alignment relative to TYPE_ALIGN(VECTYPE))
447 Output:
448 BASE (returned value) - the base of the data reference EXPR.
449 E.g, if EXPR is a.b[k].c[i][j] the returned
450 base is a.
451 OFFSET - offset of EXPR from BASE in bits
452 BASE_ALIGNED_P - indicates if BASE is aligned
454 If something unexpected is encountered (an unsupported form of data-ref),
455 or if VECTYPE is given but OFFSET cannot be determined:
456 then NULL_TREE is returned. */
458 static tree
459 vect_get_base_and_bit_offset (struct data_reference *dr,
460 tree expr,
461 tree vectype,
462 loop_vec_info loop_vinfo,
463 tree *offset,
464 bool *base_aligned_p)
466 tree this_offset = size_zero_node;
467 tree base = NULL_TREE;
468 tree next_ref;
469 tree oprnd0, oprnd1;
470 struct data_reference *array_dr;
471 enum tree_code code = TREE_CODE (expr);
473 *base_aligned_p = false;
475 switch (code)
477 /* These cases end the recursion: */
478 case VAR_DECL:
479 *offset = size_zero_node;
480 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
481 *base_aligned_p = true;
482 return expr;
484 case SSA_NAME:
485 if (!vectype)
486 return expr;
488 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
489 return NULL_TREE;
491 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
493 base = vect_get_ptr_offset (expr, vectype, offset);
494 if (base)
495 *base_aligned_p = true;
497 else
499 *base_aligned_p = true;
500 *offset = size_zero_node;
501 base = expr;
503 return base;
505 case INTEGER_CST:
506 *offset = int_const_binop (MULT_EXPR, expr,
507 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
508 return expr;
510 /* These cases continue the recursion: */
511 case COMPONENT_REF:
512 oprnd0 = TREE_OPERAND (expr, 0);
513 oprnd1 = TREE_OPERAND (expr, 1);
515 this_offset = bit_position (oprnd1);
516 if (vectype && !host_integerp (this_offset, 1))
517 return NULL_TREE;
518 next_ref = oprnd0;
519 break;
521 case ADDR_EXPR:
522 oprnd0 = TREE_OPERAND (expr, 0);
523 next_ref = oprnd0;
524 break;
526 case INDIRECT_REF:
527 oprnd0 = TREE_OPERAND (expr, 0);
528 next_ref = oprnd0;
529 break;
531 case ARRAY_REF:
532 if (DR_REF (dr) != expr)
533 /* Build array data_reference struct if the existing DR_REF
534 doesn't match EXPR. This happens, for example, when the
535 EXPR is *T and T is initialized to &arr[indx]. The DR struct
536 contains information on the access of T, not of arr. In order
537 to continue the analysis, we create a new DR struct that
538 describes the access of arr.
540 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
541 else
542 array_dr = dr;
544 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
545 vectype, &this_offset);
546 if (!next_ref)
547 return NULL_TREE;
549 if (vectype &&
550 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
552 *offset = this_offset;
553 *base_aligned_p = true;
554 return next_ref;
556 break;
558 case PLUS_EXPR:
559 case MINUS_EXPR:
560 /* In case we have a PLUS_EXPR of the form
561 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
562 This is verified in vect_get_symbl_and_dr. */
563 oprnd0 = TREE_OPERAND (expr, 0);
564 oprnd1 = TREE_OPERAND (expr, 1);
566 base = vect_get_base_and_bit_offset
567 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
568 if (vectype && !base)
569 return NULL_TREE;
571 next_ref = oprnd0;
572 break;
574 default:
575 return NULL_TREE;
578 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
579 loop_vinfo, offset, base_aligned_p);
581 if (vectype && base)
583 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
584 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
585 return NULL_TREE;
587 if (vect_debug_details (NULL))
589 print_generic_expr (dump_file, expr, TDF_SLIM);
590 fprintf (dump_file, " --> total offset for ref: ");
591 print_generic_expr (dump_file, *offset, TDF_SLIM);
594 return base;
599 /* Function vect_force_dr_alignment_p.
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
604 static bool
605 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
607 if (TREE_CODE (decl) != VAR_DECL)
608 return false;
610 if (DECL_EXTERNAL (decl))
611 return false;
613 if (TREE_STATIC (decl))
614 return (alignment <= MAX_OFILE_ALIGNMENT);
615 else
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment <= PREFERRED_STACK_BOUNDARY);
625 /* Function vect_get_new_vect_var.
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
630 provided. */
632 static tree
633 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
635 const char *prefix;
636 int prefix_len;
637 tree new_vect_var;
639 if (var_kind == vect_simple_var)
640 prefix = "vect_";
641 else
642 prefix = "vect_p";
644 prefix_len = strlen (prefix);
646 if (name)
647 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
648 else
649 new_vect_var = create_tmp_var (type, prefix);
651 return new_vect_var;
655 /* Function vect_create_index_for_vector_ref.
657 Create (and return) an index variable, along with it's update chain in the
658 loop. This variable will be used to access a memory location in a vector
659 operation.
661 Input:
662 LOOP: The loop being vectorized.
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
666 Output:
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
669 indexed reference.
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
673 index.
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
680 static tree
681 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
683 tree init, step;
684 tree indx_before_incr, indx_after_incr;
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
690 init = integer_zero_node;
691 step = integer_one_node;
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init, step, NULL_TREE, loop, bsi, false,
695 &indx_before_incr, &indx_after_incr);
697 return indx_before_incr;
701 /* Function vect_create_addr_base_for_vector_ref.
703 Create an expression that computes the address of the first memory location
704 that will be accessed for a data reference.
706 Input:
707 STMT: The statement containing the data reference.
708 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
709 OFFSET: Optional. If supplied, it is be added to the initial address.
711 Output:
712 1. Return an SSA_NAME whose value is the address of the memory location of the
713 first vector of the data reference.
714 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
715 these statement(s) which define the returned SSA_NAME.
717 FORNOW: We are only handling array accesses with step 1. */
719 static tree
720 vect_create_addr_base_for_vector_ref (tree stmt,
721 tree *new_stmt_list,
722 tree offset)
724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
725 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
726 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
727 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
728 tree base_name = unshare_expr (DR_BASE_NAME (dr));
729 tree ref = DR_REF (dr);
730 tree data_ref_base_type = TREE_TYPE (data_ref_base);
731 tree scalar_type = TREE_TYPE (ref);
732 tree scalar_ptr_type = build_pointer_type (scalar_type);
733 tree access_fn;
734 tree init_val, step, init_oval;
735 bool ok;
736 bool is_ptr_ref, is_array_ref, is_addr_expr;
737 tree array_base;
738 tree vec_stmt;
739 tree new_temp;
740 tree array_ref;
741 tree addr_base, addr_expr;
742 tree dest, new_stmt;
744 /* Only the access function of the last index is relevant (i_n in
745 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
746 access_fn = DR_ACCESS_FN (dr, 0);
747 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
748 if (!ok)
749 init_oval = integer_zero_node;
751 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
752 && TREE_CODE (data_ref_base) == SSA_NAME;
753 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
754 && (TREE_CODE (data_ref_base) == VAR_DECL
755 || TREE_CODE (data_ref_base) == COMPONENT_REF
756 || TREE_CODE (data_ref_base) == ARRAY_REF);
757 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
758 || TREE_CODE (data_ref_base) == PLUS_EXPR
759 || TREE_CODE (data_ref_base) == MINUS_EXPR;
760 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
762 /** Create: &(base[init_val])
764 if data_ref_base is an ARRAY_TYPE:
765 base = data_ref_base
767 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
768 base = *((scalar_array *) data_ref_base)
771 if (is_array_ref)
772 array_base = data_ref_base;
773 else /* is_ptr_ref or is_addr_expr */
775 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
776 tree scalar_array_type = build_array_type (scalar_type, 0);
777 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
778 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
779 add_referenced_tmp_var (array_ptr);
781 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
782 add_referenced_tmp_var (dest);
783 data_ref_base =
784 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
785 append_to_statement_list_force (new_stmt, new_stmt_list);
787 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
788 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
789 new_temp = make_ssa_name (array_ptr, vec_stmt);
790 TREE_OPERAND (vec_stmt, 0) = new_temp;
791 append_to_statement_list_force (vec_stmt, new_stmt_list);
793 /* (*array_ptr) */
794 array_base = build_fold_indirect_ref (new_temp);
797 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
798 add_referenced_tmp_var (dest);
799 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
800 append_to_statement_list_force (new_stmt, new_stmt_list);
802 if (offset)
804 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
805 add_referenced_tmp_var (tmp);
806 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
807 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
808 init_val = make_ssa_name (tmp, vec_stmt);
809 TREE_OPERAND (vec_stmt, 0) = init_val;
810 append_to_statement_list_force (vec_stmt, new_stmt_list);
813 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
814 NULL_TREE, NULL_TREE);
815 addr_base = build_fold_addr_expr (array_ref);
817 /* addr_expr = addr_base */
818 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
819 get_name (base_name));
820 add_referenced_tmp_var (addr_expr);
821 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
822 new_temp = make_ssa_name (addr_expr, vec_stmt);
823 TREE_OPERAND (vec_stmt, 0) = new_temp;
824 append_to_statement_list_force (vec_stmt, new_stmt_list);
826 return new_temp;
830 /* Function get_vectype_for_scalar_type.
832 Returns the vector type corresponding to SCALAR_TYPE as supported
833 by the target. */
835 static tree
836 get_vectype_for_scalar_type (tree scalar_type)
838 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
839 int nbytes = GET_MODE_SIZE (inner_mode);
840 int nunits;
841 tree vectype;
843 if (nbytes == 0)
844 return NULL_TREE;
846 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
847 is expected. */
848 nunits = UNITS_PER_SIMD_WORD / nbytes;
850 vectype = build_vector_type (scalar_type, nunits);
851 if (TYPE_MODE (vectype) == BLKmode)
852 return NULL_TREE;
853 return vectype;
857 /* Function vect_align_data_ref.
859 Handle mislignment of a memory accesses.
861 FORNOW: Can't handle misaligned accesses.
862 Make sure that the dataref is aligned. */
864 static void
865 vect_align_data_ref (tree stmt)
867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
870 /* FORNOW: can't handle misaligned accesses;
871 all accesses expected to be aligned. */
872 gcc_assert (aligned_access_p (dr));
876 /* Function vect_create_data_ref_ptr.
878 Create a memory reference expression for vector access, to be used in a
879 vector load/store stmt. The reference is based on a new pointer to vector
880 type (vp).
882 Input:
883 1. STMT: a stmt that references memory. Expected to be of the form
884 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
885 2. BSI: block_stmt_iterator where new stmts can be added.
886 3. OFFSET (optional): an offset to be added to the initial address accessed
887 by the data-ref in STMT.
888 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
889 pointing to the initial address.
891 Output:
892 1. Declare a new ptr to vector_type, and have it point to the base of the
893 data reference (initial addressed accessed by the data reference).
894 For example, for vector of type V8HI, the following code is generated:
896 v8hi *vp;
897 vp = (v8hi *)initial_address;
899 if OFFSET is not supplied:
900 initial_address = &a[init];
901 if OFFSET is supplied:
902 initial_address = &a[init + OFFSET];
904 Return the initial_address in INITIAL_ADDRESS.
906 2. Create a data-reference in the loop based on the new vector pointer vp,
907 and using a new index variable 'idx' as follows:
909 vp' = vp + update
911 where if ONLY_INIT is true:
912 update = zero
913 and otherwise
914 update = idx + vector_type_size
916 Return the pointer vp'.
919 FORNOW: handle only aligned and consecutive accesses. */
921 static tree
922 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
923 tree *initial_address, bool only_init)
925 tree base_name;
926 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
927 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
928 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
930 tree vect_ptr_type;
931 tree vect_ptr;
932 tree tag;
933 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
934 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
935 vuse_optype vuses = STMT_VUSE_OPS (stmt);
936 int nvuses, nv_may_defs, nv_must_defs;
937 int i;
938 tree new_temp;
939 tree vec_stmt;
940 tree new_stmt_list = NULL_TREE;
941 tree idx;
942 edge pe = loop_preheader_edge (loop);
943 basic_block new_bb;
944 tree vect_ptr_init;
945 tree vectype_size;
946 tree ptr_update;
947 tree data_ref_ptr;
949 base_name = unshare_expr (DR_BASE_NAME (dr));
950 if (vect_debug_details (NULL))
952 tree data_ref_base = base_name;
953 fprintf (dump_file, "create array_ref of type: ");
954 print_generic_expr (dump_file, vectype, TDF_SLIM);
955 if (TREE_CODE (data_ref_base) == VAR_DECL)
956 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
957 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
958 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
959 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
960 fprintf (dump_file, "vectorizing a record based array ref: ");
961 else if (TREE_CODE (data_ref_base) == SSA_NAME)
962 fprintf (dump_file, "vectorizing a pointer ref: ");
963 print_generic_expr (dump_file, base_name, TDF_SLIM);
966 /** (1) Create the new vector-pointer variable: **/
968 vect_ptr_type = build_pointer_type (vectype);
969 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
970 get_name (base_name));
971 add_referenced_tmp_var (vect_ptr);
974 /** (2) Handle aliasing information of the new vector-pointer: **/
976 tag = STMT_VINFO_MEMTAG (stmt_info);
977 gcc_assert (tag);
978 get_var_ann (vect_ptr)->type_mem_tag = tag;
980 /* Mark for renaming all aliased variables
981 (i.e, the may-aliases of the type-mem-tag). */
982 nvuses = NUM_VUSES (vuses);
983 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
984 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
985 for (i = 0; i < nvuses; i++)
987 tree use = VUSE_OP (vuses, i);
988 if (TREE_CODE (use) == SSA_NAME)
989 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
991 for (i = 0; i < nv_may_defs; i++)
993 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
994 if (TREE_CODE (def) == SSA_NAME)
995 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
997 for (i = 0; i < nv_must_defs; i++)
999 tree def = V_MUST_DEF_OP (v_must_defs, i);
1000 if (TREE_CODE (def) == SSA_NAME)
1001 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1005 /** (3) Calculate the initial address the vector-pointer, and set
1006 the vector-pointer to point to it before the loop: **/
1008 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1009 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1010 offset);
1011 pe = loop_preheader_edge (loop);
1012 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1013 gcc_assert (!new_bb);
1014 *initial_address = new_temp;
1016 /* Create: p = (vectype *) initial_base */
1017 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1018 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1019 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1020 TREE_OPERAND (vec_stmt, 0) = new_temp;
1021 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1022 gcc_assert (!new_bb);
1023 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1026 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1028 if (only_init) /* No update in loop is required. */
1029 return vect_ptr_init;
1031 idx = vect_create_index_for_vector_ref (loop, bsi);
1033 /* Create: update = idx * vectype_size */
1034 ptr_update = create_tmp_var (integer_type_node, "update");
1035 add_referenced_tmp_var (ptr_update);
1036 vectype_size = build_int_cst (integer_type_node,
1037 GET_MODE_SIZE (TYPE_MODE (vectype)));
1038 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1039 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1040 new_temp = make_ssa_name (ptr_update, vec_stmt);
1041 TREE_OPERAND (vec_stmt, 0) = new_temp;
1042 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1044 /* Create: data_ref_ptr = vect_ptr_init + update */
1045 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1046 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1047 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1048 TREE_OPERAND (vec_stmt, 0) = new_temp;
1049 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1050 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1052 return data_ref_ptr;
1056 /* Function vect_create_destination_var.
1058 Create a new temporary of type VECTYPE. */
1060 static tree
1061 vect_create_destination_var (tree scalar_dest, tree vectype)
1063 tree vec_dest;
1064 const char *new_name;
1066 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1068 new_name = get_name (scalar_dest);
1069 if (!new_name)
1070 new_name = "var_";
1071 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1072 add_referenced_tmp_var (vec_dest);
1074 return vec_dest;
1078 /* Function vect_init_vector.
1080 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1081 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1082 used in the vectorization of STMT. */
1084 static tree
1085 vect_init_vector (tree stmt, tree vector_var)
1087 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1088 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1089 tree new_var;
1090 tree init_stmt;
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1092 tree vec_oprnd;
1093 edge pe;
1094 tree new_temp;
1095 basic_block new_bb;
1097 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1098 add_referenced_tmp_var (new_var);
1100 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1101 new_temp = make_ssa_name (new_var, init_stmt);
1102 TREE_OPERAND (init_stmt, 0) = new_temp;
1104 pe = loop_preheader_edge (loop);
1105 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1106 gcc_assert (!new_bb);
1108 if (vect_debug_details (NULL))
1110 fprintf (dump_file, "created new init_stmt: ");
1111 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1114 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1115 return vec_oprnd;
1119 /* Function vect_get_vec_def_for_operand.
1121 OP is an operand in STMT. This function returns a (vector) def that will be
1122 used in the vectorized stmt for STMT.
1124 In the case that OP is an SSA_NAME which is defined in the loop, then
1125 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1127 In case OP is an invariant or constant, a new stmt that creates a vector def
1128 needs to be introduced. */
1130 static tree
1131 vect_get_vec_def_for_operand (tree op, tree stmt)
1133 tree vec_oprnd;
1134 tree vec_stmt;
1135 tree def_stmt;
1136 stmt_vec_info def_stmt_info = NULL;
1137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1138 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1139 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1140 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1141 basic_block bb;
1142 tree vec_inv;
1143 tree t = NULL_TREE;
1144 tree def;
1145 int i;
1147 if (vect_debug_details (NULL))
1149 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1150 print_generic_expr (dump_file, op, TDF_SLIM);
1153 /** ===> Case 1: operand is a constant. **/
1155 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1157 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1159 tree vec_cst;
1160 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1161 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1162 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1163 tree t = NULL_TREE;
1164 int i;
1166 /* Build a tree with vector elements. */
1167 if (vect_debug_details (NULL))
1168 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
1170 for (i = nunits - 1; i >= 0; --i)
1172 t = tree_cons (NULL_TREE, op, t);
1174 vec_cst = build_vector (vectype, t);
1175 return vect_init_vector (stmt, vec_cst);
1178 gcc_assert (TREE_CODE (op) == SSA_NAME);
1180 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1182 def_stmt = SSA_NAME_DEF_STMT (op);
1183 def_stmt_info = vinfo_for_stmt (def_stmt);
1185 if (vect_debug_details (NULL))
1187 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1188 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1192 /** ==> Case 2.1: operand is defined inside the loop. **/
1194 if (def_stmt_info)
1196 /* Get the def from the vectorized stmt. */
1198 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1199 gcc_assert (vec_stmt);
1200 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1201 return vec_oprnd;
1205 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1206 it is a reduction/induction. **/
1208 bb = bb_for_stmt (def_stmt);
1209 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1211 if (vect_debug_details (NULL))
1212 fprintf (dump_file, "reduction/induction - unsupported.");
1213 internal_error ("no support for reduction/induction"); /* FORNOW */
1217 /** ==> Case 2.3: operand is defined outside the loop -
1218 it is a loop invariant. */
1220 switch (TREE_CODE (def_stmt))
1222 case PHI_NODE:
1223 def = PHI_RESULT (def_stmt);
1224 break;
1225 case MODIFY_EXPR:
1226 def = TREE_OPERAND (def_stmt, 0);
1227 break;
1228 case NOP_EXPR:
1229 def = TREE_OPERAND (def_stmt, 0);
1230 gcc_assert (IS_EMPTY_STMT (def_stmt));
1231 def = op;
1232 break;
1233 default:
1234 if (vect_debug_details (NULL))
1236 fprintf (dump_file, "unsupported defining stmt: ");
1237 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1239 internal_error ("unsupported defining stmt");
1242 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1244 if (vect_debug_details (NULL))
1245 fprintf (dump_file, "Create vector_inv.");
1247 for (i = nunits - 1; i >= 0; --i)
1249 t = tree_cons (NULL_TREE, def, t);
1252 vec_inv = build_constructor (vectype, t);
1253 return vect_init_vector (stmt, vec_inv);
1257 /* Function vect_finish_stmt_generation.
1259 Insert a new stmt. */
1261 static void
1262 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
1264 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1266 if (vect_debug_details (NULL))
1268 fprintf (dump_file, "add new stmt: ");
1269 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1272 /* Make sure bsi points to the stmt that is being vectorized. */
1274 /* Assumption: any stmts created for the vectorization of stmt S were
1275 inserted before S. BSI is expected to point to S or some new stmt before S. */
1277 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1278 bsi_next (bsi);
1279 gcc_assert (stmt == bsi_stmt (*bsi));
1283 /* Function vectorizable_assignment.
1285 Check if STMT performs an assignment (copy) that can be vectorized.
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1290 static bool
1291 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1293 tree vec_dest;
1294 tree scalar_dest;
1295 tree op;
1296 tree vec_oprnd;
1297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1298 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1299 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1300 tree new_temp;
1302 /* Is vectorizable assignment? */
1304 if (TREE_CODE (stmt) != MODIFY_EXPR)
1305 return false;
1307 scalar_dest = TREE_OPERAND (stmt, 0);
1308 if (TREE_CODE (scalar_dest) != SSA_NAME)
1309 return false;
1311 op = TREE_OPERAND (stmt, 1);
1312 if (!vect_is_simple_use (op, loop, NULL))
1314 if (vect_debug_details (NULL))
1315 fprintf (dump_file, "use not simple.");
1316 return false;
1319 if (!vec_stmt) /* transformation not required. */
1321 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1322 return true;
1325 /** Trasform. **/
1326 if (vect_debug_details (NULL))
1327 fprintf (dump_file, "transform assignment.");
1329 /* Handle def. */
1330 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1332 /* Handle use. */
1333 op = TREE_OPERAND (stmt, 1);
1334 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1336 /* Arguments are ready. create the new vector stmt. */
1337 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1338 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1339 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1340 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1342 return true;
1346 /* Function vectorizable_operation.
1348 Check if STMT performs a binary or unary operation that can be vectorized.
1349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1353 static bool
1354 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1356 tree vec_dest;
1357 tree scalar_dest;
1358 tree operation;
1359 tree op0, op1 = NULL;
1360 tree vec_oprnd0, vec_oprnd1=NULL;
1361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1362 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1363 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1364 int i;
1365 enum tree_code code;
1366 enum machine_mode vec_mode;
1367 tree new_temp;
1368 int op_type;
1369 tree op;
1370 optab optab;
1372 /* Is STMT a vectorizable binary/unary operation? */
1373 if (TREE_CODE (stmt) != MODIFY_EXPR)
1374 return false;
1376 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1377 return false;
1379 operation = TREE_OPERAND (stmt, 1);
1380 code = TREE_CODE (operation);
1381 optab = optab_for_tree_code (code, vectype);
1383 /* Support only unary or binary operations. */
1384 op_type = TREE_CODE_LENGTH (code);
1385 if (op_type != unary_op && op_type != binary_op)
1387 if (vect_debug_details (NULL))
1388 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1389 return false;
1392 for (i = 0; i < op_type; i++)
1394 op = TREE_OPERAND (operation, i);
1395 if (!vect_is_simple_use (op, loop, NULL))
1397 if (vect_debug_details (NULL))
1398 fprintf (dump_file, "use not simple.");
1399 return false;
1403 /* Supportable by target? */
1404 if (!optab)
1406 if (vect_debug_details (NULL))
1407 fprintf (dump_file, "no optab.");
1408 return false;
1410 vec_mode = TYPE_MODE (vectype);
1411 if (!VECTOR_MODE_P (vec_mode))
1413 /* TODO: tree-complex.c sometimes can parallelize operations
1414 on generic vectors. We can vectorize the loop in that case,
1415 but then we should re-run the lowering pass. */
1416 if (vect_debug_details (NULL))
1417 fprintf (dump_file, "mode not supported by target.");
1418 return false;
1421 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1423 if (vect_debug_details (NULL))
1424 fprintf (dump_file, "op not supported by target.");
1425 return false;
1428 if (!vec_stmt) /* transformation not required. */
1430 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1431 return true;
1434 /** Transform. **/
1436 if (vect_debug_details (NULL))
1437 fprintf (dump_file, "transform binary/unary operation.");
1439 /* Handle def. */
1440 scalar_dest = TREE_OPERAND (stmt, 0);
1441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1443 /* Handle uses. */
1444 op0 = TREE_OPERAND (operation, 0);
1445 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1447 if (op_type == binary_op)
1449 op1 = TREE_OPERAND (operation, 1);
1450 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1453 /* Arguments are ready. create the new vector stmt. */
1455 if (op_type == binary_op)
1456 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1457 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1458 else
1459 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1460 build1 (code, vectype, vec_oprnd0));
1461 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1462 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1463 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1465 return true;
1469 /* Function vectorizable_store.
1471 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1472 can be vectorized.
1473 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1474 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1475 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1477 static bool
1478 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1480 tree scalar_dest;
1481 tree data_ref;
1482 tree op;
1483 tree vec_oprnd1;
1484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1485 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1486 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1487 enum machine_mode vec_mode;
1488 tree dummy;
1490 /* Is vectorizable store? */
1492 if (TREE_CODE (stmt) != MODIFY_EXPR)
1493 return false;
1495 scalar_dest = TREE_OPERAND (stmt, 0);
1496 if (TREE_CODE (scalar_dest) != ARRAY_REF
1497 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1498 return false;
1500 op = TREE_OPERAND (stmt, 1);
1501 if (!vect_is_simple_use (op, loop, NULL))
1503 if (vect_debug_details (NULL))
1504 fprintf (dump_file, "use not simple.");
1505 return false;
1508 vec_mode = TYPE_MODE (vectype);
1509 /* FORNOW. In some cases can vectorize even if data-type not supported
1510 (e.g. - array initialization with 0). */
1511 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1512 return false;
1514 if (!STMT_VINFO_DATA_REF (stmt_info))
1515 return false;
1517 if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
1518 return false;
1520 if (!vec_stmt) /* transformation not required. */
1522 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1523 return true;
1526 /** Trasform. **/
1528 if (vect_debug_details (NULL))
1529 fprintf (dump_file, "transform store");
1531 /* Handle use - get the vectorized def from the defining stmt. */
1532 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1534 /* Handle def. */
1535 /* FORNOW: make sure the data reference is aligned. */
1536 vect_align_data_ref (stmt);
1537 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1538 data_ref = build_fold_indirect_ref (data_ref);
1540 /* Arguments are ready. create the new vector stmt. */
1541 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1542 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1544 return true;
1548 /* vectorizable_load.
1550 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1551 can be vectorized.
1552 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1553 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1554 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1556 static bool
1557 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1559 tree scalar_dest;
1560 tree vec_dest = NULL;
1561 tree data_ref = NULL;
1562 tree op;
1563 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1564 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1565 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1566 tree new_temp;
1567 int mode;
1568 tree init_addr;
1569 tree new_stmt;
1570 tree dummy;
1571 basic_block new_bb;
1572 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1573 edge pe = loop_preheader_edge (loop);
1574 bool software_pipeline_loads_p = false;
1576 /* Is vectorizable load? */
1578 if (TREE_CODE (stmt) != MODIFY_EXPR)
1579 return false;
1581 scalar_dest = TREE_OPERAND (stmt, 0);
1582 if (TREE_CODE (scalar_dest) != SSA_NAME)
1583 return false;
1585 op = TREE_OPERAND (stmt, 1);
1586 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1587 return false;
1589 if (!STMT_VINFO_DATA_REF (stmt_info))
1590 return false;
1592 mode = (int) TYPE_MODE (vectype);
1594 /* FORNOW. In some cases can vectorize even if data-type not supported
1595 (e.g. - data copies). */
1596 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1598 if (vect_debug_details (loop))
1599 fprintf (dump_file, "Aligned load, but unsupported type.");
1600 return false;
1603 if (!aligned_access_p (dr))
1605 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1606 && (!targetm.vectorize.builtin_mask_for_load
1607 || targetm.vectorize.builtin_mask_for_load ()))
1608 software_pipeline_loads_p = true;
1609 else if (!targetm.vectorize.misaligned_mem_ok (mode))
1611 /* Possibly unaligned access, and can't software pipeline the loads */
1612 if (vect_debug_details (loop))
1613 fprintf (dump_file, "Arbitrary load not supported.");
1614 return false;
1618 if (!vec_stmt) /* transformation not required. */
1620 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1621 return true;
1624 /** Trasform. **/
1626 if (vect_debug_details (NULL))
1627 fprintf (dump_file, "transform load.");
1629 if (!software_pipeline_loads_p)
1631 /* Create:
1632 p = initial_addr;
1633 indx = 0;
1634 loop {
1635 vec_dest = *(p);
1636 indx = indx + 1;
1640 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1641 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1642 if (aligned_access_p (dr))
1643 data_ref = build_fold_indirect_ref (data_ref);
1644 else
1646 int mis = DR_MISALIGNMENT (dr);
1647 tree tmis = (mis == -1 ?
1648 integer_zero_node :
1649 build_int_cst (integer_type_node, mis));
1650 tmis = int_const_binop (MULT_EXPR, tmis,
1651 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
1652 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1654 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1655 new_temp = make_ssa_name (vec_dest, new_stmt);
1656 TREE_OPERAND (new_stmt, 0) = new_temp;
1657 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1659 else /* software-pipeline the loads */
1661 /* Create:
1662 p1 = initial_addr;
1663 msq_init = *(floor(p1))
1664 p2 = initial_addr + VS - 1;
1665 magic = have_builtin ? builtin_result : initial_address;
1666 indx = 0;
1667 loop {
1668 p2' = p2 + indx * vectype_size
1669 lsq = *(floor(p2'))
1670 vec_dest = realign_load (msq, lsq, magic)
1671 indx = indx + 1;
1672 msq = lsq;
1676 tree offset;
1677 tree magic;
1678 tree phi_stmt;
1679 tree msq_init;
1680 tree msq, lsq;
1681 tree dataref_ptr;
1682 tree params;
1684 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1685 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1686 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1687 &init_addr, true);
1688 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1689 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1690 new_temp = make_ssa_name (vec_dest, new_stmt);
1691 TREE_OPERAND (new_stmt, 0) = new_temp;
1692 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1693 gcc_assert (!new_bb);
1694 msq_init = TREE_OPERAND (new_stmt, 0);
1697 /* <2> Create lsq = *(floor(p2')) in the loop */
1698 offset = build_int_cst (integer_type_node,
1699 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1700 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1701 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1702 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1703 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1704 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1705 new_temp = make_ssa_name (vec_dest, new_stmt);
1706 TREE_OPERAND (new_stmt, 0) = new_temp;
1707 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1708 lsq = TREE_OPERAND (new_stmt, 0);
1711 /* <3> */
1712 if (targetm.vectorize.builtin_mask_for_load)
1714 /* Create permutation mask, if required, in loop preheader. */
1715 tree builtin_decl;
1716 params = build_tree_list (NULL_TREE, init_addr);
1717 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1718 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1719 new_stmt = build_function_call_expr (builtin_decl, params);
1720 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1721 new_temp = make_ssa_name (vec_dest, new_stmt);
1722 TREE_OPERAND (new_stmt, 0) = new_temp;
1723 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1724 gcc_assert (!new_bb);
1725 magic = TREE_OPERAND (new_stmt, 0);
1727 else
1729 /* Use current address instead of init_addr for reduced reg pressure. */
1730 magic = dataref_ptr;
1734 /* <4> Create msq = phi <msq_init, lsq> in loop */
1735 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1736 msq = make_ssa_name (vec_dest, NULL_TREE);
1737 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1738 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1739 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
1740 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
1743 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1744 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1745 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1746 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1747 new_temp = make_ssa_name (vec_dest, new_stmt);
1748 TREE_OPERAND (new_stmt, 0) = new_temp;
1749 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1752 *vec_stmt = new_stmt;
1753 return true;
1757 /* Function vect_transform_stmt.
1759 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1761 static bool
1762 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1764 bool is_store = false;
1765 tree vec_stmt = NULL_TREE;
1766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1767 bool done;
1769 switch (STMT_VINFO_TYPE (stmt_info))
1771 case op_vec_info_type:
1772 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1773 gcc_assert (done);
1774 break;
1776 case assignment_vec_info_type:
1777 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1778 gcc_assert (done);
1779 break;
1781 case load_vec_info_type:
1782 done = vectorizable_load (stmt, bsi, &vec_stmt);
1783 gcc_assert (done);
1784 break;
1786 case store_vec_info_type:
1787 done = vectorizable_store (stmt, bsi, &vec_stmt);
1788 gcc_assert (done);
1789 is_store = true;
1790 break;
1791 default:
1792 if (vect_debug_details (NULL))
1793 fprintf (dump_file, "stmt not supported.");
1794 gcc_unreachable ();
1797 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1799 return is_store;
1803 /* Function vect_transform_loop_bound.
1805 Create a new exit condition for the loop. */
1807 static void
1808 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1811 edge exit_edge = loop->single_exit;
1812 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1813 tree indx_before_incr, indx_after_incr;
1814 tree orig_cond_expr;
1815 HOST_WIDE_INT old_N = 0;
1816 int vf;
1817 tree cond_stmt;
1818 tree new_loop_bound;
1819 tree cond;
1820 tree lb_type;
1822 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1823 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1824 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1826 /* FORNOW:
1827 assuming number-of-iterations divides by the vectorization factor. */
1828 gcc_assert (!(old_N % vf));
1830 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1831 gcc_assert (orig_cond_expr);
1832 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
1834 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1835 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1837 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1838 to point to the exit condition. */
1839 bsi_next (&loop_exit_bsi);
1840 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1842 /* new loop exit test: */
1843 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1844 new_loop_bound = build_int_cst (lb_type, old_N/vf);
1846 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1847 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1848 else /* 'then' edge loops back. */
1849 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1851 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1852 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1854 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1856 /* remove old loop exit test: */
1857 bsi_remove (&loop_exit_bsi);
1859 if (vect_debug_details (NULL))
1860 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1864 /* Function vect_transform_loop.
1866 The analysis phase has determined that the loop is vectorizable.
1867 Vectorize the loop - created vectorized stmts to replace the scalar
1868 stmts in the loop, and update the loop exit condition. */
1870 static void
1871 vect_transform_loop (loop_vec_info loop_vinfo,
1872 struct loops *loops ATTRIBUTE_UNUSED)
1874 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1875 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1876 int nbbs = loop->num_nodes;
1877 block_stmt_iterator si;
1878 int i;
1879 #ifdef ENABLE_CHECKING
1880 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1881 #endif
1883 if (vect_debug_details (NULL))
1884 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
1886 /* 1) Make sure the loop header has exactly two entries
1887 2) Make sure we have a preheader basic block. */
1889 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1891 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1894 /* FORNOW: the vectorizer supports only loops which body consist
1895 of one basic block (header + empty latch). When the vectorizer will
1896 support more involved loop forms, the order by which the BBs are
1897 traversed need to be reconsidered. */
1899 for (i = 0; i < nbbs; i++)
1901 basic_block bb = bbs[i];
1903 for (si = bsi_start (bb); !bsi_end_p (si);)
1905 tree stmt = bsi_stmt (si);
1906 stmt_vec_info stmt_info;
1907 bool is_store;
1908 #ifdef ENABLE_CHECKING
1909 tree vectype;
1910 #endif
1912 if (vect_debug_details (NULL))
1914 fprintf (dump_file, "------>vectorizing statement: ");
1915 print_generic_expr (dump_file, stmt, TDF_SLIM);
1917 stmt_info = vinfo_for_stmt (stmt);
1918 gcc_assert (stmt_info);
1919 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1921 bsi_next (&si);
1922 continue;
1924 #ifdef ENABLE_CHECKING
1925 /* FORNOW: Verify that all stmts operate on the same number of
1926 units and no inner unrolling is necessary. */
1927 vectype = STMT_VINFO_VECTYPE (stmt_info);
1928 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
1929 == vectorization_factor);
1930 #endif
1931 /* -------- vectorize statement ------------ */
1932 if (vect_debug_details (NULL))
1933 fprintf (dump_file, "transform statement.");
1935 is_store = vect_transform_stmt (stmt, &si);
1936 if (is_store)
1938 /* free the attached stmt_vec_info and remove the stmt. */
1939 stmt_ann_t ann = stmt_ann (stmt);
1940 free (stmt_info);
1941 set_stmt_info (ann, NULL);
1942 bsi_remove (&si);
1943 continue;
1946 bsi_next (&si);
1947 } /* stmts in BB */
1948 } /* BBs in loop */
1950 vect_transform_loop_bound (loop_vinfo);
1952 if (vect_debug_details (loop))
1953 fprintf (dump_file,"Success! loop vectorized.");
1954 if (vect_debug_stats (loop))
1955 fprintf (dump_file, "LOOP VECTORIZED.");
1959 /* Function vect_is_simple_use.
1961 Input:
1962 LOOP - the loop that is being vectorized.
1963 OPERAND - operand of a stmt in LOOP.
1964 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1966 Returns whether a stmt with OPERAND can be vectorized.
1967 Supportable operands are constants, loop invariants, and operands that are
1968 defined by the current iteration of the loop. Unsupportable operands are
1969 those that are defined by a previous iteration of the loop (as is the case
1970 in reduction/induction computations). */
1972 static bool
1973 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1975 tree def_stmt;
1976 basic_block bb;
1978 if (def)
1979 *def = NULL_TREE;
1981 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1982 return true;
1984 if (TREE_CODE (operand) != SSA_NAME)
1985 return false;
1987 def_stmt = SSA_NAME_DEF_STMT (operand);
1988 if (def_stmt == NULL_TREE )
1990 if (vect_debug_details (NULL))
1991 fprintf (dump_file, "no def_stmt.");
1992 return false;
1995 /* empty stmt is expected only in case of a function argument.
1996 (Otherwise - we expect a phi_node or a modify_expr). */
1997 if (IS_EMPTY_STMT (def_stmt))
1999 tree arg = TREE_OPERAND (def_stmt, 0);
2000 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
2001 return true;
2002 if (vect_debug_details (NULL))
2004 fprintf (dump_file, "Unexpected empty stmt: ");
2005 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2007 return false;
2010 /* phi_node inside the loop indicates an induction/reduction pattern.
2011 This is not supported yet. */
2012 bb = bb_for_stmt (def_stmt);
2013 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2015 if (vect_debug_details (NULL))
2016 fprintf (dump_file, "reduction/induction - unsupported.");
2017 return false; /* FORNOW: not supported yet. */
2020 /* Expecting a modify_expr or a phi_node. */
2021 if (TREE_CODE (def_stmt) == MODIFY_EXPR
2022 || TREE_CODE (def_stmt) == PHI_NODE)
2024 if (def)
2025 *def = def_stmt;
2026 return true;
2029 return false;
2033 /* Function vect_analyze_operations.
2035 Scan the loop stmts and make sure they are all vectorizable. */
2037 static bool
2038 vect_analyze_operations (loop_vec_info loop_vinfo)
2040 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2041 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2042 int nbbs = loop->num_nodes;
2043 block_stmt_iterator si;
2044 int vectorization_factor = 0;
2045 int i;
2046 bool ok;
2047 tree scalar_type;
2049 if (vect_debug_details (NULL))
2050 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
2052 for (i = 0; i < nbbs; i++)
2054 basic_block bb = bbs[i];
2056 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2058 tree stmt = bsi_stmt (si);
2059 int nunits;
2060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2061 tree vectype;
2063 if (vect_debug_details (NULL))
2065 fprintf (dump_file, "==> examining statement: ");
2066 print_generic_expr (dump_file, stmt, TDF_SLIM);
2069 gcc_assert (stmt_info);
2071 /* skip stmts which do not need to be vectorized.
2072 this is expected to include:
2073 - the COND_EXPR which is the loop exit condition
2074 - any LABEL_EXPRs in the loop
2075 - computations that are used only for array indexing or loop
2076 control */
2078 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2080 if (vect_debug_details (NULL))
2081 fprintf (dump_file, "irrelevant.");
2082 continue;
2085 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
2087 if (vect_debug_stats (loop) || vect_debug_details (loop))
2089 fprintf (dump_file, "not vectorized: vector stmt in loop:");
2090 print_generic_expr (dump_file, stmt, TDF_SLIM);
2092 return false;
2095 if (STMT_VINFO_DATA_REF (stmt_info))
2096 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
2097 else if (TREE_CODE (stmt) == MODIFY_EXPR)
2098 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
2099 else
2100 scalar_type = TREE_TYPE (stmt);
2102 if (vect_debug_details (NULL))
2104 fprintf (dump_file, "get vectype for scalar type: ");
2105 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2108 vectype = get_vectype_for_scalar_type (scalar_type);
2109 if (!vectype)
2111 if (vect_debug_stats (loop) || vect_debug_details (loop))
2113 fprintf (dump_file, "not vectorized: unsupported data-type ");
2114 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2116 return false;
2119 if (vect_debug_details (NULL))
2121 fprintf (dump_file, "vectype: ");
2122 print_generic_expr (dump_file, vectype, TDF_SLIM);
2124 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2126 ok = (vectorizable_operation (stmt, NULL, NULL)
2127 || vectorizable_assignment (stmt, NULL, NULL)
2128 || vectorizable_load (stmt, NULL, NULL)
2129 || vectorizable_store (stmt, NULL, NULL));
2131 if (!ok)
2133 if (vect_debug_stats (loop) || vect_debug_details (loop))
2135 fprintf (dump_file, "not vectorized: stmt not supported: ");
2136 print_generic_expr (dump_file, stmt, TDF_SLIM);
2138 return false;
2141 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2142 if (vect_debug_details (NULL))
2143 fprintf (dump_file, "nunits = %d", nunits);
2145 if (vectorization_factor)
2147 /* FORNOW: don't allow mixed units.
2148 This restriction will be relaxed in the future. */
2149 if (nunits != vectorization_factor)
2151 if (vect_debug_stats (loop) || vect_debug_details (loop))
2152 fprintf (dump_file, "not vectorized: mixed data-types");
2153 return false;
2156 else
2157 vectorization_factor = nunits;
2161 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2162 if (!vectorization_factor)
2164 if (vect_debug_stats (loop) || vect_debug_details (loop))
2165 fprintf (dump_file, "not vectorized: unsupported data-type");
2166 return false;
2168 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
2170 /* FORNOW: handle only cases where the loop bound divides by the
2171 vectorization factor. */
2173 if (vect_debug_details (NULL))
2174 fprintf (dump_file,
2175 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
2176 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
2178 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2180 if (vect_debug_stats (loop) || vect_debug_details (loop))
2181 fprintf (dump_file, "not vectorized: Unknown loop bound.");
2182 return false;
2185 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2186 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
2188 if (vect_debug_stats (loop) || vect_debug_details (loop))
2189 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
2190 vectorization_factor);
2191 return false;
2194 return true;
2198 /* Function exist_non_indexing_operands_for_use_p
2200 USE is one of the uses attached to STMT. Check if USE is
2201 used in STMT for anything other than indexing an array. */
2203 static bool
2204 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2206 tree operand;
2207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2209 /* USE corresponds to some operand in STMT. If there is no data
2210 reference in STMT, then any operand that corresponds to USE
2211 is not indexing an array. */
2212 if (!STMT_VINFO_DATA_REF (stmt_info))
2213 return true;
2215 /* STMT has a data_ref. FORNOW this means that its of one of
2216 the following forms:
2217 -1- ARRAY_REF = var
2218 -2- var = ARRAY_REF
2219 (This should have been verified in analyze_data_refs).
2221 'var' in the second case corresponds to a def, not a use,
2222 so USE cannot correspond to any operands that are not used
2223 for array indexing.
2225 Therefore, all we need to check is if STMT falls into the
2226 first case, and whether var corresponds to USE. */
2228 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2229 return false;
2231 operand = TREE_OPERAND (stmt, 1);
2233 if (TREE_CODE (operand) != SSA_NAME)
2234 return false;
2236 if (operand == use)
2237 return true;
2239 return false;
2243 /* Function vect_is_simple_iv_evolution.
2245 FORNOW: A simple evolution of an induction variables in the loop is
2246 considered a polynomial evolution with constant step. */
2248 static bool
2249 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2250 tree * step, bool strict)
2252 tree init_expr;
2253 tree step_expr;
2255 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2257 /* When there is no evolution in this loop, the evolution function
2258 is not "simple". */
2259 if (evolution_part == NULL_TREE)
2260 return false;
2262 /* When the evolution is a polynomial of degree >= 2
2263 the evolution function is not "simple". */
2264 if (tree_is_chrec (evolution_part))
2265 return false;
2267 step_expr = evolution_part;
2268 init_expr = initial_condition (access_fn);
2270 if (vect_debug_details (NULL))
2272 fprintf (dump_file, "step: ");
2273 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2274 fprintf (dump_file, ", init: ");
2275 print_generic_expr (dump_file, init_expr, TDF_SLIM);
2278 *init = init_expr;
2279 *step = step_expr;
2281 if (TREE_CODE (step_expr) != INTEGER_CST)
2283 if (vect_debug_details (NULL))
2284 fprintf (dump_file, "step unknown.");
2285 return false;
2288 if (strict)
2289 if (!integer_onep (step_expr))
2291 if (vect_debug_details (NULL))
2292 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2293 return false;
2296 return true;
2300 /* Function vect_analyze_scalar_cycles.
2302 Examine the cross iteration def-use cycles of scalar variables, by
2303 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2304 cycles that they represent do not impede vectorization.
2306 FORNOW: Reduction as in the following loop, is not supported yet:
2307 loop1:
2308 for (i=0; i<N; i++)
2309 sum += a[i];
2310 The cross-iteration cycle corresponding to variable 'sum' will be
2311 considered too complicated and will impede vectorization.
2313 FORNOW: Induction as in the following loop, is not supported yet:
2314 loop2:
2315 for (i=0; i<N; i++)
2316 a[i] = i;
2318 However, the following loop *is* vectorizable:
2319 loop3:
2320 for (i=0; i<N; i++)
2321 a[i] = b[i];
2323 In both loops there exists a def-use cycle for the variable i:
2324 loop: i_2 = PHI (i_0, i_1)
2325 a[i_2] = ...;
2326 i_1 = i_2 + 1;
2327 GOTO loop;
2329 The evolution of the above cycle is considered simple enough,
2330 however, we also check that the cycle does not need to be
2331 vectorized, i.e - we check that the variable that this cycle
2332 defines is only used for array indexing or in stmts that do not
2333 need to be vectorized. This is not the case in loop2, but it
2334 *is* the case in loop3. */
2336 static bool
2337 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
2339 tree phi;
2340 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2341 basic_block bb = loop->header;
2342 tree dummy;
2344 if (vect_debug_details (NULL))
2345 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
2347 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
2349 tree access_fn = NULL;
2351 if (vect_debug_details (NULL))
2353 fprintf (dump_file, "Analyze phi: ");
2354 print_generic_expr (dump_file, phi, TDF_SLIM);
2357 /* Skip virtual phi's. The data dependences that are associated with
2358 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2360 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2362 if (vect_debug_details (NULL))
2363 fprintf (dump_file, "virtual phi. skip.");
2364 continue;
2367 /* Analyze the evolution function. */
2369 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2370 those of loop induction variables; This property is verified here.
2372 Furthermore, if that induction variable is used in an operation
2373 that needs to be vectorized (i.e, is not solely used to index
2374 arrays and check the exit condition) - we do not support its
2375 vectorization yet. This property is verified in vect_is_simple_use,
2376 during vect_analyze_operations. */
2378 access_fn = /* instantiate_parameters
2379 (loop,*/
2380 analyze_scalar_evolution (loop, PHI_RESULT (phi));
2382 if (!access_fn)
2384 if (vect_debug_stats (loop) || vect_debug_details (loop))
2385 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2386 return false;
2389 if (vect_debug_details (NULL))
2391 fprintf (dump_file, "Access function of PHI: ");
2392 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2395 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2396 &dummy, false))
2398 if (vect_debug_stats (loop) || vect_debug_details (loop))
2399 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2400 return false;
2404 return true;
2408 /* Function vect_analyze_data_ref_dependence.
2410 Return TRUE if there (might) exist a dependence between a memory-reference
2411 DRA and a memory-reference DRB. */
2413 static bool
2414 vect_analyze_data_ref_dependence (struct data_reference *dra,
2415 struct data_reference *drb,
2416 struct loop *loop)
2418 bool differ_p;
2419 struct data_dependence_relation *ddr;
2421 if (!array_base_name_differ_p (dra, drb, &differ_p))
2423 if (vect_debug_stats (loop) || vect_debug_details (loop))
2425 fprintf (dump_file,
2426 "not vectorized: can't determine dependence between: ");
2427 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2428 fprintf (dump_file, " and ");
2429 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2431 return true;
2434 if (differ_p)
2435 return false;
2437 ddr = initialize_data_dependence_relation (dra, drb);
2438 compute_affine_dependence (ddr);
2440 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2441 return false;
2443 if (vect_debug_stats (loop) || vect_debug_details (loop))
2445 fprintf (dump_file,
2446 "not vectorized: possible dependence between data-refs ");
2447 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2448 fprintf (dump_file, " and ");
2449 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2452 return true;
2456 /* Function vect_analyze_data_ref_dependences.
2458 Examine all the data references in the loop, and make sure there do not
2459 exist any data dependences between them.
2461 TODO: dependences which distance is greater than the vectorization factor
2462 can be ignored. */
2464 static bool
2465 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2467 unsigned int i, j;
2468 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2469 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2470 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2472 /* Examine store-store (output) dependences. */
2474 if (vect_debug_details (NULL))
2475 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2477 if (vect_debug_details (NULL))
2478 fprintf (dump_file, "compare all store-store pairs.");
2480 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2482 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2484 struct data_reference *dra =
2485 VARRAY_GENERIC_PTR (loop_write_refs, i);
2486 struct data_reference *drb =
2487 VARRAY_GENERIC_PTR (loop_write_refs, j);
2488 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2489 return false;
2493 /* Examine load-store (true/anti) dependences. */
2495 if (vect_debug_details (NULL))
2496 fprintf (dump_file, "compare all load-store pairs.");
2498 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2500 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2502 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2503 struct data_reference *drb =
2504 VARRAY_GENERIC_PTR (loop_write_refs, j);
2505 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2506 return false;
2510 return true;
2514 /* Function vect_get_first_index.
2516 REF is a data reference.
2517 If it is an ARRAY_REF: if its lower bound is simple enough,
2518 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2519 If it is not an ARRAY_REF: REF has no "first index";
2520 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2522 static bool
2523 vect_get_first_index (tree ref, tree *array_first_index)
2525 tree array_start;
2527 if (TREE_CODE (ref) != ARRAY_REF)
2528 *array_first_index = size_zero_node;
2529 else
2531 array_start = array_ref_low_bound (ref);
2532 if (!host_integerp (array_start,0))
2534 if (vect_debug_details (NULL))
2536 fprintf (dump_file, "array min val not simple integer cst.");
2537 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2539 return false;
2541 *array_first_index = array_start;
2544 return true;
2548 /* Function vect_compute_array_base_alignment.
2549 A utility function of vect_compute_array_ref_alignment.
2551 Compute the misalignment of ARRAY in bits.
2553 Input:
2554 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
2555 VECTYPE - we are interested in the misalignment modulo the size of vectype.
2556 if NULL: don't compute misalignment, just return the base of ARRAY.
2557 PREV_DIMENSIONS - initialized to one.
2558 MISALIGNMENT - the computed misalignment in bits.
2560 Output:
2561 If VECTYPE is not NULL:
2562 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2563 the base of the array, and put the computed misalignment in MISALIGNMENT.
2564 If VECTYPE is NULL:
2565 Return the base of the array.
2567 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2568 a[idx_N]...[idx_2][idx_1] is
2569 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2570 ... + idx_N * dim_0 * ... * dim_N-1}.
2571 (The misalignment of &a is not checked here).
2572 Note, that every term contains dim_0, therefore, if dim_0 is a
2573 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2574 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2575 NUINTS, we can say that the misalignment of the sum is equal to
2576 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2577 we can't determine this array misalignment, and we return
2578 false.
2579 We proceed recursively in this manner, accumulating total misalignment
2580 and the multiplication of previous dimensions for correct misalignment
2581 calculation. */
2583 static tree
2584 vect_compute_array_base_alignment (tree array,
2585 tree vectype,
2586 tree *prev_dimensions,
2587 tree *misalignment)
2589 tree index;
2590 tree domain;
2591 tree dimension_size;
2592 tree mis;
2593 tree bits_per_vectype;
2594 tree bits_per_vectype_unit;
2596 /* The 'stop condition' of the recursion. */
2597 if (TREE_CODE (array) != ARRAY_REF)
2598 return array;
2600 if (!vectype)
2601 /* Just get the base decl. */
2602 return vect_compute_array_base_alignment
2603 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2605 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
2606 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
2607 return NULL_TREE;
2609 domain = TYPE_DOMAIN (TREE_TYPE (array));
2610 dimension_size =
2611 int_const_binop (PLUS_EXPR,
2612 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
2613 TYPE_MIN_VALUE (domain), 1),
2614 size_one_node, 1);
2616 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2617 is a multiple of NUNITS:
2619 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2621 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
2622 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
2623 if (integer_zerop (mis))
2624 /* This array is aligned. Continue just in order to get the base decl. */
2625 return vect_compute_array_base_alignment
2626 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2628 index = TREE_OPERAND (array, 1);
2629 if (!host_integerp (index, 1))
2630 /* The current index is not constant. */
2631 return NULL_TREE;
2633 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
2635 bits_per_vectype = fold_convert (unsigned_type_node,
2636 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2637 GET_MODE_SIZE (TYPE_MODE (vectype))));
2638 bits_per_vectype_unit = fold_convert (unsigned_type_node,
2639 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2640 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
2642 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2643 earlier:
2645 *misalignment =
2646 (*misalignment + index_val * dimension_size * *prev_dimensions)
2647 % vectype_nunits;
2650 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
2651 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
2652 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
2653 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
2654 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
2657 *prev_dimensions = int_const_binop (MULT_EXPR,
2658 *prev_dimensions, dimension_size, 1);
2660 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
2661 prev_dimensions,
2662 misalignment);
2666 /* Function vect_compute_data_ref_alignment
2668 Compute the misalignment of the data reference DR.
2670 Output:
2671 1. If during the misalignment computation it is found that the data reference
2672 cannot be vectorized then false is returned.
2673 2. DR_MISALIGNMENT (DR) is defined.
2675 FOR NOW: No analysis is actually performed. Misalignment is calculated
2676 only for trivial cases. TODO. */
2678 static bool
2679 vect_compute_data_ref_alignment (struct data_reference *dr,
2680 loop_vec_info loop_vinfo)
2682 tree stmt = DR_STMT (dr);
2683 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2684 tree ref = DR_REF (dr);
2685 tree vectype;
2686 tree scalar_type;
2687 tree offset = size_zero_node;
2688 tree base, bit_offset, alignment;
2689 tree unit_bits = fold_convert (unsigned_type_node,
2690 build_int_cst (NULL_TREE, BITS_PER_UNIT));
2691 tree dr_base;
2692 bool base_aligned_p;
2694 if (vect_debug_details (NULL))
2695 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2697 /* Initialize misalignment to unknown. */
2698 DR_MISALIGNMENT (dr) = -1;
2700 scalar_type = TREE_TYPE (ref);
2701 vectype = get_vectype_for_scalar_type (scalar_type);
2702 if (!vectype)
2704 if (vect_debug_details (NULL))
2706 fprintf (dump_file, "no vectype for stmt: ");
2707 print_generic_expr (dump_file, stmt, TDF_SLIM);
2708 fprintf (dump_file, " scalar_type: ");
2709 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2711 /* It is not possible to vectorize this data reference. */
2712 return false;
2714 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
2716 if (TREE_CODE (ref) == ARRAY_REF)
2717 dr_base = ref;
2718 else
2719 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
2721 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
2722 loop_vinfo, &bit_offset, &base_aligned_p);
2723 if (!base)
2725 if (vect_debug_details (NULL))
2727 fprintf (dump_file, "Unknown alignment for access: ");
2728 print_generic_expr (dump_file,
2729 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
2731 return true;
2734 if (!base_aligned_p)
2736 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
2738 if (vect_debug_details (NULL))
2740 fprintf (dump_file, "can't force alignment of ref: ");
2741 print_generic_expr (dump_file, ref, TDF_SLIM);
2743 return true;
2746 /* Force the alignment of the decl.
2747 NOTE: This is the only change to the code we make during
2748 the analysis phase, before deciding to vectorize the loop. */
2749 if (vect_debug_details (NULL))
2750 fprintf (dump_file, "force alignment");
2751 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
2752 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
2755 /* At this point we assume that the base is aligned, and the offset from it
2756 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2757 gcc_assert (base_aligned_p
2758 || (TREE_CODE (base) == VAR_DECL
2759 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
2761 /* Convert into bytes. */
2762 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2763 /* Check that there is no remainder in bits. */
2764 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2765 if (!integer_zerop (bit_offset))
2767 if (vect_debug_details (NULL))
2769 fprintf (dump_file, "bit offset alignment: ");
2770 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2772 return false;
2775 /* Alignment required, in bytes: */
2776 alignment = fold_convert (unsigned_type_node,
2777 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
2779 /* Modulo alignment. */
2780 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
2781 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
2783 if (vect_debug_details (NULL))
2784 fprintf (dump_file, "unexpected misalign value");
2785 return false;
2788 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
2790 if (vect_debug_details (NULL))
2791 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
2793 return true;
2797 /* Function vect_compute_array_ref_alignment
2799 Compute the alignment of an array-ref.
2800 The alignment we compute here is relative to
2801 TYPE_ALIGN(VECTYPE) boundary.
2803 Output:
2804 OFFSET - the alignment in bits
2805 Return value - the base of the array-ref. E.g,
2806 if the array-ref is a.b[k].c[i][j] the returned
2807 base is a.b[k].c
2810 static tree
2811 vect_compute_array_ref_alignment (struct data_reference *dr,
2812 loop_vec_info loop_vinfo,
2813 tree vectype,
2814 tree *offset)
2816 tree array_first_index = size_zero_node;
2817 tree init;
2818 tree ref = DR_REF (dr);
2819 tree scalar_type = TREE_TYPE (ref);
2820 tree oprnd0 = TREE_OPERAND (ref, 0);
2821 tree dims = size_one_node;
2822 tree misalign = size_zero_node;
2823 tree next_ref, this_offset = size_zero_node;
2824 tree nunits;
2825 tree nbits;
2827 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
2828 /* The reference is an array without its last index. */
2829 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
2830 else
2831 next_ref =
2832 vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
2833 if (!vectype)
2834 /* Alignment is not requested. Just return the base. */
2835 return next_ref;
2837 /* Compute alignment. */
2838 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
2839 return NULL_TREE;
2840 this_offset = misalign;
2842 /* Check the first index accessed. */
2843 if (!vect_get_first_index (ref, &array_first_index))
2845 if (vect_debug_details (NULL))
2846 fprintf (dump_file, "no first_index for array.");
2847 return NULL_TREE;
2850 /* Check the index of the array_ref. */
2851 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
2852 LOOP_VINFO_LOOP (loop_vinfo)->num);
2854 /* FORNOW: In order to simplify the handling of alignment, we make sure
2855 that the first location at which the array is accessed ('init') is on an
2856 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2857 This is too conservative, since we require that
2858 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2859 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2860 This should be relaxed in the future. */
2862 if (!init || !host_integerp (init, 0))
2864 if (vect_debug_details (NULL))
2865 fprintf (dump_file, "non constant init. ");
2866 return NULL_TREE;
2869 /* bytes per scalar element: */
2870 nunits = fold_convert (unsigned_type_node,
2871 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
2872 nbits = int_const_binop (MULT_EXPR, nunits,
2873 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
2875 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
2876 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2877 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
2878 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
2880 /* TODO: allow negative misalign values. */
2881 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
2883 if (vect_debug_details (NULL))
2884 fprintf (dump_file, "unexpected misalign value");
2885 return NULL_TREE;
2887 *offset = misalign;
2888 return next_ref;
2892 /* Function vect_compute_data_refs_alignment
2894 Compute the misalignment of data references in the loop.
2895 This pass may take place at function granularity instead of at loop
2896 granularity.
2898 FOR NOW: No analysis is actually performed. Misalignment is calculated
2899 only for trivial cases. TODO. */
2901 static void
2902 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2904 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2905 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2906 unsigned int i;
2908 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2910 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2911 vect_compute_data_ref_alignment (dr, loop_vinfo);
2914 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2916 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2917 vect_compute_data_ref_alignment (dr, loop_vinfo);
2922 /* Function vect_enhance_data_refs_alignment
2924 This pass will use loop versioning and loop peeling in order to enhance
2925 the alignment of data references in the loop.
2927 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2928 original loop is to be vectorized; Any other loops that are created by
2929 the transformations performed in this pass - are not supposed to be
2930 vectorized. This restriction will be relaxed.
2932 FOR NOW: No transformation is actually performed. TODO. */
2934 static void
2935 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
2938 This pass will require a cost model to guide it whether to apply peeling
2939 or versioning or a combination of the two. For example, the scheme that
2940 intel uses when given a loop with several memory accesses, is as follows:
2941 choose one memory access ('p') which alignment you want to force by doing
2942 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2943 other accesses are not necessarily aligned, or (2) use loop versioning to
2944 generate one loop in which all accesses are aligned, and another loop in
2945 which only 'p' is necessarily aligned.
2947 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2948 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2949 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2951 Devising a cost model is the most critical aspect of this work. It will
2952 guide us on which access to peel for, whether to use loop versioning, how
2953 many versions to create, etc. The cost model will probably consist of
2954 generic considerations as well as target specific considerations (on
2955 powerpc for example, misaligned stores are more painful than misaligned
2956 loads).
2958 Here is the general steps involved in alignment enhancements:
2960 -- original loop, before alignment analysis:
2961 for (i=0; i<N; i++){
2962 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2963 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2966 -- After vect_compute_data_refs_alignment:
2967 for (i=0; i<N; i++){
2968 x = q[i]; # DR_MISALIGNMENT(q) = 3
2969 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2972 -- Possibility 1: we do loop versioning:
2973 if (p is aligned) {
2974 for (i=0; i<N; i++){ # loop 1A
2975 x = q[i]; # DR_MISALIGNMENT(q) = 3
2976 p[i] = y; # DR_MISALIGNMENT(p) = 0
2979 else {
2980 for (i=0; i<N; i++){ # loop 1B
2981 x = q[i]; # DR_MISALIGNMENT(q) = 3
2982 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2986 -- Possibility 2: we do loop peeling:
2987 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2988 x = q[i];
2989 p[i] = y;
2991 for (i = 3; i < N; i++){ # loop 2A
2992 x = q[i]; # DR_MISALIGNMENT(q) = 0
2993 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2996 -- Possibility 3: combination of loop peeling and versioning:
2997 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2998 x = q[i];
2999 p[i] = y;
3001 if (p is aligned) {
3002 for (i = 3; i<N; i++){ # loop 3A
3003 x = q[i]; # DR_MISALIGNMENT(q) = 0
3004 p[i] = y; # DR_MISALIGNMENT(p) = 0
3007 else {
3008 for (i = 3; i<N; i++){ # loop 3B
3009 x = q[i]; # DR_MISALIGNMENT(q) = 0
3010 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
3014 These loops are later passed to loop_transform to be vectorized. The
3015 vectorizer will use the alignment information to guide the transformation
3016 (whether to generate regular loads/stores, or with special handling for
3017 misalignment).
3022 /* Function vect_analyze_data_refs_alignment
3024 Analyze the alignment of the data-references in the loop.
3025 FOR NOW: Until support for misliagned accesses is in place, only if all
3026 accesses are aligned can the loop be vectorized. This restriction will be
3027 relaxed. */
3029 static bool
3030 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3032 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3033 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
3035 unsigned int i;
3037 if (vect_debug_details (NULL))
3038 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
3041 /* This pass may take place at function granularity instead of at loop
3042 granularity. */
3044 vect_compute_data_refs_alignment (loop_vinfo);
3047 /* This pass will use loop versioning and loop peeling in order to enhance
3048 the alignment of data references in the loop.
3049 FOR NOW: we assume that whatever versioning/peeling took place, the
3050 original loop is to be vectorized. Any other loops that were created by
3051 the transformations performed in this pass - are not supposed to be
3052 vectorized. This restriction will be relaxed. */
3054 vect_enhance_data_refs_alignment (loop_vinfo);
3057 /* Finally, check that loop can be vectorized.
3058 FOR NOW: Until support for misaligned accesses is in place, only if all
3059 accesses are aligned can the loop be vectorized. This restriction will be
3060 relaxed. */
3062 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3064 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3065 if (!aligned_access_p (dr))
3067 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3068 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3069 fprintf (dump_file, "not vectorized: unaligned store.");
3070 return false;
3074 /* The vectorizer now supports misaligned loads, so we don't fail anymore
3075 in the presence of a misaligned read dataref. For some targets however
3076 it may be preferable not to vectorize in such a case as misaligned
3077 accesses are very costly. This should be considered in the future. */
3079 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3081 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3082 if (!aligned_access_p (dr))
3084 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3085 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3086 fprintf (dump_file, "not vectorized: unaligned load.");
3087 return false;
3092 return true;
3096 /* Function vect_analyze_data_ref_access.
3098 Analyze the access pattern of the data-reference DR. For now, a data access
3099 has to consecutive and aligned to be considered vectorizable. */
3101 static bool
3102 vect_analyze_data_ref_access (struct data_reference *dr)
3104 varray_type access_fns = DR_ACCESS_FNS (dr);
3105 tree access_fn;
3106 tree init, step;
3107 unsigned int dimensions, i;
3109 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
3110 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
3111 access is contiguous). */
3112 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
3114 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
3116 access_fn = DR_ACCESS_FN (dr, i);
3118 if (evolution_part_in_loop_num (access_fn,
3119 loop_containing_stmt (DR_STMT (dr))->num))
3121 /* Evolution part is not NULL in this loop (it is neither constant nor
3122 invariant). */
3123 if (vect_debug_details (NULL))
3125 fprintf (dump_file,
3126 "not vectorized: complicated multidimensional array access.");
3127 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3129 return false;
3133 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
3134 if (!evolution_function_is_constant_p (access_fn)
3135 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
3136 access_fn, &init, &step, true))
3138 if (vect_debug_details (NULL))
3140 fprintf (dump_file, "not vectorized: too complicated access function.");
3141 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3143 return false;
3146 return true;
3150 /* Function vect_analyze_data_ref_accesses.
3152 Analyze the access pattern of all the data references in the loop.
3154 FORNOW: the only access pattern that is considered vectorizable is a
3155 simple step 1 (consecutive) access.
3157 FORNOW: handle only arrays and pointer accesses. */
3159 static bool
3160 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
3162 unsigned int i;
3163 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3164 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3166 if (vect_debug_details (NULL))
3167 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
3169 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3171 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3172 bool ok = vect_analyze_data_ref_access (dr);
3173 if (!ok)
3175 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3176 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3177 fprintf (dump_file, "not vectorized: complicated access pattern.");
3178 return false;
3182 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3184 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3185 bool ok = vect_analyze_data_ref_access (dr);
3186 if (!ok)
3188 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3189 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3190 fprintf (dump_file, "not vectorized: complicated access pattern.");
3191 return false;
3195 return true;
3199 /* Function vect_analyze_pointer_ref_access.
3201 Input:
3202 STMT - a stmt that contains a data-ref
3203 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3205 If the data-ref access is vectorizable, return a data_reference structure
3206 that represents it (DR). Otherwise - return NULL. */
3208 static struct data_reference *
3209 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
3211 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3212 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
3213 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
3214 tree init, step;
3215 int step_val;
3216 tree reftype, innertype;
3217 enum machine_mode innermode;
3218 tree indx_access_fn;
3219 int loopnum = loop->num;
3220 struct data_reference *dr;
3222 if (!access_fn)
3224 if (vect_debug_stats (loop) || vect_debug_details (loop))
3225 fprintf (dump_file, "not vectorized: complicated pointer access.");
3226 return NULL;
3229 if (vect_debug_details (NULL))
3231 fprintf (dump_file, "Access function of ptr: ");
3232 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3235 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
3237 if (vect_debug_stats (loop) || vect_debug_details (loop))
3238 fprintf (dump_file, "not vectorized: pointer access is not simple.");
3239 return NULL;
3242 STRIP_NOPS (init);
3244 if (!host_integerp (step,0))
3246 if (vect_debug_stats (loop) || vect_debug_details (loop))
3247 fprintf (dump_file,
3248 "not vectorized: non constant step for pointer access.");
3249 return NULL;
3252 step_val = TREE_INT_CST_LOW (step);
3254 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
3255 if (TREE_CODE (reftype) != POINTER_TYPE)
3257 if (vect_debug_stats (loop) || vect_debug_details (loop))
3258 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3259 return NULL;
3262 reftype = TREE_TYPE (init);
3263 if (TREE_CODE (reftype) != POINTER_TYPE)
3265 if (vect_debug_stats (loop) || vect_debug_details (loop))
3266 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3267 return NULL;
3270 innertype = TREE_TYPE (reftype);
3271 innermode = TYPE_MODE (innertype);
3272 if (GET_MODE_SIZE (innermode) != step_val)
3274 /* FORNOW: support only consecutive access */
3275 if (vect_debug_stats (loop) || vect_debug_details (loop))
3276 fprintf (dump_file, "not vectorized: non consecutive access.");
3277 return NULL;
3280 indx_access_fn =
3281 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
3282 if (vect_debug_details (NULL))
3284 fprintf (dump_file, "Access function of ptr indx: ");
3285 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
3287 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
3288 return dr;
3292 /* Function vect_get_symbl_and_dr.
3294 The function returns SYMBL - the relevant variable for
3295 memory tag (for aliasing purposes).
3296 Also data reference structure DR is created.
3298 Input:
3299 MEMREF - data reference in STMT
3300 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3302 Output:
3303 DR - data_reference struct for MEMREF
3304 return value - the relevant variable for memory tag (for aliasing purposes).
3308 static tree
3309 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
3310 loop_vec_info loop_vinfo, struct data_reference **dr)
3312 tree symbl, oprnd0, oprnd1;
3313 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3314 tree offset;
3315 tree array_base, base;
3316 struct data_reference *new_dr;
3317 bool base_aligned_p;
3319 *dr = NULL;
3320 switch (TREE_CODE (memref))
3322 case INDIRECT_REF:
3323 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
3324 if (! new_dr)
3325 return NULL_TREE;
3326 *dr = new_dr;
3327 symbl = DR_BASE_NAME (new_dr);
3328 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
3330 switch (TREE_CODE (symbl))
3332 case PLUS_EXPR:
3333 case MINUS_EXPR:
3334 oprnd0 = TREE_OPERAND (symbl, 0);
3335 oprnd1 = TREE_OPERAND (symbl, 1);
3337 STRIP_NOPS(oprnd1);
3338 /* Only {address_base + offset} expressions are supported,
3339 where address_base can be POINTER_TYPE or ARRAY_TYPE and
3340 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
3341 TODO: swap operands if {offset + address_base}. */
3342 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
3343 && TREE_CODE (oprnd1) != INTEGER_CST)
3344 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
3345 return NULL_TREE;
3347 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
3348 symbl = oprnd0;
3349 else
3350 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
3351 loop_vinfo, &new_dr);
3353 case SSA_NAME:
3354 case ADDR_EXPR:
3355 /* symbl remains unchanged. */
3356 break;
3358 default:
3359 if (vect_debug_details (NULL))
3361 fprintf (dump_file, "unhandled data ref: ");
3362 print_generic_expr (dump_file, memref, TDF_SLIM);
3363 fprintf (dump_file, " (symbl ");
3364 print_generic_expr (dump_file, symbl, TDF_SLIM);
3365 fprintf (dump_file, ") in stmt ");
3366 print_generic_expr (dump_file, stmt, TDF_SLIM);
3368 return NULL_TREE;
3370 break;
3372 case ARRAY_REF:
3373 offset = size_zero_node;
3375 /* Store the array base in the stmt info.
3376 For one dimensional array ref a[i], the base is a,
3377 for multidimensional a[i1][i2]..[iN], the base is
3378 a[i1][i2]..[iN-1]. */
3379 array_base = TREE_OPERAND (memref, 0);
3380 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
3382 new_dr = analyze_array (stmt, memref, is_read);
3383 *dr = new_dr;
3385 /* Find the relevant symbol for aliasing purposes. */
3386 base = DR_BASE_NAME (new_dr);
3387 switch (TREE_CODE (base))
3389 case VAR_DECL:
3390 symbl = base;
3391 break;
3393 case INDIRECT_REF:
3394 symbl = TREE_OPERAND (base, 0);
3395 break;
3397 case COMPONENT_REF:
3398 /* Could have recorded more accurate information -
3399 i.e, the actual FIELD_DECL that is being referenced -
3400 but later passes expect VAR_DECL as the nmt. */
3401 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
3402 loop_vinfo, &offset, &base_aligned_p);
3403 if (symbl)
3404 break;
3405 /* fall through */
3406 default:
3407 if (vect_debug_details (NULL))
3409 fprintf (dump_file, "unhandled struct/class field access ");
3410 print_generic_expr (dump_file, stmt, TDF_SLIM);
3412 return NULL_TREE;
3414 break;
3416 default:
3417 if (vect_debug_details (NULL))
3419 fprintf (dump_file, "unhandled data ref: ");
3420 print_generic_expr (dump_file, memref, TDF_SLIM);
3421 fprintf (dump_file, " in stmt ");
3422 print_generic_expr (dump_file, stmt, TDF_SLIM);
3424 return NULL_TREE;
3426 return symbl;
3430 /* Function vect_analyze_data_refs.
3432 Find all the data references in the loop.
3434 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
3435 which base is really an array (not a pointer) and which alignment
3436 can be forced. This restriction will be relaxed. */
3438 static bool
3439 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3441 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3442 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3443 int nbbs = loop->num_nodes;
3444 block_stmt_iterator si;
3445 int j;
3446 struct data_reference *dr;
3447 tree tag;
3448 tree address_base;
3450 if (vect_debug_details (NULL))
3451 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
3453 for (j = 0; j < nbbs; j++)
3455 basic_block bb = bbs[j];
3456 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3458 bool is_read = false;
3459 tree stmt = bsi_stmt (si);
3460 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3461 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3462 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3463 vuse_optype vuses = STMT_VUSE_OPS (stmt);
3464 varray_type *datarefs = NULL;
3465 int nvuses, nv_may_defs, nv_must_defs;
3466 tree memref = NULL;
3467 tree symbl;
3469 /* Assumption: there exists a data-ref in stmt, if and only if
3470 it has vuses/vdefs. */
3472 if (!vuses && !v_may_defs && !v_must_defs)
3473 continue;
3475 nvuses = NUM_VUSES (vuses);
3476 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
3477 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
3479 if (nvuses && (nv_may_defs || nv_must_defs))
3481 if (vect_debug_details (NULL))
3483 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
3484 print_generic_expr (dump_file, stmt, TDF_SLIM);
3486 return false;
3489 if (TREE_CODE (stmt) != MODIFY_EXPR)
3491 if (vect_debug_details (NULL))
3493 fprintf (dump_file, "unexpected vops in stmt: ");
3494 print_generic_expr (dump_file, stmt, TDF_SLIM);
3496 return false;
3499 if (vuses)
3501 memref = TREE_OPERAND (stmt, 1);
3502 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
3503 is_read = true;
3505 else /* vdefs */
3507 memref = TREE_OPERAND (stmt, 0);
3508 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
3509 is_read = false;
3512 /* Analyze MEMREF. If it is of a supported form, build data_reference
3513 struct for it (DR) and find the relevant symbol for aliasing
3514 purposes. */
3515 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
3516 if (!symbl)
3518 if (vect_debug_stats (loop) || vect_debug_details (loop))
3520 fprintf (dump_file, "not vectorized: unhandled data ref: ");
3521 print_generic_expr (dump_file, stmt, TDF_SLIM);
3523 return false;
3526 /* Find and record the memtag assigned to this data-ref. */
3527 switch (TREE_CODE (symbl))
3529 case VAR_DECL:
3530 STMT_VINFO_MEMTAG (stmt_info) = symbl;
3531 break;
3533 case SSA_NAME:
3534 symbl = SSA_NAME_VAR (symbl);
3535 tag = get_var_ann (symbl)->type_mem_tag;
3536 if (!tag)
3538 tree ptr = TREE_OPERAND (memref, 0);
3539 if (TREE_CODE (ptr) == SSA_NAME)
3540 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
3542 if (!tag)
3544 if (vect_debug_stats (loop) || vect_debug_details (loop))
3545 fprintf (dump_file, "not vectorized: no memtag for ref.");
3546 return false;
3548 STMT_VINFO_MEMTAG (stmt_info) = tag;
3549 break;
3551 case ADDR_EXPR:
3552 address_base = TREE_OPERAND (symbl, 0);
3554 switch (TREE_CODE (address_base))
3556 case ARRAY_REF:
3557 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
3558 STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
3559 break;
3561 case VAR_DECL:
3562 STMT_VINFO_MEMTAG (stmt_info) = address_base;
3563 break;
3565 default:
3566 if (vect_debug_stats (loop) || vect_debug_details (loop))
3568 fprintf (dump_file, "not vectorized: unhandled address expression: ");
3569 print_generic_expr (dump_file, stmt, TDF_SLIM);
3571 return false;
3573 break;
3575 default:
3576 if (vect_debug_stats (loop) || vect_debug_details (loop))
3578 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
3579 print_generic_expr (dump_file, memref, TDF_SLIM);
3581 return false;
3584 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
3585 STMT_VINFO_DATA_REF (stmt_info) = dr;
3589 return true;
3593 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3595 /* Function vect_mark_relevant.
3597 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3599 static void
3600 vect_mark_relevant (varray_type worklist, tree stmt)
3602 stmt_vec_info stmt_info;
3604 if (vect_debug_details (NULL))
3605 fprintf (dump_file, "mark relevant.");
3607 if (TREE_CODE (stmt) == PHI_NODE)
3609 VARRAY_PUSH_TREE (worklist, stmt);
3610 return;
3613 stmt_info = vinfo_for_stmt (stmt);
3615 if (!stmt_info)
3617 if (vect_debug_details (NULL))
3619 fprintf (dump_file, "mark relevant: no stmt info!!.");
3620 print_generic_expr (dump_file, stmt, TDF_SLIM);
3622 return;
3625 if (STMT_VINFO_RELEVANT_P (stmt_info))
3627 if (vect_debug_details (NULL))
3628 fprintf (dump_file, "already marked relevant.");
3629 return;
3632 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3633 VARRAY_PUSH_TREE (worklist, stmt);
3637 /* Function vect_stmt_relevant_p.
3639 Return true if STMT in loop that is represented by LOOP_VINFO is
3640 "relevant for vectorization".
3642 A stmt is considered "relevant for vectorization" if:
3643 - it has uses outside the loop.
3644 - it has vdefs (it alters memory).
3645 - control stmts in the loop (except for the exit condition).
3647 CHECKME: what other side effects would the vectorizer allow? */
3649 static bool
3650 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3652 v_may_def_optype v_may_defs;
3653 v_must_def_optype v_must_defs;
3654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3655 int i;
3656 dataflow_t df;
3657 int num_uses;
3659 /* cond stmt other than loop exit cond. */
3660 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3661 return true;
3663 /* changing memory. */
3664 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3665 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3666 if (v_may_defs || v_must_defs)
3668 if (vect_debug_details (NULL))
3669 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3670 return true;
3673 /* uses outside the loop. */
3674 df = get_immediate_uses (stmt);
3675 num_uses = num_immediate_uses (df);
3676 for (i = 0; i < num_uses; i++)
3678 tree use = immediate_use (df, i);
3679 basic_block bb = bb_for_stmt (use);
3680 if (!flow_bb_inside_loop_p (loop, bb))
3682 if (vect_debug_details (NULL))
3683 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3684 return true;
3688 return false;
3692 /* Function vect_mark_stmts_to_be_vectorized.
3694 Not all stmts in the loop need to be vectorized. For example:
3696 for i...
3697 for j...
3698 1. T0 = i + j
3699 2. T1 = a[T0]
3701 3. j = j + 1
3703 Stmt 1 and 3 do not need to be vectorized, because loop control and
3704 addressing of vectorized data-refs are handled differently.
3706 This pass detects such stmts. */
3708 static bool
3709 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3711 varray_type worklist;
3712 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3713 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3714 unsigned int nbbs = loop->num_nodes;
3715 block_stmt_iterator si;
3716 tree stmt;
3717 stmt_ann_t ann;
3718 unsigned int i;
3719 int j;
3720 use_optype use_ops;
3721 stmt_vec_info stmt_info;
3723 if (vect_debug_details (NULL))
3724 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3726 VARRAY_TREE_INIT (worklist, 64, "work list");
3728 /* 1. Init worklist. */
3730 for (i = 0; i < nbbs; i++)
3732 basic_block bb = bbs[i];
3733 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3735 stmt = bsi_stmt (si);
3737 if (vect_debug_details (NULL))
3739 fprintf (dump_file, "init: stmt relevant? ");
3740 print_generic_expr (dump_file, stmt, TDF_SLIM);
3743 stmt_info = vinfo_for_stmt (stmt);
3744 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
3746 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3747 vect_mark_relevant (worklist, stmt);
3752 /* 2. Process_worklist */
3754 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3756 stmt = VARRAY_TOP_TREE (worklist);
3757 VARRAY_POP (worklist);
3759 if (vect_debug_details (NULL))
3761 fprintf (dump_file, "worklist: examine stmt: ");
3762 print_generic_expr (dump_file, stmt, TDF_SLIM);
3765 /* Examine the USES in this statement. Mark all the statements which
3766 feed this statement's uses as "relevant", unless the USE is used as
3767 an array index. */
3769 if (TREE_CODE (stmt) == PHI_NODE)
3771 /* follow the def-use chain inside the loop. */
3772 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3774 tree arg = PHI_ARG_DEF (stmt, j);
3775 tree def_stmt = NULL_TREE;
3776 basic_block bb;
3777 if (!vect_is_simple_use (arg, loop, &def_stmt))
3779 if (vect_debug_details (NULL))
3780 fprintf (dump_file, "worklist: unsupported use.");
3781 varray_clear (worklist);
3782 return false;
3784 if (!def_stmt)
3785 continue;
3787 if (vect_debug_details (NULL))
3789 fprintf (dump_file, "worklist: def_stmt: ");
3790 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3793 bb = bb_for_stmt (def_stmt);
3794 if (flow_bb_inside_loop_p (loop, bb))
3795 vect_mark_relevant (worklist, def_stmt);
3799 ann = stmt_ann (stmt);
3800 use_ops = USE_OPS (ann);
3802 for (i = 0; i < NUM_USES (use_ops); i++)
3804 tree use = USE_OP (use_ops, i);
3806 /* We are only interested in uses that need to be vectorized. Uses
3807 that are used for address computation are not considered relevant.
3809 if (exist_non_indexing_operands_for_use_p (use, stmt))
3811 tree def_stmt = NULL_TREE;
3812 basic_block bb;
3813 if (!vect_is_simple_use (use, loop, &def_stmt))
3815 if (vect_debug_details (NULL))
3816 fprintf (dump_file, "worklist: unsupported use.");
3817 varray_clear (worklist);
3818 return false;
3821 if (!def_stmt)
3822 continue;
3824 if (vect_debug_details (NULL))
3826 fprintf (dump_file, "worklist: examine use %d: ", i);
3827 print_generic_expr (dump_file, use, TDF_SLIM);
3830 bb = bb_for_stmt (def_stmt);
3831 if (flow_bb_inside_loop_p (loop, bb))
3832 vect_mark_relevant (worklist, def_stmt);
3835 } /* while worklist */
3837 varray_clear (worklist);
3838 return true;
3842 /* Function vect_get_loop_niters.
3844 Determine how many iterations the loop is executed. */
3846 static tree
3847 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3849 tree niters;
3851 if (vect_debug_details (NULL))
3852 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3854 niters = number_of_iterations_in_loop (loop);
3856 if (niters != NULL_TREE
3857 && niters != chrec_dont_know
3858 && host_integerp (niters,0))
3860 *number_of_iterations = TREE_INT_CST_LOW (niters);
3862 if (vect_debug_details (NULL))
3863 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3864 *number_of_iterations);
3867 return get_loop_exit_condition (loop);
3871 /* Function vect_analyze_loop_form.
3873 Verify the following restrictions (some may be relaxed in the future):
3874 - it's an inner-most loop
3875 - number of BBs = 2 (which are the loop header and the latch)
3876 - the loop has a pre-header
3877 - the loop has a single entry and exit
3878 - the loop exit condition is simple enough, and the number of iterations
3879 can be analyzed (a countable loop). */
3881 static loop_vec_info
3882 vect_analyze_loop_form (struct loop *loop)
3884 loop_vec_info loop_vinfo;
3885 tree loop_cond;
3886 HOST_WIDE_INT number_of_iterations = -1;
3888 if (vect_debug_details (loop))
3889 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3891 if (loop->inner
3892 || !loop->single_exit
3893 || loop->num_nodes != 2)
3895 if (vect_debug_stats (loop) || vect_debug_details (loop))
3897 fprintf (dump_file, "not vectorized: bad loop form. ");
3898 if (loop->inner)
3899 fprintf (dump_file, "nested loop.");
3900 else if (!loop->single_exit)
3901 fprintf (dump_file, "multiple exits.");
3902 else if (loop->num_nodes != 2)
3903 fprintf (dump_file, "too many BBs in loop.");
3906 return NULL;
3909 /* We assume that the loop exit condition is at the end of the loop. i.e,
3910 that the loop is represented as a do-while (with a proper if-guard
3911 before the loop if needed), where the loop header contains all the
3912 executable statements, and the latch is empty. */
3913 if (!empty_block_p (loop->latch))
3915 if (vect_debug_stats (loop) || vect_debug_details (loop))
3916 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3917 return NULL;
3920 if (empty_block_p (loop->header))
3922 if (vect_debug_stats (loop) || vect_debug_details (loop))
3923 fprintf (dump_file, "not vectorized: empty loop.");
3924 return NULL;
3927 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3928 if (!loop_cond)
3930 if (vect_debug_stats (loop) || vect_debug_details (loop))
3931 fprintf (dump_file, "not vectorized: complicated exit condition.");
3932 return NULL;
3935 if (number_of_iterations < 0)
3937 if (vect_debug_stats (loop) || vect_debug_details (loop))
3938 fprintf (dump_file, "not vectorized: unknown loop bound.");
3939 return NULL;
3942 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3944 if (vect_debug_stats (loop) || vect_debug_details (loop))
3945 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3946 return NULL;
3949 loop_vinfo = new_loop_vec_info (loop);
3950 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3951 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3953 return loop_vinfo;
3957 /* Function vect_analyze_loop.
3959 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3960 for it. The different analyses will record information in the
3961 loop_vec_info struct. */
3963 static loop_vec_info
3964 vect_analyze_loop (struct loop *loop)
3966 bool ok;
3967 loop_vec_info loop_vinfo;
3969 if (vect_debug_details (NULL))
3970 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3972 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3974 loop_vinfo = vect_analyze_loop_form (loop);
3975 if (!loop_vinfo)
3977 if (vect_debug_details (loop))
3978 fprintf (dump_file, "bad loop form.");
3979 return NULL;
3982 /* Find all data references in the loop (which correspond to vdefs/vuses)
3983 and analyze their evolution in the loop.
3985 FORNOW: Handle only simple, array references, which
3986 alignment can be forced, and aligned pointer-references. */
3988 ok = vect_analyze_data_refs (loop_vinfo);
3989 if (!ok)
3991 if (vect_debug_details (loop))
3992 fprintf (dump_file, "bad data references.");
3993 destroy_loop_vec_info (loop_vinfo);
3994 return NULL;
3997 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3999 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4000 if (!ok)
4002 if (vect_debug_details (loop))
4003 fprintf (dump_file, "unexpected pattern.");
4004 if (vect_debug_details (loop))
4005 fprintf (dump_file, "not vectorized: unexpected pattern.");
4006 destroy_loop_vec_info (loop_vinfo);
4007 return NULL;
4010 /* Check that all cross-iteration scalar data-flow cycles are OK.
4011 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4013 ok = vect_analyze_scalar_cycles (loop_vinfo);
4014 if (!ok)
4016 if (vect_debug_details (loop))
4017 fprintf (dump_file, "bad scalar cycle.");
4018 destroy_loop_vec_info (loop_vinfo);
4019 return NULL;
4022 /* Analyze data dependences between the data-refs in the loop.
4023 FORNOW: fail at the first data dependence that we encounter. */
4025 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4026 if (!ok)
4028 if (vect_debug_details (loop))
4029 fprintf (dump_file, "bad data dependence.");
4030 destroy_loop_vec_info (loop_vinfo);
4031 return NULL;
4034 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4035 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4037 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4038 if (!ok)
4040 if (vect_debug_details (loop))
4041 fprintf (dump_file, "bad data access.");
4042 destroy_loop_vec_info (loop_vinfo);
4043 return NULL;
4046 /* Analyze the alignment of the data-refs in the loop.
4047 FORNOW: Only aligned accesses are handled. */
4049 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4050 if (!ok)
4052 if (vect_debug_details (loop))
4053 fprintf (dump_file, "bad data alignment.");
4054 destroy_loop_vec_info (loop_vinfo);
4055 return NULL;
4058 /* Scan all the operations in the loop and make sure they are
4059 vectorizable. */
4061 ok = vect_analyze_operations (loop_vinfo);
4062 if (!ok)
4064 if (vect_debug_details (loop))
4065 fprintf (dump_file, "bad operation or unsupported loop bound.");
4066 destroy_loop_vec_info (loop_vinfo);
4067 return NULL;
4070 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4072 return loop_vinfo;
4076 /* Function need_imm_uses_for.
4078 Return whether we ought to include information for 'var'
4079 when calculating immediate uses. For this pass we only want use
4080 information for non-virtual variables. */
4082 static bool
4083 need_imm_uses_for (tree var)
4085 return is_gimple_reg (var);
4089 /* Function vectorize_loops.
4091 Entry Point to loop vectorization phase. */
4093 void
4094 vectorize_loops (struct loops *loops)
4096 unsigned int i, loops_num;
4097 unsigned int num_vectorized_loops = 0;
4099 /* Does the target support SIMD? */
4100 /* FORNOW: until more sophisticated machine modelling is in place. */
4101 if (!UNITS_PER_SIMD_WORD)
4103 if (vect_debug_details (NULL))
4104 fprintf (dump_file, "vectorizer: target vector size is not defined.");
4105 return;
4108 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
4110 /* ----------- Analyze loops. ----------- */
4112 /* If some loop was duplicated, it gets bigger number
4113 than all previously defined loops. This fact allows us to run
4114 only over initial loops skipping newly generated ones. */
4115 loops_num = loops->num;
4116 for (i = 1; i < loops_num; i++)
4118 loop_vec_info loop_vinfo;
4119 struct loop *loop = loops->parray[i];
4121 if (!loop)
4122 continue;
4124 loop_vinfo = vect_analyze_loop (loop);
4125 loop->aux = loop_vinfo;
4127 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
4128 continue;
4130 vect_transform_loop (loop_vinfo, loops);
4131 num_vectorized_loops++;
4134 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
4135 fprintf (dump_file, "\nvectorized %u loops in function.\n",
4136 num_vectorized_loops);
4138 /* ----------- Finalize. ----------- */
4140 free_df ();
4141 for (i = 1; i < loops_num; i++)
4143 struct loop *loop = loops->parray[i];
4144 loop_vec_info loop_vinfo;
4146 if (!loop)
4147 continue;
4148 loop_vinfo = loop->aux;
4149 destroy_loop_vec_info (loop_vinfo);
4150 loop->aux = NULL;
4153 rewrite_into_ssa (false);
4154 if (bitmap_first_set_bit (vars_to_rename) >= 0)
4156 /* The rewrite of ssa names may cause violation of loop closed ssa
4157 form invariants. TODO -- avoid these rewrites completely.
4158 Information in virtual phi nodes is sufficient for it. */
4159 rewrite_into_loop_closed_ssa ();
4161 bitmap_clear (vars_to_rename);