* varasm.c (default_assemble_integer): Return false for values wider
[official-gcc.git] / gcc / tree-vectorizer.c
blob33f68ad56d4c4e06a226866a0c4f8f7becb6e42d
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
148 /* Main analysis functions. */
149 static loop_vec_info vect_analyze_loop (struct loop *);
150 static loop_vec_info vect_analyze_loop_form (struct loop *);
151 static bool vect_analyze_data_refs (loop_vec_info);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153 static bool vect_analyze_scalar_cycles (loop_vec_info);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info);
156 static void vect_compute_data_refs_alignment (loop_vec_info);
157 static bool vect_analyze_operations (loop_vec_info);
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info, struct loops *);
161 static void vect_transform_loop_bound (loop_vec_info);
162 static bool vect_transform_stmt (tree, block_stmt_iterator *);
163 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167 static void vect_align_data_ref (tree);
168 static void vect_enhance_data_refs_alignment (loop_vec_info);
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree , struct loop *, tree *);
172 static bool exist_non_indexing_operands_for_use_p (tree, tree);
173 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174 static void vect_mark_relevant (varray_type, tree);
175 static bool vect_stmt_relevant_p (tree, loop_vec_info);
176 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference *, loop_vec_info);
179 static bool vect_analyze_data_ref_access (struct data_reference *);
180 static bool vect_get_first_index (tree, tree *);
181 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
182 static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool);
183 static tree vect_get_base_and_bit_offset
184 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
185 static struct data_reference * vect_analyze_pointer_ref_access
186 (tree, tree, bool);
187 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
188 static tree vect_compute_array_ref_alignment
189 (struct data_reference *, loop_vec_info, tree, tree *);
190 static tree vect_get_ptr_offset (tree, tree, tree *);
191 static tree vect_get_symbl_and_dr
192 (tree, tree, bool, loop_vec_info, struct data_reference **);
194 /* Utility functions for the code transformation. */
195 static tree vect_create_destination_var (tree, tree);
196 static tree vect_create_data_ref (tree, block_stmt_iterator *);
197 static tree vect_create_index_for_vector_ref (struct loop *, block_stmt_iterator *);
198 static tree vect_create_addr_base_for_vector_ref (tree, tree *);
199 static tree get_vectype_for_scalar_type (tree);
200 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
201 static tree vect_get_vec_def_for_operand (tree, tree);
202 static tree vect_init_vector (tree, tree);
203 static void vect_finish_stmt_generation
204 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
206 /* Utilities for creation and deletion of vec_info structs. */
207 loop_vec_info new_loop_vec_info (struct loop *loop);
208 void destroy_loop_vec_info (loop_vec_info);
209 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
211 static bool vect_debug_stats (struct loop *loop);
212 static bool vect_debug_details (struct loop *loop);
215 /* Function new_stmt_vec_info.
217 Create and initialize a new stmt_vec_info struct for STMT. */
219 stmt_vec_info
220 new_stmt_vec_info (tree stmt, struct loop *loop)
222 stmt_vec_info res;
223 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
225 STMT_VINFO_TYPE (res) = undef_vec_info_type;
226 STMT_VINFO_STMT (res) = stmt;
227 STMT_VINFO_LOOP (res) = loop;
228 STMT_VINFO_RELEVANT_P (res) = 0;
229 STMT_VINFO_VECTYPE (res) = NULL;
230 STMT_VINFO_VEC_STMT (res) = NULL;
231 STMT_VINFO_DATA_REF (res) = NULL;
232 STMT_VINFO_MEMTAG (res) = NULL;
233 STMT_VINFO_VECT_DR_BASE (res) = NULL;
235 return res;
239 /* Function new_loop_vec_info.
241 Create and initialize a new loop_vec_info struct for LOOP, as well as
242 stmt_vec_info structs for all the stmts in LOOP. */
244 loop_vec_info
245 new_loop_vec_info (struct loop *loop)
247 loop_vec_info res;
248 basic_block *bbs;
249 block_stmt_iterator si;
250 unsigned int i;
252 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
254 bbs = get_loop_body (loop);
256 /* Create stmt_info for all stmts in the loop. */
257 for (i = 0; i < loop->num_nodes; i++)
259 basic_block bb = bbs[i];
260 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
262 tree stmt = bsi_stmt (si);
263 stmt_ann_t ann;
265 get_stmt_operands (stmt);
266 ann = stmt_ann (stmt);
267 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
271 LOOP_VINFO_LOOP (res) = loop;
272 LOOP_VINFO_BBS (res) = bbs;
273 LOOP_VINFO_EXIT_COND (res) = NULL;
274 LOOP_VINFO_NITERS (res) = -1;
275 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
276 LOOP_VINFO_VECT_FACTOR (res) = 0;
277 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
278 "loop_write_datarefs");
279 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
280 "loop_read_datarefs");
281 return res;
285 /* Function destroy_loop_vec_info.
287 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
288 stmts in the loop. */
290 void
291 destroy_loop_vec_info (loop_vec_info loop_vinfo)
293 struct loop *loop;
294 basic_block *bbs;
295 int nbbs;
296 block_stmt_iterator si;
297 int j;
299 if (!loop_vinfo)
300 return;
302 loop = LOOP_VINFO_LOOP (loop_vinfo);
304 bbs = LOOP_VINFO_BBS (loop_vinfo);
305 nbbs = loop->num_nodes;
307 for (j = 0; j < nbbs; j++)
309 basic_block bb = bbs[j];
310 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
312 tree stmt = bsi_stmt (si);
313 stmt_ann_t ann = stmt_ann (stmt);
314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
315 free (stmt_info);
316 set_stmt_info (ann, NULL);
320 free (LOOP_VINFO_BBS (loop_vinfo));
321 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
322 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
324 free (loop_vinfo);
328 /* Function debug_loop_stats.
330 For vectorization statistics dumps. */
332 static bool
333 vect_debug_stats (struct loop *loop)
335 basic_block bb;
336 block_stmt_iterator si;
337 tree node = NULL_TREE;
339 if (!dump_file || !(dump_flags & TDF_STATS))
340 return false;
342 if (!loop)
344 fprintf (dump_file, "\n");
345 return true;
348 if (!loop->header)
349 return false;
351 bb = loop->header;
353 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
355 node = bsi_stmt (si);
356 if (node && EXPR_P (node) && EXPR_LOCUS (node))
357 break;
360 if (node && EXPR_P (node) && EXPR_LOCUS (node)
361 && EXPR_FILENAME (node) && EXPR_LINENO (node))
363 fprintf (dump_file, "\nloop at %s:%d: ",
364 EXPR_FILENAME (node), EXPR_LINENO (node));
365 return true;
368 return false;
372 /* Function debug_loop_details.
374 For vectorization debug dumps. */
376 static bool
377 vect_debug_details (struct loop *loop)
379 basic_block bb;
380 block_stmt_iterator si;
381 tree node = NULL_TREE;
383 if (!dump_file || !(dump_flags & TDF_DETAILS))
384 return false;
386 if (!loop)
388 fprintf (dump_file, "\n");
389 return true;
392 if (!loop->header)
393 return false;
395 bb = loop->header;
397 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
399 node = bsi_stmt (si);
400 if (node && EXPR_P (node) && EXPR_LOCUS (node))
401 break;
404 if (node && EXPR_P (node) && EXPR_LOCUS (node)
405 && EXPR_FILENAME (node) && EXPR_LINENO (node))
407 fprintf (dump_file, "\nloop at %s:%d: ",
408 EXPR_FILENAME (node), EXPR_LINENO (node));
409 return true;
412 return false;
416 /* Function vect_get_ptr_offset
418 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
420 static tree
421 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
422 tree vectype ATTRIBUTE_UNUSED,
423 tree *offset ATTRIBUTE_UNUSED)
425 /* TODO: Use alignment information. */
426 return NULL_TREE;
430 /* Function vect_get_base_and_bit_offset
432 Return the BASE of the data reference EXPR.
433 If VECTYPE is given, also compute the OFFSET from BASE in bits.
434 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
435 bits of 'a.b[i] + 4B' from a.
437 Input:
438 EXPR - the memory reference that is being analyzed
439 DR - the data_reference struct of the _original_ memory reference
440 (Note: DR_REF (DR) is not necessarily EXPR)
441 VECTYPE - the type that defines the alignment (i.e, we compute
442 alignment relative to TYPE_ALIGN(VECTYPE))
444 Output:
445 BASE (returned value) - the base of the data reference EXPR.
446 E.g, if EXPR is a.b[k].c[i][j] the returned
447 base is a.
448 OFFSET - offset of EXPR from BASE in bits
449 BASE_ALIGNED_P - indicates if BASE is aligned
451 If something unexpected is encountered (an unsupported form of data-ref),
452 or if VECTYPE is given but OFFSET cannot be determined:
453 then NULL_TREE is returned. */
455 static tree
456 vect_get_base_and_bit_offset (struct data_reference *dr,
457 tree expr,
458 tree vectype,
459 loop_vec_info loop_vinfo,
460 tree *offset,
461 bool *base_aligned_p)
463 tree this_offset = size_zero_node;
464 tree base = NULL_TREE;
465 tree next_ref;
466 tree oprnd0, oprnd1;
467 struct data_reference *array_dr;
468 enum tree_code code = TREE_CODE (expr);
470 *base_aligned_p = false;
472 switch (code)
474 /* These cases end the recursion: */
475 case VAR_DECL:
476 *offset = size_zero_node;
477 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
478 *base_aligned_p = true;
479 return expr;
481 case SSA_NAME:
482 if (!vectype)
483 return expr;
485 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
486 return NULL_TREE;
488 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
490 base = vect_get_ptr_offset (expr, vectype, offset);
491 if (base)
492 *base_aligned_p = true;
494 else
496 *base_aligned_p = true;
497 *offset = size_zero_node;
498 base = expr;
500 return base;
502 case INTEGER_CST:
503 *offset = int_const_binop (MULT_EXPR, expr,
504 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
505 return expr;
507 /* These cases continue the recursion: */
508 case COMPONENT_REF:
509 oprnd0 = TREE_OPERAND (expr, 0);
510 oprnd1 = TREE_OPERAND (expr, 1);
512 this_offset = bit_position (oprnd1);
513 if (vectype && !host_integerp (this_offset, 1))
514 return NULL_TREE;
515 next_ref = oprnd0;
516 break;
518 case ADDR_EXPR:
519 oprnd0 = TREE_OPERAND (expr, 0);
520 next_ref = oprnd0;
521 break;
523 case INDIRECT_REF:
524 oprnd0 = TREE_OPERAND (expr, 0);
525 next_ref = oprnd0;
526 break;
528 case ARRAY_REF:
529 if (DR_REF (dr) != expr)
530 /* Build array data_reference struct if the existing DR_REF
531 doesn't match EXPR. This happens, for example, when the
532 EXPR is *T and T is initialized to &arr[indx]. The DR struct
533 contains information on the access of T, not of arr. In order
534 to continue the analysis, we create a new DR struct that
535 describes the access of arr.
537 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
538 else
539 array_dr = dr;
541 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
542 vectype, &this_offset);
543 if (!next_ref)
544 return NULL_TREE;
546 if (vectype &&
547 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
549 *offset = this_offset;
550 *base_aligned_p = true;
551 return next_ref;
553 break;
555 case PLUS_EXPR:
556 case MINUS_EXPR:
557 /* In case we have a PLUS_EXPR of the form
558 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
559 This is verified in vect_get_symbl_and_dr. */
560 oprnd0 = TREE_OPERAND (expr, 0);
561 oprnd1 = TREE_OPERAND (expr, 1);
563 base = vect_get_base_and_bit_offset
564 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
565 if (vectype && !base)
566 return NULL_TREE;
568 next_ref = oprnd0;
569 break;
571 default:
572 return NULL_TREE;
575 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
576 loop_vinfo, offset, base_aligned_p);
578 if (vectype && base)
580 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
581 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
582 return NULL_TREE;
584 if (vect_debug_details (NULL))
586 print_generic_expr (dump_file, expr, TDF_SLIM);
587 fprintf (dump_file, " --> total offset for ref: ");
588 print_generic_expr (dump_file, *offset, TDF_SLIM);
591 return base;
596 /* Function vect_force_dr_alignment_p.
598 Returns whether the alignment of a DECL can be forced to be aligned
599 on ALIGNMENT bit boundary. */
601 static bool
602 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
604 if (TREE_CODE (decl) != VAR_DECL)
605 return false;
607 if (DECL_EXTERNAL (decl))
608 return false;
610 if (TREE_STATIC (decl))
611 return (alignment <= MAX_OFILE_ALIGNMENT);
612 else
613 /* This is not 100% correct. The absolute correct stack alignment
614 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
615 PREFERRED_STACK_BOUNDARY is honored by all translation units.
616 However, until someone implements forced stack alignment, SSE
617 isn't really usable without this. */
618 return (alignment <= PREFERRED_STACK_BOUNDARY);
622 /* Function vect_get_new_vect_var.
624 Returns a name for a new variable. The current naming scheme appends the
625 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
626 the name of vectorizer generated variables, and appends that to NAME if
627 provided. */
629 static tree
630 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
632 const char *prefix;
633 int prefix_len;
634 tree new_vect_var;
636 if (var_kind == vect_simple_var)
637 prefix = "vect_";
638 else
639 prefix = "vect_p";
641 prefix_len = strlen (prefix);
643 if (name)
644 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
645 else
646 new_vect_var = create_tmp_var (type, prefix);
648 return new_vect_var;
652 /* Function vect_create_index_for_vector_ref.
654 Create (and return) an index variable, along with it's update chain in the
655 loop. This variable will be used to access a memory location in a vector
656 operation.
658 Input:
659 LOOP: The loop being vectorized.
660 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
661 function can be added here, or in the loop pre-header.
663 Output:
664 Return an index that will be used to index a vector array. It is expected
665 that a pointer to the first vector will be used as the base address for the
666 indexed reference.
668 FORNOW: we are not trying to be efficient, just creating a new index each
669 time from scratch. At this time all vector references could use the same
670 index.
672 TODO: create only one index to be used by all vector references. Record
673 the index in the LOOP_VINFO the first time this procedure is called and
674 return it on subsequent calls. The increment of this index must be placed
675 just before the conditional expression that ends the single block loop. */
677 static tree
678 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
680 tree init, step;
681 tree indx_before_incr, indx_after_incr;
683 /* It is assumed that the base pointer used for vectorized access contains
684 the address of the first vector. Therefore the index used for vectorized
685 access must be initialized to zero and incremented by 1. */
687 init = integer_zero_node;
688 step = integer_one_node;
690 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
691 create_iv (init, step, NULL_TREE, loop, bsi, false,
692 &indx_before_incr, &indx_after_incr);
694 return indx_before_incr;
698 /* Function vect_create_addr_base_for_vector_ref.
700 Create an expression that computes the address of the first memory location
701 that will be accessed for a data reference.
703 Input:
704 STMT: The statement containing the data reference.
705 NEW_STMT_LIST: Must be initialized to NULL_TREE or a
706 statement list.
708 Output:
709 1. Return an SSA_NAME whose value is the address of the memory location of the
710 first vector of the data reference.
711 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
712 these statement(s) which define the returned SSA_NAME.
714 FORNOW: We are only handling array accesses with step 1. */
716 static tree
717 vect_create_addr_base_for_vector_ref (tree stmt,
718 tree *new_stmt_list)
720 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
721 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
722 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
723 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
724 tree base_name = unshare_expr (DR_BASE_NAME (dr));
725 tree ref = DR_REF (dr);
726 tree data_ref_base_type = TREE_TYPE (data_ref_base);
727 tree scalar_type = TREE_TYPE (ref);
728 tree scalar_ptr_type = build_pointer_type (scalar_type);
729 tree access_fn;
730 tree init_val, step, init_oval;
731 bool ok;
732 bool is_ptr_ref, is_array_ref, is_addr_expr;
733 tree array_base;
734 tree vec_stmt;
735 tree new_temp;
736 tree array_ref;
737 tree addr_base, addr_expr;
738 tree dest, new_stmt;
740 /* Only the access function of the last index is relevant (i_n in
741 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
742 access_fn = DR_ACCESS_FN (dr, 0);
743 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
744 if (!ok)
745 init_oval = integer_zero_node;
747 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
748 && TREE_CODE (data_ref_base) == SSA_NAME;
749 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
750 && (TREE_CODE (data_ref_base) == VAR_DECL
751 || TREE_CODE (data_ref_base) == COMPONENT_REF
752 || TREE_CODE (data_ref_base) == ARRAY_REF);
753 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
754 || TREE_CODE (data_ref_base) == PLUS_EXPR
755 || TREE_CODE (data_ref_base) == MINUS_EXPR;
756 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
758 /** Create: &(base[init_val])
760 if data_ref_base is an ARRAY_TYPE:
761 base = data_ref_base
763 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
764 base = *((scalar_array *) data_ref_base)
767 if (is_array_ref)
768 array_base = data_ref_base;
769 else /* is_ptr_ref or is_addr_expr */
771 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
772 tree scalar_array_type = build_array_type (scalar_type, 0);
773 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
774 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
775 add_referenced_tmp_var (array_ptr);
777 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
778 add_referenced_tmp_var (dest);
779 data_ref_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
780 append_to_statement_list_force (new_stmt, new_stmt_list);
782 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
783 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
784 new_temp = make_ssa_name (array_ptr, vec_stmt);
785 TREE_OPERAND (vec_stmt, 0) = new_temp;
786 append_to_statement_list_force (vec_stmt, new_stmt_list);
788 /* (*array_ptr) */
789 array_base = build_fold_indirect_ref (new_temp);
792 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
793 add_referenced_tmp_var (dest);
794 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
795 append_to_statement_list_force (new_stmt, new_stmt_list);
797 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
798 NULL_TREE, NULL_TREE);
799 addr_base = build_fold_addr_expr (array_ref);
801 /* addr_expr = addr_base */
802 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
803 get_name (base_name));
804 add_referenced_tmp_var (addr_expr);
805 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
806 new_temp = make_ssa_name (addr_expr, vec_stmt);
807 TREE_OPERAND (vec_stmt, 0) = new_temp;
808 append_to_statement_list_force (vec_stmt, new_stmt_list);
809 return new_temp;
813 /* Function get_vectype_for_scalar_type.
815 Returns the vector type corresponding to SCALAR_TYPE as supported
816 by the target. */
818 static tree
819 get_vectype_for_scalar_type (tree scalar_type)
821 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
822 int nbytes = GET_MODE_SIZE (inner_mode);
823 int nunits;
824 tree vectype;
826 if (nbytes == 0)
827 return NULL_TREE;
829 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
830 is expected. */
831 nunits = UNITS_PER_SIMD_WORD / nbytes;
833 vectype = build_vector_type (scalar_type, nunits);
834 if (TYPE_MODE (vectype) == BLKmode)
835 return NULL_TREE;
836 return vectype;
840 /* Function vect_align_data_ref.
842 Handle mislignment of a memory accesses.
844 FORNOW: Can't handle misaligned accesses.
845 Make sure that the dataref is aligned. */
847 static void
848 vect_align_data_ref (tree stmt)
850 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
851 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
853 /* FORNOW: can't handle misaligned accesses;
854 all accesses expected to be aligned. */
855 gcc_assert (aligned_access_p (dr));
859 /* Function vect_create_data_ref.
861 Create a memory reference expression for vector access, to be used in a
862 vector load/store stmt.
864 Input:
865 STMT: a stmt that references memory. expected to be of the form
866 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
867 BSI: block_stmt_iterator where new stmts can be added.
869 Output:
870 1. Declare a new ptr to vector_type, and have it point to the array base.
871 For example, for vector of type V8HI:
872 v8hi *p0;
873 p0 = (v8hi *)&a;
874 2. Create a data-reference based on the new vector pointer p0, and using
875 a new index variable 'idx'. Return the expression '(*p0)[idx]'.
877 FORNOW: handle only aligned and consecutive accesses. */
879 static tree
880 vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
882 tree base_name, data_ref_base, data_ref_base_type;
883 tree array_type;
884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
885 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
886 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
887 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
888 tree vect_ptr_type;
889 tree vect_ptr;
890 tree tag;
891 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
892 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
893 vuse_optype vuses = STMT_VUSE_OPS (stmt);
894 int nvuses, nv_may_defs, nv_must_defs;
895 int i;
896 tree new_temp;
897 tree vec_stmt;
898 tree new_stmt_list = NULL_TREE;
899 tree idx;
900 tree new_base;
901 tree data_ref;
902 edge pe;
903 basic_block new_bb;
905 /* FORNOW: make sure the data reference is aligned. */
906 vect_align_data_ref (stmt);
908 base_name = unshare_expr (DR_BASE_NAME (dr));
909 data_ref_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
910 data_ref_base_type = TREE_TYPE (data_ref_base);
912 array_type = build_array_type (vectype, 0);
913 TYPE_ALIGN (array_type) = TYPE_ALIGN (data_ref_base_type);
914 vect_ptr_type = build_pointer_type (array_type);
916 if (vect_debug_details (NULL))
918 fprintf (dump_file, "create array_ref of type: ");
919 print_generic_expr (dump_file, vectype, TDF_SLIM);
922 /* Create: vectype *p; */
923 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
924 get_name (base_name));
925 add_referenced_tmp_var (vect_ptr);
927 if (vect_debug_details (NULL))
929 if (TREE_CODE (data_ref_base) == VAR_DECL)
930 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
931 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
932 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
933 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
934 fprintf (dump_file, "vectorizing a record based array ref: ");
935 else if (TREE_CODE (data_ref_base) == SSA_NAME)
936 fprintf (dump_file, "vectorizing a pointer ref: ");
937 else if (TREE_CODE (data_ref_base) == ADDR_EXPR
938 || TREE_CODE (data_ref_base) == PLUS_EXPR
939 || TREE_CODE (data_ref_base) == MINUS_EXPR)
940 fprintf (dump_file, "vectorizing an address expr: ");
941 print_generic_expr (dump_file, base_name, TDF_SLIM);
944 /* Handle aliasing: */
945 tag = STMT_VINFO_MEMTAG (stmt_info);
946 gcc_assert (tag);
947 get_var_ann (vect_ptr)->type_mem_tag = tag;
949 /* Mark for renaming all aliased variables
950 (i.e, the may-aliases of the type-mem-tag). */
951 nvuses = NUM_VUSES (vuses);
952 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
953 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
954 for (i = 0; i < nvuses; i++)
956 tree use = VUSE_OP (vuses, i);
957 if (TREE_CODE (use) == SSA_NAME)
958 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
960 for (i = 0; i < nv_may_defs; i++)
962 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
963 if (TREE_CODE (def) == SSA_NAME)
964 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
966 for (i = 0; i < nv_must_defs; i++)
968 tree def = V_MUST_DEF_OP (v_must_defs, i);
969 if (TREE_CODE (def) == SSA_NAME)
970 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
973 pe = loop_preheader_edge (loop);
975 /* Create: (&(base[init_val]) */
976 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list);
978 pe = loop_preheader_edge (loop);
979 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
980 gcc_assert (!new_bb);
982 /* p = (vectype_array *) addr_base */
983 vec_stmt = fold_convert (vect_ptr_type, new_temp);
984 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
985 new_temp = make_ssa_name (vect_ptr, vec_stmt);
986 TREE_OPERAND (vec_stmt, 0) = new_temp;
987 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
988 gcc_assert (!new_bb);
990 /*** create data ref: '(*p)[idx]' ***/
991 idx = vect_create_index_for_vector_ref (loop, bsi);
992 new_base = build_fold_indirect_ref (new_temp);
993 data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);
995 if (vect_debug_details (NULL))
997 fprintf (dump_file, "created new data-ref: ");
998 print_generic_expr (dump_file, data_ref, TDF_SLIM);
1001 return data_ref;
1005 /* Function vect_create_destination_var.
1007 Create a new temporary of type VECTYPE. */
1009 static tree
1010 vect_create_destination_var (tree scalar_dest, tree vectype)
1012 tree vec_dest;
1013 const char *new_name;
1015 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1017 new_name = get_name (scalar_dest);
1018 if (!new_name)
1019 new_name = "var_";
1020 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1021 add_referenced_tmp_var (vec_dest);
1023 return vec_dest;
1027 /* Function vect_init_vector.
1029 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1030 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1031 used in the vectorization of STMT. */
1033 static tree
1034 vect_init_vector (tree stmt, tree vector_var)
1036 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1037 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1038 tree new_var;
1039 tree init_stmt;
1040 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1041 tree vec_oprnd;
1042 edge pe;
1043 tree new_temp;
1044 basic_block new_bb;
1046 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1047 add_referenced_tmp_var (new_var);
1049 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1050 new_temp = make_ssa_name (new_var, init_stmt);
1051 TREE_OPERAND (init_stmt, 0) = new_temp;
1053 pe = loop_preheader_edge (loop);
1054 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1055 gcc_assert (!new_bb);
1057 if (vect_debug_details (NULL))
1059 fprintf (dump_file, "created new init_stmt: ");
1060 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1063 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1064 return vec_oprnd;
1068 /* Function vect_get_vec_def_for_operand.
1070 OP is an operand in STMT. This function returns a (vector) def that will be
1071 used in the vectorized stmt for STMT.
1073 In the case that OP is an SSA_NAME which is defined in the loop, then
1074 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1076 In case OP is an invariant or constant, a new stmt that creates a vector def
1077 needs to be introduced. */
1079 static tree
1080 vect_get_vec_def_for_operand (tree op, tree stmt)
1082 tree vec_oprnd;
1083 tree vec_stmt;
1084 tree def_stmt;
1085 stmt_vec_info def_stmt_info = NULL;
1086 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1087 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Number of elements in one vector of VECTYPE.  */
1088 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1089 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1090 basic_block bb;
1091 tree vec_inv;
1092 tree t = NULL_TREE;
1093 tree def;
1094 int i;
1096 if (vect_debug_details (NULL))
1098 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1099 print_generic_expr (dump_file, op, TDF_SLIM);
1102 /** ===> Case 1: operand is a constant. **/
1104 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1106 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
/* NOTE(review): these locals shadow the identically-initialized outer
   ones; harmless but redundant.  */
1108 tree vec_cst;
1109 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1110 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1111 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1112 tree t = NULL_TREE;
1113 int i;
1115 /* Build a tree with vector elements. */
1116 if (vect_debug_details (NULL))
1117 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
1119 for (i = nunits - 1; i >= 0; --i)
1121 t = tree_cons (NULL_TREE, op, t);
/* Splat the scalar constant across all vector lanes and emit the
   initialization on the loop preheader edge.  */
1123 vec_cst = build_vector (vectype, t);
1124 return vect_init_vector (stmt, vec_cst);
1127 gcc_assert (TREE_CODE (op) == SSA_NAME);
1129 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1131 def_stmt = SSA_NAME_DEF_STMT (op);
1132 def_stmt_info = vinfo_for_stmt (def_stmt);
1134 if (vect_debug_details (NULL))
1136 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1137 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1141 /** ==> Case 2.1: operand is defined inside the loop. **/
1143 if (def_stmt_info)
1145 /* Get the def from the vectorized stmt. */
1147 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1148 gcc_assert (vec_stmt);
1149 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1150 return vec_oprnd;
1154 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1155 it is a reduction/induction. **/
1157 bb = bb_for_stmt (def_stmt);
1158 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1160 if (vect_debug_details (NULL))
1161 fprintf (dump_file, "reduction/induction - unsupported.");
1162 internal_error ("no support for reduction/induction"); /* FORNOW */
1166 /** ==> Case 2.3: operand is defined outside the loop -
1167 it is a loop invariant. */
/* Find the scalar def to replicate into the invariant vector.  */
1169 switch (TREE_CODE (def_stmt))
1171 case PHI_NODE:
1172 def = PHI_RESULT (def_stmt);
1173 break;
1174 case MODIFY_EXPR:
1175 def = TREE_OPERAND (def_stmt, 0);
1176 break;
1177 case NOP_EXPR:
/* An empty stmt stands for a function argument; use OP itself.  */
1178 def = TREE_OPERAND (def_stmt, 0);
1179 gcc_assert (IS_EMPTY_STMT (def_stmt));
1180 def = op;
1181 break;
1182 default:
1183 if (vect_debug_details (NULL))
1185 fprintf (dump_file, "unsupported defining stmt: ");
1186 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1188 internal_error ("unsupported defining stmt");
1191 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1193 if (vect_debug_details (NULL))
1194 fprintf (dump_file, "Create vector_inv.");
1196 for (i = nunits - 1; i >= 0; --i)
1198 t = tree_cons (NULL_TREE, def, t);
1201 vec_inv = build_constructor (vectype, t);
1202 return vect_init_vector (stmt, vec_inv);
1206 /* Function vect_finish_stmt_generation.
1208 Insert a new stmt. */
1210 static void
1211 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
1213 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1215 if (vect_debug_details (NULL))
1217 fprintf (dump_file, "add new stmt: ");
1218 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1221 /* Make sure bsi points to the stmt that is being vectorized. */
1223 /* Assumption: any stmts created for the vectorization of smtmt S are
1224 inserted before S. BSI may point to S or some new stmt before it. */
1226 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1227 bsi_next (bsi);
1228 gcc_assert (stmt == bsi_stmt (*bsi));
1232 /* Function vectorizable_assignment.
1234 Check if STMT performs an assignment (copy) that can be vectorized.
1235 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1236 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1237 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1239 static bool
1240 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1242 tree vec_dest;
1243 tree scalar_dest;
1244 tree op;
1245 tree vec_oprnd;
1246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1247 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1248 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1249 tree new_temp;
1251 /* Is vectorizable assignment? */
1253 if (TREE_CODE (stmt) != MODIFY_EXPR)
1254 return false;
1256 scalar_dest = TREE_OPERAND (stmt, 0);
1257 if (TREE_CODE (scalar_dest) != SSA_NAME)
1258 return false;
1260 op = TREE_OPERAND (stmt, 1);
1261 if (!vect_is_simple_use (op, loop, NULL))
1263 if (vect_debug_details (NULL))
1264 fprintf (dump_file, "use not simple.");
1265 return false;
/* Analysis-only mode: record the stmt kind for the transform phase.  */
1268 if (!vec_stmt) /* transformation not required. */
1270 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1271 return true;
1274 /** Transform. **/
1275 if (vect_debug_details (NULL))
1276 fprintf (dump_file, "transform assignment.");
1278 /* Handle def. */
1279 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1281 /* Handle use. */
1282 op = TREE_OPERAND (stmt, 1);
1283 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1285 /* Arguments are ready. create the new vector stmt. */
1286 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1287 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1288 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1289 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1291 return true;
1295 /* Function vectorizable_operation.
1297 Check if STMT performs a binary or unary operation that can be vectorized.
1298 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1299 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1300 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1302 static bool
1303 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1305 tree vec_dest;
1306 tree scalar_dest;
1307 tree operation;
1308 tree op0, op1 = NULL;
1309 tree vec_oprnd0, vec_oprnd1=NULL;
1310 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1311 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1312 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1313 int i;
1314 enum tree_code code;
1315 enum machine_mode vec_mode;
1316 tree new_temp;
1317 int op_type;
1318 tree op;
1319 optab optab;
1321 /* Is STMT a vectorizable binary/unary operation? */
1322 if (TREE_CODE (stmt) != MODIFY_EXPR)
1323 return false;
1325 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1326 return false;
1328 operation = TREE_OPERAND (stmt, 1);
1329 code = TREE_CODE (operation);
1330 optab = optab_for_tree_code (code, vectype);
1332 /* Support only unary or binary operations. */
1333 op_type = TREE_CODE_LENGTH (code);
1334 if (op_type != unary_op && op_type != binary_op)
1336 if (vect_debug_details (NULL))
1337 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1338 return false;
/* All operands must be simple uses (constants, invariants, or defs of
   the current iteration).  */
1341 for (i = 0; i < op_type; i++)
1343 op = TREE_OPERAND (operation, i);
1344 if (!vect_is_simple_use (op, loop, NULL))
1346 if (vect_debug_details (NULL))
1347 fprintf (dump_file, "use not simple.");
1348 return false;
1352 /* Supportable by target? */
1353 if (!optab)
1355 if (vect_debug_details (NULL))
1356 fprintf (dump_file, "no optab.");
1357 return false;
/* The target must have an insn for this operation in the vector mode.  */
1359 vec_mode = TYPE_MODE (vectype);
1360 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1362 if (vect_debug_details (NULL))
1363 fprintf (dump_file, "op not supported by target.");
1364 return false;
/* Analysis-only mode: record the stmt kind for the transform phase.  */
1367 if (!vec_stmt) /* transformation not required. */
1369 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1370 return true;
1373 /** Transform. **/
1375 if (vect_debug_details (NULL))
1376 fprintf (dump_file, "transform binary/unary operation.");
1378 /* Handle def. */
1379 scalar_dest = TREE_OPERAND (stmt, 0);
1380 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1382 /* Handle uses. */
1383 op0 = TREE_OPERAND (operation, 0);
1384 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1386 if (op_type == binary_op)
1388 op1 = TREE_OPERAND (operation, 1);
1389 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1392 /* Arguments are ready. create the new vector stmt. */
1394 if (op_type == binary_op)
1395 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1396 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1397 else
1398 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1399 build1 (code, vectype, vec_oprnd0));
1400 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1401 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1402 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1404 return true;
1408 /* Function vectorizable_store.
1410 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1411 can be vectorized.
1412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1413 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1416 static bool
1417 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1419 tree scalar_dest;
1420 tree data_ref;
1421 tree op;
1422 tree vec_oprnd1;
1423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1425 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1426 enum machine_mode vec_mode;
1428 /* Is vectorizable store? */
1430 if (TREE_CODE (stmt) != MODIFY_EXPR)
1431 return false;
1433 scalar_dest = TREE_OPERAND (stmt, 0);
1434 if (TREE_CODE (scalar_dest) != ARRAY_REF
1435 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1436 return false;
1438 op = TREE_OPERAND (stmt, 1);
1439 if (!vect_is_simple_use (op, loop, NULL))
1441 if (vect_debug_details (NULL))
1442 fprintf (dump_file, "use not simple.");
1443 return false;
/* The target must support a plain vector move in this mode.  */
1446 vec_mode = TYPE_MODE (vectype);
1447 /* FORNOW. In some cases can vectorize even if data-type not supported
1448 (e.g. - array initialization with 0). */
1449 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1450 return false;
1452 if (!STMT_VINFO_DATA_REF (stmt_info))
1453 return false;
/* Analysis-only mode: record the stmt kind for the transform phase.  */
1455 if (!vec_stmt) /* transformation not required. */
1457 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1458 return true;
1461 /** Transform. **/
1463 if (vect_debug_details (NULL))
1464 fprintf (dump_file, "transform store");
1466 /* Handle use - get the vectorized def from the defining stmt. */
1467 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1469 /* Handle def. */
1470 data_ref = vect_create_data_ref (stmt, bsi);
1472 /* Arguments are ready. create the new vector stmt. */
/* The LHS is a memory ref, so no SSA name is created for this def.  */
1473 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1474 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1476 return true;
1480 /* vectorizable_load.
1482 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1483 can be vectorized.
1484 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1485 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1486 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1488 static bool
1489 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1491 tree scalar_dest;
1492 tree vec_dest = NULL;
1493 tree data_ref = NULL;
1494 tree op;
1495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1496 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1497 tree new_temp;
1498 enum machine_mode vec_mode;
1500 /* Is vectorizable load? */
1502 if (TREE_CODE (stmt) != MODIFY_EXPR)
1503 return false;
1505 scalar_dest = TREE_OPERAND (stmt, 0);
1506 if (TREE_CODE (scalar_dest) != SSA_NAME)
1507 return false;
1509 op = TREE_OPERAND (stmt, 1);
1510 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1511 return false;
1513 if (!STMT_VINFO_DATA_REF (stmt_info))
1514 return false;
/* The target must support a plain vector move in this mode.  */
1516 vec_mode = TYPE_MODE (vectype);
1517 /* FORNOW. In some cases can vectorize even if data-type not supported
1518 (e.g. - data copies). */
1519 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1520 return false;
/* Analysis-only mode: record the stmt kind for the transform phase.  */
1522 if (!vec_stmt) /* transformation not required. */
1524 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1525 return true;
1528 /** Transform. **/
1530 if (vect_debug_details (NULL))
1531 fprintf (dump_file, "transform load.");
1533 /* Handle def. */
1534 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1536 /* Handle use. */
1537 op = TREE_OPERAND (stmt, 1);
1538 data_ref = vect_create_data_ref (stmt, bsi);
1540 /* Arguments are ready. create the new vector stmt. */
1541 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1542 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1543 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1544 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1546 return true;
1550 /* Function vect_transform_stmt.
1552 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Returns true iff STMT is a store; the caller then removes the scalar
   store from the IL (its vectorized replacement has been inserted).  */
1554 static bool
1555 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1557 bool is_store = false;
1558 tree vec_stmt = NULL_TREE;
1559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1560 bool done;
/* Dispatch on the stmt kind recorded during the analysis phase.  */
1562 switch (STMT_VINFO_TYPE (stmt_info))
1564 case op_vec_info_type:
1565 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1566 gcc_assert (done);
1567 break;
1569 case assignment_vec_info_type:
1570 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1571 gcc_assert (done);
1572 break;
1574 case load_vec_info_type:
1575 done = vectorizable_load (stmt, bsi, &vec_stmt);
1576 gcc_assert (done);
1577 break;
1579 case store_vec_info_type:
1580 done = vectorizable_store (stmt, bsi, &vec_stmt);
1581 gcc_assert (done);
1582 is_store = true;
1583 break;
1584 default:
1585 if (vect_debug_details (NULL))
1586 fprintf (dump_file, "stmt not supported.");
1587 gcc_unreachable ();
/* Record the vectorized stmt so later uses of STMT's def can find it
   (see vect_get_vec_def_for_operand, Case 2.1).  */
1590 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1592 return is_store;
1596 /* Function vect_transform_loop_bound.
1598 Create a new exit condition for the loop. */
/* Replaces the original exit test with one based on a fresh counter IV
   that runs from 0 to NITERS/VF.  */
1600 static void
1601 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1603 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1604 edge exit_edge = loop->single_exit;
1605 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1606 tree indx_before_incr, indx_after_incr;
1607 tree orig_cond_expr;
1608 HOST_WIDE_INT old_N = 0;
1609 int vf;
1610 tree cond_stmt;
1611 tree new_loop_bound;
1612 tree cond;
1613 tree lb_type;
1615 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1616 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1617 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1619 /* FORNOW:
1620 assuming number-of-iterations divides by the vectorization factor. */
1621 gcc_assert (!(old_N % vf));
1623 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1624 gcc_assert (orig_cond_expr);
1625 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
/* Create a new IV counting 0, 1, ..., with its increment inserted just
   before the exit condition.  */
1627 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1628 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1630 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1631 to point to the exit condition. */
1632 bsi_next (&loop_exit_bsi);
1633 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1635 /* new loop exit test: */
/* Reuse the type of the original bound (second operand of the old
   comparison) for the new bound NITERS/VF.  */
1636 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1637 new_loop_bound = build_int_cst (lb_type, old_N/vf);
/* Preserve the branch sense of the original COND_EXPR.  */
1639 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1640 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1641 else /* 'then' edge loops back. */
1642 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
/* Keep the original 'then'/'else' goto arms; only the condition changes.  */
1644 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1645 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1647 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1649 /* remove old loop exit test: */
1650 bsi_remove (&loop_exit_bsi);
1652 if (vect_debug_details (NULL))
1653 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1657 /* Function vect_transform_loop.
1659 The analysis phase has determined that the loop is vectorizable.
1660 Vectorize the loop - created vectorized stmts to replace the scalar
1661 stmts in the loop, and update the loop exit condition. */
1663 static void
1664 vect_transform_loop (loop_vec_info loop_vinfo,
1665 struct loops *loops ATTRIBUTE_UNUSED)
1667 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1668 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1669 int nbbs = loop->num_nodes;
1670 block_stmt_iterator si;
1671 int i;
1672 #ifdef ENABLE_CHECKING
1673 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1674 #endif
1676 if (vect_debug_details (NULL))
1677 fprintf (dump_file, "\n<<vec_transform_loop>>\n")
1679 /* 1) Make sure the loop header has exactly two entries
1680 2) Make sure we have a preheader basic block. */
1682 gcc_assert (loop->header->pred->pred_next);
1683 gcc_assert (!loop->header->pred->pred_next->pred_next);
/* Split the preheader edge so invariant code has a block to land in.  */
1685 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1688 /* FORNOW: the vectorizer supports only loops which body consist
1689 of one basic block (header + empty latch). When the vectorizer will
1690 support more involved loop forms, the order by which the BBs are
1691 traversed need to be reconsidered. */
1693 for (i = 0; i < nbbs; i++)
1695 basic_block bb = bbs[i];
/* The iterator is advanced manually: it stays put after a removal.  */
1697 for (si = bsi_start (bb); !bsi_end_p (si);)
1699 tree stmt = bsi_stmt (si);
1700 stmt_vec_info stmt_info;
1701 bool is_store;
1702 #ifdef ENABLE_CHECKING
1703 tree vectype;
1704 #endif
1706 if (vect_debug_details (NULL))
1708 fprintf (dump_file, "------>vectorizing statement: ");
1709 print_generic_expr (dump_file, stmt, TDF_SLIM);
1711 stmt_info = vinfo_for_stmt (stmt);
1712 gcc_assert (stmt_info);
1713 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1715 bsi_next (&si);
1716 continue;
1718 #ifdef ENABLE_CHECKING
1719 /* FORNOW: Verify that all stmts operate on the same number of
1720 units and no inner unrolling is necessary. */
1721 vectype = STMT_VINFO_VECTYPE (stmt_info);
1722 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
1723 == vectorization_factor);
1724 #endif
1725 /* -------- vectorize statement ------------ */
1726 if (vect_debug_details (NULL))
1727 fprintf (dump_file, "transform statement.");
/* For stores the scalar stmt is removed (the vector store replaces
   it); for all other stmts the scalar stmt is kept.  */
1729 is_store = vect_transform_stmt (stmt, &si);
1730 if (is_store)
1732 /* free the attached stmt_vec_info and remove the stmt. */
1733 stmt_ann_t ann = stmt_ann (stmt);
1734 free (stmt_info);
1735 set_stmt_info (ann, NULL);
1736 bsi_remove (&si);
1737 continue;
1740 bsi_next (&si);
1741 } /* stmts in BB */
1742 } /* BBs in loop */
1744 vect_transform_loop_bound (loop_vinfo);
1746 if (vect_debug_details (loop))
1747 fprintf (dump_file,"Success! loop vectorized.");
1748 if (vect_debug_stats (loop))
1749 fprintf (dump_file, "LOOP VECTORIZED.");
1753 /* Function vect_is_simple_use.
1755 Input:
1756 LOOP - the loop that is being vectorized.
1757 OPERAND - operand of a stmt in LOOP.
1758 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1760 Returns whether a stmt with OPERAND can be vectorized.
1761 Supportable operands are constants, loop invariants, and operands that are
1762 defined by the current iteration of the loop. Unsupportable operands are
1763 those that are defined by a previous iteration of the loop (as is the case
1764 in reduction/induction computations). */
1766 static bool
1767 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1769 tree def_stmt;
1770 basic_block bb;
/* DEF is an optional out-parameter; clear it up front so it is valid
   on every return path.  */
1772 if (def)
1773 *def = NULL_TREE;
1775 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1776 return true;
1778 if (TREE_CODE (operand) != SSA_NAME)
1779 return false;
1781 def_stmt = SSA_NAME_DEF_STMT (operand);
1782 if (def_stmt == NULL_TREE )
1784 if (vect_debug_details (NULL))
1785 fprintf (dump_file, "no def_stmt.");
1786 return false;
1789 /* empty stmt is expected only in case of a function argument.
1790 (Otherwise - we expect a phi_node or a modify_expr). */
1791 if (IS_EMPTY_STMT (def_stmt))
1793 tree arg = TREE_OPERAND (def_stmt, 0);
1794 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1795 return true;
1796 if (vect_debug_details (NULL))
1798 fprintf (dump_file, "Unexpected empty stmt: ");
1799 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1801 return false;
1804 /* phi_node inside the loop indicates an induction/reduction pattern.
1805 This is not supported yet. */
1806 bb = bb_for_stmt (def_stmt);
1807 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1809 if (vect_debug_details (NULL))
1810 fprintf (dump_file, "reduction/induction - unsupported.");
1811 return false; /* FORNOW: not supported yet. */
1814 /* Expecting a modify_expr or a phi_node. */
/* A PHI_NODE reaching here is outside the loop, i.e. loop invariant.  */
1815 if (TREE_CODE (def_stmt) == MODIFY_EXPR
1816 || TREE_CODE (def_stmt) == PHI_NODE)
1818 if (def)
1819 *def = def_stmt;
1820 return true;
1823 return false;
1827 /* Function vect_analyze_operations.
1829 Scan the loop stmts and make sure they are all vectorizable. */
/* Also determines the loop's vectorization factor (number of vector
   elements all stmts agree on) and checks it divides the loop bound.  */
1831 static bool
1832 vect_analyze_operations (loop_vec_info loop_vinfo)
1834 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1835 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1836 int nbbs = loop->num_nodes;
1837 block_stmt_iterator si;
1838 int vectorization_factor = 0;
1839 int i;
1840 bool ok;
1841 tree scalar_type;
1843 if (vect_debug_details (NULL))
1844 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
1846 for (i = 0; i < nbbs; i++)
1848 basic_block bb = bbs[i];
1850 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1852 tree stmt = bsi_stmt (si);
1853 int nunits;
1854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1855 tree vectype;
1857 if (vect_debug_details (NULL))
1859 fprintf (dump_file, "==> examining statement: ");
1860 print_generic_expr (dump_file, stmt, TDF_SLIM);
1863 gcc_assert (stmt_info);
1865 /* skip stmts which do not need to be vectorized.
1866 this is expected to include:
1867 - the COND_EXPR which is the loop exit condition
1868 - any LABEL_EXPRs in the loop
1869 - computations that are used only for array indexing or loop
1870 control */
1872 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1874 if (vect_debug_details (NULL))
1875 fprintf (dump_file, "irrelevant.");
1876 continue;
/* A stmt that already operates on vectors cannot be re-vectorized.  */
1879 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
1881 if (vect_debug_stats (loop) || vect_debug_details (loop))
1883 fprintf (dump_file, "not vectorized: vector stmt in loop:");
1884 print_generic_expr (dump_file, stmt, TDF_SLIM);
1886 return false;
/* Determine the scalar type the stmt operates on: prefer the data
   reference's type, then the LHS type, else the stmt's own type.  */
1889 if (STMT_VINFO_DATA_REF (stmt_info))
1890 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
1891 else if (TREE_CODE (stmt) == MODIFY_EXPR)
1892 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
1893 else
1894 scalar_type = TREE_TYPE (stmt);
1896 if (vect_debug_details (NULL))
1898 fprintf (dump_file, "get vectype for scalar type: ");
1899 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1902 vectype = get_vectype_for_scalar_type (scalar_type);
1903 if (!vectype)
1905 if (vect_debug_stats (loop) || vect_debug_details (loop))
1907 fprintf (dump_file, "not vectorized: unsupported data-type ");
1908 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1910 return false;
1913 if (vect_debug_details (NULL))
1915 fprintf (dump_file, "vectype: ");
1916 print_generic_expr (dump_file, vectype, TDF_SLIM);
1918 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* Analysis-only calls (NULL vec_stmt): each vectorizable_* routine
   checks the stmt and records its kind in STMT_VINFO_TYPE.  */
1920 ok = (vectorizable_operation (stmt, NULL, NULL)
1921 || vectorizable_assignment (stmt, NULL, NULL)
1922 || vectorizable_load (stmt, NULL, NULL)
1923 || vectorizable_store (stmt, NULL, NULL));
1925 if (!ok)
1927 if (vect_debug_stats (loop) || vect_debug_details (loop))
1929 fprintf (dump_file, "not vectorized: stmt not supported: ");
1930 print_generic_expr (dump_file, stmt, TDF_SLIM);
1932 return false;
1935 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1936 if (vect_debug_details (NULL))
1937 fprintf (dump_file, "nunits = %d", nunits);
1939 if (vectorization_factor)
1941 /* FORNOW: don't allow mixed units.
1942 This restriction will be relaxed in the future. */
1943 if (nunits != vectorization_factor)
1945 if (vect_debug_stats (loop) || vect_debug_details (loop))
1946 fprintf (dump_file, "not vectorized: mixed data-types");
1947 return false;
1950 else
1951 vectorization_factor = nunits;
1955 /* TODO: Analyze cost. Decide if worth while to vectorize. */
1956 if (!vectorization_factor)
1958 if (vect_debug_stats (loop) || vect_debug_details (loop))
1959 fprintf (dump_file, "not vectorized: unsupported data-type");
1960 return false;
1962 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1964 /* FORNOW: handle only cases where the loop bound divides by the
1965 vectorization factor. */
1967 if (vect_debug_details (NULL))
1968 fprintf (dump_file,
1969 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1970 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
1972 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1974 if (vect_debug_stats (loop) || vect_debug_details (loop))
1975 fprintf (dump_file, "not vectorized: Unknown loop bound.");
1976 return false;
/* NOTE(review): NITERS_KNOWN_P was just established above, so the
   first conjunct here is redundant.  */
1979 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1980 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
1982 if (vect_debug_stats (loop) || vect_debug_details (loop))
1983 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
1984 vectorization_factor);
1985 return false;
1988 return true;
1992 /* Function exist_non_indexing_operands_for_use_p
1994 USE is one of the uses attached to STMT. Check if USE is
1995 used in STMT for anything other than indexing an array. */
1997 static bool
1998 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2000 tree operand;
2001 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 /* USE corresponds to some operand in STMT. If there is no data
2004 reference in STMT, then any operand that corresponds to USE
2005 is not indexing an array. */
2006 if (!STMT_VINFO_DATA_REF (stmt_info))
2007 return true;
2009 /* STMT has a data_ref. FORNOW this means that its of one of
2010 the following forms:
2011 -1- ARRAY_REF = var
2012 -2- var = ARRAY_REF
2013 (This should have been verified in analyze_data_refs).
2015 'var' in the second case corresponds to a def, not a use,
2016 so USE cannot correspond to any operands that are not used
2017 for array indexing.
2019 Therefore, all we need to check is if STMT falls into the
2020 first case, and whether var corresponds to USE. */
2022 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2023 return false;
2025 operand = TREE_OPERAND (stmt, 1);
2027 if (TREE_CODE (operand) != SSA_NAME)
2028 return false;
2030 if (operand == use)
2031 return true;
2033 return false;
2037 /* Function vect_is_simple_iv_evolution.
2039 FORNOW: A simple evolution of an induction variables in the loop is
2040 considered a polynomial evolution with constant step. */
/* On success *INIT and *STEP receive the initial value and the step.
   Note they are also set before the failure returns below, so callers
   must not rely on them being untouched when false is returned.
   When STRICT, additionally require the step to be exactly 1.  */
2042 static bool
2043 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2044 tree * step, bool strict)
2046 tree init_expr;
2047 tree step_expr;
2049 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2051 /* When there is no evolution in this loop, the evolution function
2052 is not "simple". */
2053 if (evolution_part == NULL_TREE)
2054 return false;
2056 /* When the evolution is a polynomial of degree >= 2
2057 the evolution function is not "simple". */
2058 if (tree_is_chrec (evolution_part))
2059 return false;
2061 step_expr = evolution_part;
2062 init_expr = initial_condition (access_fn);
2064 if (vect_debug_details (NULL))
2066 fprintf (dump_file, "step: ");
2067 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2068 fprintf (dump_file, ", init: ");
2069 print_generic_expr (dump_file, init_expr, TDF_SLIM);
2072 *init = init_expr;
2073 *step = step_expr;
/* Only constant steps are considered simple.  */
2075 if (TREE_CODE (step_expr) != INTEGER_CST)
2077 if (vect_debug_details (NULL))
2078 fprintf (dump_file, "step unknown.");
2079 return false;
2082 if (strict)
2083 if (!integer_onep (step_expr))
2085 if (vect_debug_details (NULL))
2086 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2087 return false;
2090 return true;
2094 /* Function vect_analyze_scalar_cycles.
2096 Examine the cross iteration def-use cycles of scalar variables, by
2097 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2098 cycles that they represent do not impede vectorization.
2100 FORNOW: Reduction as in the following loop, is not supported yet:
2101 loop1:
2102 for (i=0; i<N; i++)
2103 sum += a[i];
2104 The cross-iteration cycle corresponding to variable 'sum' will be
2105 considered too complicated and will impede vectorization.
2107 FORNOW: Induction as in the following loop, is not supported yet:
2108 loop2:
2109 for (i=0; i<N; i++)
2110 a[i] = i;
2112 However, the following loop *is* vectorizable:
2113 loop3:
2114 for (i=0; i<N; i++)
2115 a[i] = b[i];
2117 In both loops there exists a def-use cycle for the variable i:
2118 loop: i_2 = PHI (i_0, i_1)
2119 a[i_2] = ...;
2120 i_1 = i_2 + 1;
2121 GOTO loop;
2123 The evolution of the above cycle is considered simple enough,
2124 however, we also check that the cycle does not need to be
2125 vectorized, i.e - we check that the variable that this cycle
2126 defines is only used for array indexing or in stmts that do not
2127 need to be vectorized. This is not the case in loop2, but it
2128 *is* the case in loop3. */
2130 static bool
2131 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
2133 tree phi;
2134 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2135 basic_block bb = loop->header;
2136 tree dummy;
2138 if (vect_debug_details (NULL))
2139 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
2141 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
2143 tree access_fn = NULL;
2145 if (vect_debug_details (NULL))
2147 fprintf (dump_file, "Analyze phi: ");
2148 print_generic_expr (dump_file, phi, TDF_SLIM);
2151 /* Skip virtual phi's. The data dependences that are associated with
2152 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2154 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2156 if (vect_debug_details (NULL))
2157 fprintf (dump_file, "virtual phi. skip.");
2158 continue;
2161 /* Analyze the evolution function. */
2163 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2164 those of loop induction variables; This property is verified here.
2166 Furthermore, if that induction variable is used in an operation
2167 that needs to be vectorized (i.e, is not solely used to index
2168 arrays and check the exit condition) - we do not support its
2169 vectorization yet. This property is verified in vect_is_simple_use,
2170 during vect_analyze_operations. */
2172 access_fn = /* instantiate_parameters
2173 (loop,*/
2174 analyze_scalar_evolution (loop, PHI_RESULT (phi));
2176 if (!access_fn)
2178 if (vect_debug_stats (loop) || vect_debug_details (loop))
2179 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2180 return false;
2183 if (vect_debug_details (NULL))
2185 fprintf (dump_file, "Access function of PHI: ");
2186 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2189 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2190 &dummy, false))
2192 if (vect_debug_stats (loop) || vect_debug_details (loop))
2193 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2194 return false;
2198 return true;
2202 /* Function vect_analyze_data_ref_dependence.
2204 Return TRUE if there (might) exist a dependence between a memory-reference
2205 DRA and a memory-reference DRB. */
2207 static bool
2208 vect_analyze_data_ref_dependence (struct data_reference *dra,
2209 struct data_reference *drb,
2210 struct loop *loop)
2212 bool differ_p;
2213 struct data_dependence_relation *ddr;
2215 if (!array_base_name_differ_p (dra, drb, &differ_p))
2217 if (vect_debug_stats (loop) || vect_debug_details (loop))
2219 fprintf (dump_file,
2220 "not vectorized: can't determine dependence between: ");
2221 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2222 fprintf (dump_file, " and ");
2223 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2225 return true;
2228 if (differ_p)
2229 return false;
2231 ddr = initialize_data_dependence_relation (dra, drb);
2232 compute_affine_dependence (ddr);
2234 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2235 return false;
2237 if (vect_debug_stats (loop) || vect_debug_details (loop))
2239 fprintf (dump_file,
2240 "not vectorized: possible dependence between data-refs ");
2241 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2242 fprintf (dump_file, " and ");
2243 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2246 return true;
2250 /* Function vect_analyze_data_ref_dependences.
2252 Examine all the data references in the loop, and make sure there do not
2253 exist any data dependences between them.
2255 TODO: dependences which distance is greater than the vectorization factor
2256 can be ignored. */
2258 static bool
2259 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2261 unsigned int i, j;
2262 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2263 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2264 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2266 /* Examine store-store (output) dependences. */
2268 if (vect_debug_details (NULL))
2269 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2271 if (vect_debug_details (NULL))
2272 fprintf (dump_file, "compare all store-store pairs.");
2274 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2276 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2278 struct data_reference *dra =
2279 VARRAY_GENERIC_PTR (loop_write_refs, i);
2280 struct data_reference *drb =
2281 VARRAY_GENERIC_PTR (loop_write_refs, j);
2282 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2283 return false;
2287 /* Examine load-store (true/anti) dependences. */
2289 if (vect_debug_details (NULL))
2290 fprintf (dump_file, "compare all load-store pairs.");
2292 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2294 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2296 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2297 struct data_reference *drb =
2298 VARRAY_GENERIC_PTR (loop_write_refs, j);
2299 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2300 return false;
2304 return true;
2308 /* Function vect_get_first_index.
2310 REF is a data reference.
2311 If it is an ARRAY_REF: if its lower bound is simple enough,
2312 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2313 If it is not an ARRAY_REF: REF has no "first index";
2314 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2316 static bool
2317 vect_get_first_index (tree ref, tree *array_first_index)
2319 tree array_start;
2321 if (TREE_CODE (ref) != ARRAY_REF)
2322 *array_first_index = size_zero_node;
2323 else
2325 array_start = array_ref_low_bound (ref);
2326 if (!host_integerp (array_start,0))
2328 if (vect_debug_details (NULL))
2330 fprintf (dump_file, "array min val not simple integer cst.");
2331 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2333 return false;
2335 *array_first_index = array_start;
2338 return true;
2342 /* Function vect_compute_array_base_alignment.
2343 A utility function of vect_compute_array_ref_alignment.
2345 Compute the misalignment of ARRAY in bits.
2347 Input:
2348 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
2349 VECTYPE - we are interested in the misalignment modulo the size of vectype.
2350 if NULL: don't compute misalignment, just return the base of ARRAY.
2351 PREV_DIMENSIONS - initialized to one.
2352 MISALIGNMENT - the computed misalignment in bits.
2354 Output:
2355 If VECTYPE is not NULL:
2356 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2357 the base of the array, and put the computed misalignment in MISALIGNMENT.
2358 If VECTYPE is NULL:
2359 Return the base of the array.
2361 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2362 a[idx_N]...[idx_2][idx_1] is
2363 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2364 ... + idx_N * dim_0 * ... * dim_N-1}.
2365 (The misalignment of &a is not checked here).
2366 Note, that every term contains dim_0, therefore, if dim_0 is a
2367 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2368 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2369 NUNITS, we can say that the misalignment of the sum is equal to
2370 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2371 we can't determine this array misalignment, and we return
2372 false.
2373 We proceed recursively in this manner, accumulating total misalignment
2374 and the multiplication of previous dimensions for correct misalignment
2375 calculation. */
2377 static tree
2378 vect_compute_array_base_alignment (tree array,
2379 tree vectype,
2380 tree *prev_dimensions,
2381 tree *misalignment)
2383 tree index;
2384 tree domain;
2385 tree dimension_size;
2386 tree mis;
2387 tree bits_per_vectype;
2388 tree bits_per_vectype_unit;
2390 /* The 'stop condition' of the recursion. */
2391 if (TREE_CODE (array) != ARRAY_REF)
2392 return array;
2394 if (!vectype)
2395 /* Just get the base decl. */
2396 return vect_compute_array_base_alignment
2397 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* Give up if the accumulated values are no longer representable as
   unsigned host integers.  */
2399 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
2400 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
2401 return NULL_TREE;
/* Number of elements in this dimension: max - min + 1.  */
2403 domain = TYPE_DOMAIN (TREE_TYPE (array));
2404 dimension_size =
2405 int_const_binop (PLUS_EXPR,
2406 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
2407 TYPE_MIN_VALUE (domain), 1),
2408 size_one_node, 1);
2410 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2411 is a multiple of NUNITS:
2413 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2415 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
2416 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
2417 if (integer_zerop (mis))
2418 /* This array is aligned. Continue just in order to get the base decl. */
2419 return vect_compute_array_base_alignment
2420 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* A non-constant index in an unaligned dimension makes the misalignment
   unknown.  */
2422 index = TREE_OPERAND (array, 1);
2423 if (!host_integerp (index, 1))
2424 /* The current index is not constant. */
2425 return NULL_TREE;
/* Normalize the index relative to the dimension's lower bound.  */
2427 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
2429 bits_per_vectype = fold_convert (unsigned_type_node,
2430 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2431 GET_MODE_SIZE (TYPE_MODE (vectype))));
2432 bits_per_vectype_unit = fold_convert (unsigned_type_node,
2433 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2434 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
2436 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2437 earlier:
2439 *misalignment =
2440 (*misalignment + index_val * dimension_size * *prev_dimensions)
2441 % vectype_nunits;
2444 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
2445 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
2446 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
2447 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
2448 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Accumulate the product of dimension sizes for the outer recursion
   levels.  */
2451 *prev_dimensions = int_const_binop (MULT_EXPR,
2452 *prev_dimensions, dimension_size, 1);
2454 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
2455 prev_dimensions,
2456 misalignment);
2460 /* Function vect_compute_data_ref_alignment
2462 Compute the misalignment of the data reference DR.
2464 Output:
2465 1. If during the misalignment computation it is found that the data reference
2466 cannot be vectorized then false is returned.
2467 2. DR_MISALIGNMENT (DR) is defined.
2469 FOR NOW: No analysis is actually performed. Misalignment is calculated
2470 only for trivial cases. TODO. */
2472 static bool
2473 vect_compute_data_ref_alignment (struct data_reference *dr,
2474 loop_vec_info loop_vinfo)
2476 tree stmt = DR_STMT (dr);
2477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2478 tree ref = DR_REF (dr);
2479 tree vectype;
2480 tree scalar_type;
2481 tree offset = size_zero_node;
2482 tree base, bit_offset, alignment;
2483 tree unit_bits = fold_convert (unsigned_type_node,
2484 build_int_cst (NULL_TREE, BITS_PER_UNIT));
2485 tree dr_base;
2486 bool base_aligned_p;
2488 if (vect_debug_details (NULL))
2489 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2491 /* Initialize misalignment to unknown. */
2492 DR_MISALIGNMENT (dr) = -1;
/* No vectype for the scalar type means this data-ref cannot be
   vectorized at all.  */
2494 scalar_type = TREE_TYPE (ref);
2495 vectype = get_vectype_for_scalar_type (scalar_type);
2496 if (!vectype)
2498 if (vect_debug_details (NULL))
2500 fprintf (dump_file, "no vectype for stmt: ");
2501 print_generic_expr (dump_file, stmt, TDF_SLIM);
2502 fprintf (dump_file, " scalar_type: ");
2503 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2505 /* It is not possible to vectorize this data reference. */
2506 return false;
2508 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
2510 if (TREE_CODE (ref) == ARRAY_REF)
2511 dr_base = ref;
2512 else
2513 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
2515 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
2516 loop_vinfo, &bit_offset, &base_aligned_p);
/* Unknown base: misalignment stays -1 (unknown), but this is not a
   vectorization failure - return true.  */
2517 if (!base)
2519 if (vect_debug_details (NULL))
2521 fprintf (dump_file, "Unknown alignment for access: ");
2522 print_generic_expr (dump_file,
2523 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
2525 return true;
2528 if (!base_aligned_p)
2530 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
2532 if (vect_debug_details (NULL))
2534 fprintf (dump_file, "can't force alignment of ref: ");
2535 print_generic_expr (dump_file, ref, TDF_SLIM);
/* Alignment cannot be forced; misalignment remains unknown.  */
2537 return true;
2540 /* Force the alignment of the decl.
2541 NOTE: This is the only change to the code we make during
2542 the analysis phase, before deciding to vectorize the loop. */
2543 if (vect_debug_details (NULL))
2544 fprintf (dump_file, "force alignment");
2545 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
2546 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
2549 /* At this point we assume that the base is aligned, and the offset from it
2550 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2551 gcc_assert (base_aligned_p
2552 || (TREE_CODE (base) == VAR_DECL
2553 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
2555 /* Convert into bytes. */
2556 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2557 /* Check that there is no remainder in bits. */
2558 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2559 if (!integer_zerop (bit_offset))
2561 if (vect_debug_details (NULL))
2563 fprintf (dump_file, "bit offset alignment: ");
2564 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2566 return false;
2569 /* Alignment required, in bytes: */
2570 alignment = fold_convert (unsigned_type_node,
2571 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
2573 /* Modulo alignment. */
2574 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
2575 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
2577 if (vect_debug_details (NULL))
2578 fprintf (dump_file, "unexpected misalign value");
2579 return false;
/* Record the byte misalignment relative to the vector type's
   alignment.  */
2582 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
2584 if (vect_debug_details (NULL))
2585 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
2587 return true;
2591 /* Function vect_compute_array_ref_alignment
2593 Compute the alignment of an array-ref.
2594 The alignment we compute here is relative to
2595 TYPE_ALIGN(VECTYPE) boundary.
2597 Output:
2598 OFFSET - the alignment in bits
2599 Return value - the base of the array-ref. E.g,
2600 if the array-ref is a.b[k].c[i][j] the returned
2601 base is a.b[k].c
2604 static tree
2605 vect_compute_array_ref_alignment (struct data_reference *dr,
2606 loop_vec_info loop_vinfo,
2607 tree vectype,
2608 tree *offset)
2610 tree array_first_index = size_zero_node;
2611 tree init;
2612 tree ref = DR_REF (dr);
2613 tree scalar_type = TREE_TYPE (ref);
2614 tree oprnd0 = TREE_OPERAND (ref, 0);
2615 tree dims = size_one_node;
2616 tree misalign = size_zero_node;
2617 tree next_ref, this_offset = size_zero_node;
2618 tree nunits;
2619 tree nbits;
/* Walk the array dimensions, accumulating the base misalignment.  */
2621 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
2622 /* The reference is an array without its last index. */
2623 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
2624 else
2625 next_ref =
2626 vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
2627 if (!vectype)
2628 /* Alignment is not requested. Just return the base. */
2629 return next_ref;
2631 /* Compute alignment. */
2632 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
2633 return NULL_TREE;
2634 this_offset = misalign;
2636 /* Check the first index accessed. */
2637 if (!vect_get_first_index (ref, &array_first_index))
2639 if (vect_debug_details (NULL))
2640 fprintf (dump_file, "no first_index for array.");
2641 return NULL_TREE;
2644 /* Check the index of the array_ref. */
2645 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
2646 LOOP_VINFO_LOOP (loop_vinfo)->num);
2648 /* FORNOW: In order to simplify the handling of alignment, we make sure
2649 that the first location at which the array is accessed ('init') is on an
2650 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2651 This is too conservative, since we require that
2652 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2653 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2654 This should be relaxed in the future. */
2656 if (!init || !host_integerp (init, 0))
2658 if (vect_debug_details (NULL))
2659 fprintf (dump_file, "non constant init. ")
2660 return NULL_TREE;
2663 /* bytes per scalar element: */
2664 nunits = fold_convert (unsigned_type_node,
2665 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
2666 nbits = int_const_binop (MULT_EXPR, nunits,
2667 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
2669 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
2670 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2671 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
2672 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
2674 /* TODO: allow negative misalign values. */
2675 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
2677 if (vect_debug_details (NULL))
2678 fprintf (dump_file, "unexpected misalign value");
2679 return NULL_TREE;
/* Success: report the misalignment in bits and return the base.  */
2681 *offset = misalign;
2682 return next_ref;
2686 /* Function vect_compute_data_refs_alignment
2688 Compute the misalignment of data references in the loop.
2689 This pass may take place at function granularity instead of at loop
2690 granularity.
2692 FOR NOW: No analysis is actually performed. Misalignment is calculated
2693 only for trivial cases. TODO. */
2695 static void
2696 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2698 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2699 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2700 unsigned int i;
2702 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2704 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2705 vect_compute_data_ref_alignment (dr, loop_vinfo);
2708 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2710 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2711 vect_compute_data_ref_alignment (dr, loop_vinfo);
2716 /* Function vect_enhance_data_refs_alignment
2718 This pass will use loop versioning and loop peeling in order to enhance
2719 the alignment of data references in the loop.
2721 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2722 original loop is to be vectorized; Any other loops that are created by
2723 the transformations performed in this pass - are not supposed to be
2724 vectorized. This restriction will be relaxed.
2726 FOR NOW: No transformation is actually performed. TODO. */
2728 static void
2729 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
/* This function is currently a placeholder: the body consists only of
   design notes describing the intended transformation.  */
2732 This pass will require a cost model to guide it whether to apply peeling
2733 or versioning or a combination of the two. For example, the scheme that
2734 intel uses when given a loop with several memory accesses, is as follows:
2735 choose one memory access ('p') which alignment you want to force by doing
2736 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2737 other accesses are not necessarily aligned, or (2) use loop versioning to
2738 generate one loop in which all accesses are aligned, and another loop in
2739 which only 'p' is necessarily aligned.
2741 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2742 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2743 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2745 Devising a cost model is the most critical aspect of this work. It will
2746 guide us on which access to peel for, whether to use loop versioning, how
2747 many versions to create, etc. The cost model will probably consist of
2748 generic considerations as well as target specific considerations (on
2749 powerpc for example, misaligned stores are more painful than misaligned
2750 loads).
2752 Here is the general steps involved in alignment enhancements:
2754 -- original loop, before alignment analysis:
2755 for (i=0; i<N; i++){
2756 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2757 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2760 -- After vect_compute_data_refs_alignment:
2761 for (i=0; i<N; i++){
2762 x = q[i]; # DR_MISALIGNMENT(q) = 3
2763 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2766 -- Possibility 1: we do loop versioning:
2767 if (p is aligned) {
2768 for (i=0; i<N; i++){ # loop 1A
2769 x = q[i]; # DR_MISALIGNMENT(q) = 3
2770 p[i] = y; # DR_MISALIGNMENT(p) = 0
2773 else {
2774 for (i=0; i<N; i++){ # loop 1B
2775 x = q[i]; # DR_MISALIGNMENT(q) = 3
2776 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2780 -- Possibility 2: we do loop peeling:
2781 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2782 x = q[i];
2783 p[i] = y;
2785 for (i = 3; i < N; i++){ # loop 2A
2786 x = q[i]; # DR_MISALIGNMENT(q) = 0
2787 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2790 -- Possibility 3: combination of loop peeling and versioning:
2791 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2792 x = q[i];
2793 p[i] = y;
2795 if (p is aligned) {
2796 for (i = 3; i<N; i++){ # loop 3A
2797 x = q[i]; # DR_MISALIGNMENT(q) = 0
2798 p[i] = y; # DR_MISALIGNMENT(p) = 0
2801 else {
2802 for (i = 3; i<N; i++){ # loop 3B
2803 x = q[i]; # DR_MISALIGNMENT(q) = 0
2804 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2808 These loops are later passed to loop_transform to be vectorized. The
2809 vectorizer will use the alignment information to guide the transformation
2810 (whether to generate regular loads/stores, or with special handling for
2811 misalignment).
2816 /* Function vect_analyze_data_refs_alignment
2818 Analyze the alignment of the data-references in the loop.
2819 FOR NOW: Until support for misliagned accesses is in place, only if all
2820 accesses are aligned can the loop be vectorized. This restriction will be
2821 relaxed. */
2823 static bool
2824 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2826 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2827 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2828 unsigned int i;
2830 if (vect_debug_details (NULL))
2831 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
2834 /* This pass may take place at function granularity instead of at loop
2835 granularity. */
2837 vect_compute_data_refs_alignment (loop_vinfo);
2840 /* This pass will use loop versioning and loop peeling in order to enhance
2841 the alignment of data references in the loop.
2842 FOR NOW: we assume that whatever versioning/peeling took place, the
2843 original loop is to be vectorized. Any other loops that were created by
2844 the transformations performed in this pass - are not supposed to be
2845 vectorized. This restriction will be relaxed. */
2847 vect_enhance_data_refs_alignment (loop_vinfo);
2850 /* Finally, check that loop can be vectorized.
2851 FOR NOW: Until support for misaligned accesses is in place, only if all
2852 accesses are aligned can the loop be vectorized. This restriction will be
2853 relaxed. */
2855 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2857 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2858 if (!aligned_access_p (dr))
2860 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2861 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2862 fprintf (dump_file, "not vectorized: unaligned store.");
2863 return false;
2867 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2869 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2870 if (!aligned_access_p (dr))
2872 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2873 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2874 fprintf (dump_file, "not vectorized: unaligned load.");
2875 return false;
2879 return true;
2883 /* Function vect_analyze_data_ref_access.
2885 Analyze the access pattern of the data-reference DR. For now, a data access
2886 has to consecutive and aligned to be considered vectorizable. */
2888 static bool
2889 vect_analyze_data_ref_access (struct data_reference *dr)
2891 varray_type access_fns = DR_ACCESS_FNS (dr);
2892 tree access_fn;
2893 tree init, step;
2894 unsigned int dimensions, i;
2896 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
2897 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
2898 access is contiguous). */
2899 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
2901 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
2903 access_fn = DR_ACCESS_FN (dr, i);
2905 if (evolution_part_in_loop_num (access_fn,
2906 loop_containing_stmt (DR_STMT (dr))->num))
2908 /* Evolution part is not NULL in this loop (it is neither constant nor
2909 invariant). */
2910 if (vect_debug_details (NULL))
2912 fprintf (dump_file,
2913 "not vectorized: complicated multidimensional array access.");
2914 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2916 return false;
2920 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
2921 if (!evolution_function_is_constant_p (access_fn)
2922 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
2923 access_fn, &init, &step, true))
2925 if (vect_debug_details (NULL))
2927 fprintf (dump_file, "not vectorized: too complicated access function.");
2928 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2930 return false;
2933 return true;
2937 /* Function vect_analyze_data_ref_accesses.
2939 Analyze the access pattern of all the data references in the loop.
2941 FORNOW: the only access pattern that is considered vectorizable is a
2942 simple step 1 (consecutive) access.
2944 FORNOW: handle only arrays and pointer accesses. */
2946 static bool
2947 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2949 unsigned int i;
2950 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2951 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2953 if (vect_debug_details (NULL))
2954 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
2956 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2958 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2959 bool ok = vect_analyze_data_ref_access (dr);
2960 if (!ok)
2962 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2963 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2964 fprintf (dump_file, "not vectorized: complicated access pattern.");
2965 return false;
2969 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2971 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2972 bool ok = vect_analyze_data_ref_access (dr);
2973 if (!ok)
2975 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
2976 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
2977 fprintf (dump_file, "not vectorized: complicated access pattern.");
2978 return false;
2982 return true;
2986 /* Function vect_analyze_pointer_ref_access.
2988 Input:
2989 STMT - a stmt that contains a data-ref
2990 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
2992 If the data-ref access is vectorizable, return a data_reference structure
2993 that represents it (DR). Otherwise - return NULL. */
2995 static struct data_reference *
2996 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
2998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2999 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
3000 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
3001 tree init, step;
3002 int step_val;
3003 tree reftype, innertype;
3004 enum machine_mode innermode;
3005 tree indx_access_fn;
3006 int loopnum = loop->num;
3007 struct data_reference *dr;
3009 if (!access_fn)
3011 if (vect_debug_stats (loop) || vect_debug_details (loop))
3012 fprintf (dump_file, "not vectorized: complicated pointer access.");
3013 return NULL;
3016 if (vect_debug_details (NULL))
3018 fprintf (dump_file, "Access function of ptr: ");
3019 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3022 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
3024 if (vect_debug_stats (loop) || vect_debug_details (loop))
3025 fprintf (dump_file, "not vectorized: pointer access is not simple.");
3026 return NULL;
3029 STRIP_NOPS (init);
3031 if (!host_integerp (step,0))
3033 if (vect_debug_stats (loop) || vect_debug_details (loop))
3034 fprintf (dump_file,
3035 "not vectorized: non constant step for pointer access.");
3036 return NULL;
3039 step_val = TREE_INT_CST_LOW (step);
3041 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
3042 if (TREE_CODE (reftype) != POINTER_TYPE)
3044 if (vect_debug_stats (loop) || vect_debug_details (loop))
3045 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3046 return NULL;
3049 reftype = TREE_TYPE (init);
3050 if (TREE_CODE (reftype) != POINTER_TYPE)
3052 if (vect_debug_stats (loop) || vect_debug_details (loop))
3053 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3054 return NULL;
3057 innertype = TREE_TYPE (reftype);
3058 innermode = TYPE_MODE (innertype);
3059 if (GET_MODE_SIZE (innermode) != step_val)
3061 /* FORNOW: support only consecutive access */
3062 if (vect_debug_stats (loop) || vect_debug_details (loop))
3063 fprintf (dump_file, "not vectorized: non consecutive access.");
3064 return NULL;
3067 indx_access_fn =
3068 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
3069 if (vect_debug_details (NULL))
3071 fprintf (dump_file, "Access function of ptr indx: ");
3072 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
3074 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
3075 return dr;
/* Function vect_get_symbl_and_dr.

   The function returns SYMBL - the relevant variable for
   memory tag (for aliasing purposes).
   Also data reference structure DR is created.

   Input:
   MEMREF - data reference in STMT
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF

   Output:
   DR - data_reference struct for MEMREF
   return value - the relevant variable for memory tag (for aliasing purposes).
   Returns NULL_TREE (with *DR possibly left NULL) when MEMREF has a form
   this pass does not handle.  */

static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
		       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree array_base, base;
  struct data_reference *new_dr;
  bool base_aligned_p;

  *dr = NULL;
  switch (TREE_CODE (memref))
    {
    case INDIRECT_REF:
      /* Pointer access: delegate DR creation, then peel the base
	 expression down to the underlying symbol.  */
      new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
      if (! new_dr)
	return NULL_TREE;
      *dr = new_dr;
      symbl = DR_BASE_NAME (new_dr);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;

      switch (TREE_CODE (symbl))
	{
	case PLUS_EXPR:
	case MINUS_EXPR:
	  oprnd0 = TREE_OPERAND (symbl, 0);
	  oprnd1 = TREE_OPERAND (symbl, 1);

	  STRIP_NOPS(oprnd1);
	  /* Only {address_base + offset} expressions are supported,
	     where address_base can be POINTER_TYPE or ARRAY_TYPE and
	     offset can be anything but POINTER_TYPE or ARRAY_TYPE.
	     TODO: swap operands if {offset + address_base}.  */
	  if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
	       && TREE_CODE (oprnd1) != INTEGER_CST)
	      || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
	    return NULL_TREE;

	  if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
	    symbl = oprnd0;
	  else
	    symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
					   loop_vinfo, &new_dr);
	  /* NOTE(review): no `break' here — control falls through into the
	     SSA_NAME/ADDR_EXPR case, which only breaks, so the fall-through
	     is benign.  The recursive result is not checked for NULL_TREE;
	     presumably the caller's !symbl check covers that — confirm.  */

	case SSA_NAME:
	case ADDR_EXPR:
	  /* symbl remains unchanged.  */
	  break;

	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled data ref: ");
	      print_generic_expr (dump_file, memref, TDF_SLIM);
	      fprintf (dump_file, " (symbl ");
	      print_generic_expr (dump_file, symbl, TDF_SLIM);
	      fprintf (dump_file, ") in stmt ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    case ARRAY_REF:
      offset = size_zero_node;
      array_base = TREE_OPERAND (memref, 0);

      /* Store the array base in the stmt info.
	 For one dimensional array ref a[i], the base is a,
	 for multidimensional a[i1][i2]..[iN], the base is
	 a[i1][i2]..[iN-1].  */
      /* NOTE(review): array_base is assigned the same operand twice
	 (here and just above) — the first assignment is redundant.  */
      array_base = TREE_OPERAND (memref, 0);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;

      new_dr = analyze_array (stmt, memref, is_read);
      *dr = new_dr;

      /* Find the relevant symbol for aliasing purposes.  */
      base = DR_BASE_NAME (new_dr);
      switch (TREE_CODE (base))
	{
	case VAR_DECL:
	  symbl = base;
	  break;

	case INDIRECT_REF:
	  symbl = TREE_OPERAND (base, 0);
	  break;

	case COMPONENT_REF:
	  /* Could have recorded more accurate information -
	     i.e, the actual FIELD_DECL that is being referenced -
	     but later passes expect VAR_DECL as the nmt.  */
	  symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
				loop_vinfo, &offset, &base_aligned_p);
	  if (symbl)
	    break;
	  /* fall through */
	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled struct/class field access ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    default:
      /* Neither INDIRECT_REF nor ARRAY_REF — unsupported form.  */
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unhandled data ref: ");
	  print_generic_expr (dump_file, memref, TDF_SLIM);
	  fprintf (dump_file, " in stmt ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}
      return NULL_TREE;
    }
  return symbl;
}
/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
	   which base is really an array (not a pointer) and which alignment
	   can be forced. This restriction will be relaxed.

   For every load/store in the loop this builds a data_reference struct,
   records it in the loop_vinfo read/write varrays, and records a memory
   tag on the stmt for alias analysis.  Returns false (rejecting the whole
   loop) on the first data-ref it cannot handle.  */

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;
  tree tag;
  tree address_base;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  bool is_read = false;
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
	  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
	  vuse_optype vuses = STMT_VUSE_OPS (stmt);
	  varray_type *datarefs = NULL;
	  int nvuses, nv_may_defs, nv_must_defs;
	  tree memref = NULL;
	  tree symbl;

	  /* Assumption: there exists a data-ref in stmt, if and only if
	     it has vuses/vdefs.  */

	  if (!vuses && !v_may_defs && !v_must_defs)
	    continue;

	  nvuses = NUM_VUSES (vuses);
	  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
	  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

	  /* A stmt that both reads and writes memory is not handled.  */
	  if (nvuses && (nv_may_defs || nv_must_defs))
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Only plain assignments can carry a supported data-ref.  */
	  if (TREE_CODE (stmt) != MODIFY_EXPR)
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vops in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Pick the memory operand: RHS for a load, LHS for a store.  */
	  if (vuses)
	    {
	      memref = TREE_OPERAND (stmt, 1);
	      datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
	      is_read = true;
	    }
	  else /* vdefs */
	    {
	      memref = TREE_OPERAND (stmt, 0);
	      datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
	      is_read = false;
	    }

	  /* Analyze MEMREF. If it is of a supported form, build data_reference
	     struct for it (DR) and find the relevant symbol for aliasing
	     purposes.  */
	  symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
	  if (!symbl)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unhandled data ref: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Find and record the memtag assigned to this data-ref.  */
	  switch (TREE_CODE (symbl))
	    {
	    case VAR_DECL:
	      STMT_VINFO_MEMTAG (stmt_info) = symbl;
	      break;

	    case SSA_NAME:
	      /* Pointer-based ref: look up the type memory tag of the
		 pointer's underlying variable, falling back to the
		 dereferenced pointer in MEMREF itself.  */
	      symbl = SSA_NAME_VAR (symbl);
	      tag = get_var_ann (symbl)->type_mem_tag;
	      if (!tag)
		{
		  tree ptr = TREE_OPERAND (memref, 0);
		  if (TREE_CODE (ptr) == SSA_NAME)
		    tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
		}
	      if (!tag)
		{
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    fprintf (dump_file, "not vectorized: no memtag for ref.");
		  return false;
		}
	      STMT_VINFO_MEMTAG (stmt_info) = tag;
	      break;

	    case ADDR_EXPR:
	      address_base = TREE_OPERAND (symbl, 0);

	      switch (TREE_CODE (address_base))
		{
		case ARRAY_REF:
		  /* &a[i]: re-analyze as an array access; note this
		     overwrites DR built above.  */
		  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
		  STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
		  break;

		case VAR_DECL:
		  STMT_VINFO_MEMTAG (stmt_info) = address_base;
		  break;

		default:
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    {
		      fprintf (dump_file, "not vectorized: unhandled address expression: ");
		      print_generic_expr (dump_file, stmt, TDF_SLIM);
		    }
		  return false;
		}
	      break;

	    default:
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unsupported data-ref: ");
		  print_generic_expr (dump_file, memref, TDF_SLIM);
		}
	      return false;
	    }

	  VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
	  STMT_VINFO_DATA_REF (stmt_info) = dr;
	}
    }

  return true;
}
3381 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3383 /* Function vect_mark_relevant.
3385 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3387 static void
3388 vect_mark_relevant (varray_type worklist, tree stmt)
3390 stmt_vec_info stmt_info;
3392 if (vect_debug_details (NULL))
3393 fprintf (dump_file, "mark relevant.");
3395 if (TREE_CODE (stmt) == PHI_NODE)
3397 VARRAY_PUSH_TREE (worklist, stmt);
3398 return;
3401 stmt_info = vinfo_for_stmt (stmt);
3403 if (!stmt_info)
3405 if (vect_debug_details (NULL))
3407 fprintf (dump_file, "mark relevant: no stmt info!!.");
3408 print_generic_expr (dump_file, stmt, TDF_SLIM);
3410 return;
3413 if (STMT_VINFO_RELEVANT_P (stmt_info))
3415 if (vect_debug_details (NULL))
3416 fprintf (dump_file, "already marked relevant.");
3417 return;
3420 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3421 VARRAY_PUSH_TREE (worklist, stmt);
3425 /* Function vect_stmt_relevant_p.
3427 Return true if STMT in loop that is represented by LOOP_VINFO is
3428 "relevant for vectorization".
3430 A stmt is considered "relevant for vectorization" if:
3431 - it has uses outside the loop.
3432 - it has vdefs (it alters memory).
3433 - control stmts in the loop (except for the exit condition).
3435 CHECKME: what other side effects would the vectorizer allow? */
3437 static bool
3438 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3440 v_may_def_optype v_may_defs;
3441 v_must_def_optype v_must_defs;
3442 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3443 int i;
3444 dataflow_t df;
3445 int num_uses;
3447 /* cond stmt other than loop exit cond. */
3448 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3449 return true;
3451 /* changing memory. */
3452 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3453 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3454 if (v_may_defs || v_must_defs)
3456 if (vect_debug_details (NULL))
3457 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3458 return true;
3461 /* uses outside the loop. */
3462 df = get_immediate_uses (stmt);
3463 num_uses = num_immediate_uses (df);
3464 for (i = 0; i < num_uses; i++)
3466 tree use = immediate_use (df, i);
3467 basic_block bb = bb_for_stmt (use);
3468 if (!flow_bb_inside_loop_p (loop, bb))
3470 if (vect_debug_details (NULL))
3471 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3472 return true;
3476 return false;
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.

   Algorithm: seed a worklist with the stmts that are relevant on their
   own (vect_stmt_relevant_p), then transitively mark every in-loop
   definition they use.  Returns false if an unsupported use is found.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  stmt = bsi_stmt (si);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "init: stmt relevant? ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  /* Reset the flag so a stale value from a previous run cannot
	     leak into this analysis.  */
	  stmt_info = vinfo_for_stmt (stmt);
	  STMT_VINFO_RELEVANT_P (stmt_info) = 0;

	  if (vect_stmt_relevant_p (stmt, loop_vinfo))
	    vect_mark_relevant (worklist, stmt);
	}
    }

  /* 2. Process_worklist */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "worklist: examine stmt: ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}

      /* Examine the USES in this statement. Mark all the statements which
	 feed this statement's uses as "relevant", unless the USE is used as
	 an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
	{
	  /* follow the def-use chain inside the loop.  */
	  for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
	    {
	      tree arg = PHI_ARG_DEF (stmt, j);
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (arg, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}
	      /* Defined outside the loop (or an invariant): not relevant.  */
	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: def_stmt: ");
		  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
		}

	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}

      /* For a PHI the use-operand list is empty, so the loop below is a
	 no-op in that case.  */
      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
	{
	  tree use = USE_OP (use_ops, i);

	  /* We are only interested in uses that need to be vectorized. Uses
	     that are used for address computation are not considered
	     relevant.  */
	  if (exist_non_indexing_operands_for_use_p (use, stmt))
	    {
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (use, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}

	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: examine use %d: ", i);
		  print_generic_expr (dump_file, use, TDF_SLIM);
		}

	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}
    }				/* while worklist */

  varray_clear (worklist);
  return true;
}
3630 /* Function vect_get_loop_niters.
3632 Determine how many iterations the loop is executed. */
3634 static tree
3635 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3637 tree niters;
3639 if (vect_debug_details (NULL))
3640 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3642 niters = number_of_iterations_in_loop (loop);
3644 if (niters != NULL_TREE
3645 && niters != chrec_dont_know
3646 && host_integerp (niters,0))
3648 *number_of_iterations = TREE_INT_CST_LOW (niters);
3650 if (vect_debug_details (NULL))
3651 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3652 *number_of_iterations);
3655 return get_loop_exit_condition (loop);
3659 /* Function vect_analyze_loop_form.
3661 Verify the following restrictions (some may be relaxed in the future):
3662 - it's an inner-most loop
3663 - number of BBs = 2 (which are the loop header and the latch)
3664 - the loop has a pre-header
3665 - the loop has a single entry and exit
3666 - the loop exit condition is simple enough, and the number of iterations
3667 can be analyzed (a countable loop). */
3669 static loop_vec_info
3670 vect_analyze_loop_form (struct loop *loop)
3672 loop_vec_info loop_vinfo;
3673 tree loop_cond;
3674 HOST_WIDE_INT number_of_iterations = -1;
3676 if (vect_debug_details (loop))
3677 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3679 if (loop->inner
3680 || !loop->single_exit
3681 || loop->num_nodes != 2)
3683 if (vect_debug_stats (loop) || vect_debug_details (loop))
3685 fprintf (dump_file, "not vectorized: bad loop form. ");
3686 if (loop->inner)
3687 fprintf (dump_file, "nested loop.");
3688 else if (!loop->single_exit)
3689 fprintf (dump_file, "multiple exits.");
3690 else if (loop->num_nodes != 2)
3691 fprintf (dump_file, "too many BBs in loop.");
3694 return NULL;
3697 /* We assume that the loop exit condition is at the end of the loop. i.e,
3698 that the loop is represented as a do-while (with a proper if-guard
3699 before the loop if needed), where the loop header contains all the
3700 executable statements, and the latch is empty. */
3701 if (!empty_block_p (loop->latch))
3703 if (vect_debug_stats (loop) || vect_debug_details (loop))
3704 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3705 return NULL;
3708 if (empty_block_p (loop->header))
3710 if (vect_debug_stats (loop) || vect_debug_details (loop))
3711 fprintf (dump_file, "not vectorized: empty loop.");
3712 return NULL;
3715 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3716 if (!loop_cond)
3718 if (vect_debug_stats (loop) || vect_debug_details (loop))
3719 fprintf (dump_file, "not vectorized: complicated exit condition.");
3720 return NULL;
3723 if (number_of_iterations < 0)
3725 if (vect_debug_stats (loop) || vect_debug_details (loop))
3726 fprintf (dump_file, "not vectorized: unknown loop bound.");
3727 return NULL;
3730 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3732 if (vect_debug_stats (loop) || vect_debug_details (loop))
3733 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3734 return NULL;
3737 loop_vinfo = new_loop_vec_info (loop);
3738 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3739 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3741 return loop_vinfo;
3745 /* Function vect_analyze_loop.
3747 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3748 for it. The different analyses will record information in the
3749 loop_vec_info struct. */
3751 static loop_vec_info
3752 vect_analyze_loop (struct loop *loop)
3754 bool ok;
3755 loop_vec_info loop_vinfo;
3757 if (vect_debug_details (NULL))
3758 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3760 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3762 loop_vinfo = vect_analyze_loop_form (loop);
3763 if (!loop_vinfo)
3765 if (vect_debug_details (loop))
3766 fprintf (dump_file, "bad loop form.");
3767 return NULL;
3770 /* Find all data references in the loop (which correspond to vdefs/vuses)
3771 and analyze their evolution in the loop.
3773 FORNOW: Handle only simple, array references, which
3774 alignment can be forced, and aligned pointer-references. */
3776 ok = vect_analyze_data_refs (loop_vinfo);
3777 if (!ok)
3779 if (vect_debug_details (loop))
3780 fprintf (dump_file, "bad data references.");
3781 destroy_loop_vec_info (loop_vinfo);
3782 return NULL;
3785 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3787 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3788 if (!ok)
3790 if (vect_debug_details (loop))
3791 fprintf (dump_file, "unexpected pattern.");
3792 if (vect_debug_details (loop))
3793 fprintf (dump_file, "not vectorized: unexpected pattern.");
3794 destroy_loop_vec_info (loop_vinfo);
3795 return NULL;
3798 /* Check that all cross-iteration scalar data-flow cycles are OK.
3799 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3801 ok = vect_analyze_scalar_cycles (loop_vinfo);
3802 if (!ok)
3804 if (vect_debug_details (loop))
3805 fprintf (dump_file, "bad scalar cycle.");
3806 destroy_loop_vec_info (loop_vinfo);
3807 return NULL;
3810 /* Analyze data dependences between the data-refs in the loop.
3811 FORNOW: fail at the first data dependence that we encounter. */
3813 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3814 if (!ok)
3816 if (vect_debug_details (loop))
3817 fprintf (dump_file, "bad data dependence.");
3818 destroy_loop_vec_info (loop_vinfo);
3819 return NULL;
3822 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3823 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3825 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3826 if (!ok)
3828 if (vect_debug_details (loop))
3829 fprintf (dump_file, "bad data access.");
3830 destroy_loop_vec_info (loop_vinfo);
3831 return NULL;
3834 /* Analyze the alignment of the data-refs in the loop.
3835 FORNOW: Only aligned accesses are handled. */
3837 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3838 if (!ok)
3840 if (vect_debug_details (loop))
3841 fprintf (dump_file, "bad data alignment.");
3842 destroy_loop_vec_info (loop_vinfo);
3843 return NULL;
3846 /* Scan all the operations in the loop and make sure they are
3847 vectorizable. */
3849 ok = vect_analyze_operations (loop_vinfo);
3850 if (!ok)
3852 if (vect_debug_details (loop))
3853 fprintf (dump_file, "bad operation or unsupported loop bound.");
3854 destroy_loop_vec_info (loop_vinfo);
3855 return NULL;
3858 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3860 return loop_vinfo;
3864 /* Function need_imm_uses_for.
3866 Return whether we ought to include information for 'var'
3867 when calculating immediate uses. For this pass we only want use
3868 information for non-virtual variables. */
3870 static bool
3871 need_imm_uses_for (tree var)
3873 return is_gimple_reg (var);
/* Function vectorize_loops.

   Entry Point to loop vectorization phase.

   Analyzes every pre-existing loop in LOOPS, transforms the vectorizable
   ones, then releases the per-loop analysis data and repairs the SSA form
   invalidated by the transformation.  */

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD?  */
  /* FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /* ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops. This fact allows us to run
     only over initial loops skipping newly generated ones.  */
  /* Loop 0 is the function body pseudo-loop, hence i starts at 1.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      if (!loop)
	continue;

      /* Stash the analysis result in loop->aux so the finalize pass
	 below can free it even for loops that were not transformed.  */
      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
	     num_vectorized_loops);

  /* ----------- Finalize. -----------  */

  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
	continue;
      loop_vinfo = loop->aux;
      /* destroy_loop_vec_info handles a NULL loop_vinfo — presumably;
	 loops rejected by analysis reach here with loop->aux == NULL
	 only when vect_analyze_loop returned NULL — confirm.  */
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  rewrite_into_ssa (false);
  if (bitmap_first_set_bit (vars_to_rename) >= 0)
    {
      /* The rewrite of ssa names may cause violation of loop closed ssa
	 form invariants.  TODO -- avoid these rewrites completely.
	 Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa ();
    }
  bitmap_clear (vars_to_rename);
}