2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS that are one dimensional
61 arrays which base is an array DECL (not a pointer), and INDIRECT_REFS
62 through pointers; both array and pointer accesses are required to have a
63 simple (consecutive) access pattern.
67 The driver for the analysis phase is vect_analyze_loop_nest().
68 It applies a set of analyses, some of which rely on the scalar evolution
69 analyzer (scev) developed by Sebastian Pop.
71 During the analysis phase the vectorizer records some information
72 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
73 loop, as well as general information about the loop as a whole, which is
74 recorded in a "loop_vec_info" struct attached to each loop.
78 The loop transformation phase scans all the stmts in the loop, and
79 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
80 the loop that needs to be vectorized. It insert the vector code sequence
81 just before the scalar stmt S, and records a pointer to the vector code
82 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
83 attached to S). This pointer will be used for the vectorization of following
84 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
85 otherwise, we rely on dead code elimination for removing it.
87 For example, say stmt S1 was vectorized into stmt VS1:
90 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
93 To vectorize stmt S2, the vectorizer first finds the stmt that defines
94 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
95 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
96 resulting sequence would be:
99 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
101 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
103 Operands that are not SSA_NAMEs, are data-refs that appear in
104 load/store operations (like 'x[i]' in S1), and are handled differently.
108 Currently the only target specific information that is used is the
109 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
110 support different sizes of vectors, for now will need to specify one value
111 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
113 Since we only vectorize operations which vector form can be
114 expressed using existing tree codes, to verify that an operation is
115 supported, the vectorizer checks the relevant optab at the relevant
116 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
117 the value found is CODE_FOR_nothing, then there's no target support, and
118 we can't vectorize the stmt.
120 For additional information on this project see:
121 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
126 #include "coretypes.h"
134 #include "basic-block.h"
135 #include "diagnostic.h"
136 #include "tree-flow.h"
137 #include "tree-dump.h"
140 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info
vect_analyze_loop (struct loop
*);
151 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
152 static bool vect_analyze_data_refs (loop_vec_info
);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
154 static bool vect_analyze_scalar_cycles (loop_vec_info
);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
157 static void vect_compute_data_refs_alignment (loop_vec_info
);
158 static bool vect_analyze_operations (loop_vec_info
);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info
, struct loops
*);
162 static void vect_transform_loop_bound (loop_vec_info
);
163 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
164 static bool vectorizable_load (tree
, block_stmt_iterator
*, tree
*);
165 static bool vectorizable_store (tree
, block_stmt_iterator
*, tree
*);
166 static bool vectorizable_operation (tree
, block_stmt_iterator
*, tree
*);
167 static bool vectorizable_assignment (tree
, block_stmt_iterator
*, tree
*);
168 static void vect_align_data_ref (tree
);
169 static void vect_enhance_data_refs_alignment (loop_vec_info
);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree
, struct loop
*, tree
*);
173 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
174 static bool vect_is_simple_iv_evolution (unsigned, tree
, tree
*, tree
*, bool);
175 static void vect_mark_relevant (varray_type
, tree
);
176 static bool vect_stmt_relevant_p (tree
, loop_vec_info
);
177 static tree
vect_get_loop_niters (struct loop
*, HOST_WIDE_INT
*);
178 static void vect_compute_data_ref_alignment
179 (struct data_reference
*, loop_vec_info
);
180 static bool vect_analyze_data_ref_access (struct data_reference
*);
181 static bool vect_get_first_index (tree
, tree
*);
182 static bool vect_can_force_dr_alignment_p (tree
, unsigned int);
183 static tree
vect_get_base_decl_and_bit_offset (tree
, tree
*);
184 static struct data_reference
* vect_analyze_pointer_ref_access (tree
, tree
, bool);
186 /* Utility functions for the code transformation. */
187 static tree
vect_create_destination_var (tree
, tree
);
188 static tree
vect_create_data_ref (tree
, block_stmt_iterator
*);
189 static tree
vect_create_index_for_array_ref (tree
, block_stmt_iterator
*);
190 static tree
get_vectype_for_scalar_type (tree
);
191 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
192 static tree
vect_get_vec_def_for_operand (tree
, tree
);
193 static tree
vect_init_vector (tree
, tree
);
194 static void vect_finish_stmt_generation
195 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
197 /* Utilities for creation and deletion of vec_info structs. */
198 loop_vec_info
new_loop_vec_info (struct loop
*loop
);
199 void destroy_loop_vec_info (loop_vec_info
);
200 stmt_vec_info
new_stmt_vec_info (tree stmt
, struct loop
*loop
);
202 static bool vect_debug_stats (struct loop
*loop
);
203 static bool vect_debug_details (struct loop
*loop
);
206 /* Function new_stmt_vec_info.
208 Create and initialize a new stmt_vec_info struct for STMT. */
211 new_stmt_vec_info (tree stmt
, struct loop
*loop
)
214 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
216 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
217 STMT_VINFO_STMT (res
) = stmt
;
218 STMT_VINFO_LOOP (res
) = loop
;
219 STMT_VINFO_RELEVANT_P (res
) = 0;
220 STMT_VINFO_VECTYPE (res
) = NULL
;
221 STMT_VINFO_VEC_STMT (res
) = NULL
;
222 STMT_VINFO_DATA_REF (res
) = NULL
;
223 STMT_VINFO_MEMTAG (res
) = NULL
;
229 /* Function new_loop_vec_info.
231 Create and initialize a new loop_vec_info struct for LOOP, as well as
232 stmt_vec_info structs for all the stmts in LOOP. */
235 new_loop_vec_info (struct loop
*loop
)
239 block_stmt_iterator si
;
242 res
= (loop_vec_info
) xcalloc (1, sizeof (struct _loop_vec_info
));
244 bbs
= get_loop_body (loop
);
246 /* Create stmt_info for all stmts in the loop. */
247 for (i
= 0; i
< loop
->num_nodes
; i
++)
249 basic_block bb
= bbs
[i
];
250 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
252 tree stmt
= bsi_stmt (si
);
255 get_stmt_operands (stmt
);
256 ann
= stmt_ann (stmt
);
257 set_stmt_info (ann
, new_stmt_vec_info (stmt
, loop
));
261 LOOP_VINFO_LOOP (res
) = loop
;
262 LOOP_VINFO_BBS (res
) = bbs
;
263 LOOP_VINFO_EXIT_COND (res
) = NULL
;
264 LOOP_VINFO_NITERS (res
) = -1;
265 LOOP_VINFO_VECTORIZABLE_P (res
) = 0;
266 LOOP_VINFO_VECT_FACTOR (res
) = 0;
267 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res
), 20,
268 "loop_write_datarefs");
269 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res
), 20,
270 "loop_read_datarefs");
275 /* Function destroy_loop_vec_info.
277 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
278 stmts in the loop. */
281 destroy_loop_vec_info (loop_vec_info loop_vinfo
)
286 block_stmt_iterator si
;
292 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
294 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
295 nbbs
= loop
->num_nodes
;
297 for (j
= 0; j
< nbbs
; j
++)
299 basic_block bb
= bbs
[j
];
300 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
302 tree stmt
= bsi_stmt (si
);
303 stmt_ann_t ann
= stmt_ann (stmt
);
304 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
306 set_stmt_info (ann
, NULL
);
310 free (LOOP_VINFO_BBS (loop_vinfo
));
311 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
312 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo
));
/* NOTE(review): this span is garbled by extraction -- embedded original
   line numbers and mid-token line wraps remain, and several original
   lines (the loop-null checks, returns, braces, and the BB setup) are
   missing.  Kept byte-identical below; restore from the original
   tree-vectorizer.c before compiling.
   From the visible tokens: this predicate returns whether TDF_STATS
   dumping is active, and if LOOP is given it appears to locate a stmt
   with a source location in the loop and print "loop at FILE:LINE" to
   dump_file -- presumably returning true when it printed; confirm.  */
318 /* Function debug_loop_stats.
320 For vectorization statistics dumps. */
323 vect_debug_stats (struct loop
*loop
)
326 block_stmt_iterator si
;
327 tree node
= NULL_TREE
;
329 if (!dump_file
|| !(dump_flags
& TDF_STATS
))
334 fprintf (dump_file
, "\n");
343 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
345 node
= bsi_stmt (si
);
346 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
350 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
351 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
353 fprintf (dump_file
, "\nloop at %s:%d: ",
354 EXPR_FILENAME (node
), EXPR_LINENO (node
));
/* NOTE(review): garbled by extraction exactly like vect_debug_stats;
   embedded line numbers, wraps, and missing lines kept byte-identical.
   Visible tokens show the same shape as vect_debug_stats but keyed on
   TDF_DETAILS instead of TDF_STATS: tests dump_file/dump_flags, and
   with a LOOP argument appears to print "loop at FILE:LINE" for the
   first located stmt -- confirm against the original file.  */
362 /* Function debug_loop_details.
364 For vectorization debug dumps. */
367 vect_debug_details (struct loop
*loop
)
370 block_stmt_iterator si
;
371 tree node
= NULL_TREE
;
373 if (!dump_file
|| !(dump_flags
& TDF_DETAILS
))
378 fprintf (dump_file
, "\n");
387 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
389 node
= bsi_stmt (si
);
390 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
394 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
395 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
397 fprintf (dump_file
, "\nloop at %s:%d: ",
398 EXPR_FILENAME (node
), EXPR_LINENO (node
));
/* NOTE(review): garbled by extraction -- embedded line numbers and
   wraps remain; the function's return type line, braces, several local
   declarations (vb, va), the if/else structure around the two
   bsi_insert calls, and the assignment of INITIAL are missing.  Kept
   byte-identical; this block cannot be safely rewritten from what is
   visible.  Visible tokens show: creates "ivtmp" when VAR is NULL,
   builds two SSA names for VAR, builds va = vb + step, inserts the
   increment at INCR_POS (after/before presumably chosen by AFTER --
   confirm), inserts preheader stmts if any, and creates a loop-header
   PHI for vb with INITIAL on the preheader edge and va on the latch
   edge.  VAR_BEFORE/VAR_AFTER stores are among the lost lines.  */
406 /* THIS IS A COPY OF THE FUNCTION IN TREE-SSA-IVOPTS.C, MODIFIED
407 TO NOT USE FORCE_GIMPLE_OPERAND. When that function is accepted
408 into he mainline, This function can go away and be replaced by it.
409 Creates an induction variable with value BASE + STEP * iteration in
410 LOOP. It is expected that neither BASE nor STEP are shared with
411 other expressions (unless the sharing rules allow this). Use VAR
412 as a base var_decl for it (if NULL, a new temporary will be
413 created). The increment will occur at INCR_POS (after it if AFTER
414 is true, before it otherwise). The ssa versions of the variable
415 before and after increment will be stored in VAR_BEFORE and
416 VAR_AFTER (unless they are NULL). */
419 vect_create_iv_simple (tree base
, tree step
, tree var
, struct loop
*loop
,
420 block_stmt_iterator
*incr_pos
, bool after
,
421 tree
*var_before
, tree
*var_after
)
423 tree stmt
, stmts
, initial
;
429 var
= create_tmp_var (TREE_TYPE (base
), "ivtmp");
430 add_referenced_tmp_var (var
);
433 vb
= make_ssa_name (var
, build_empty_stmt ());
436 va
= make_ssa_name (var
, build_empty_stmt ());
440 stmt
= build (MODIFY_EXPR
, void_type_node
, va
,
441 build (PLUS_EXPR
, TREE_TYPE (base
), vb
, step
));
442 SSA_NAME_DEF_STMT (va
) = stmt
;
444 bsi_insert_after (incr_pos
, stmt
, BSI_NEW_STMT
);
446 bsi_insert_before (incr_pos
, stmt
, BSI_NEW_STMT
);
448 /* Our base is always a GIMPLE variable, thus, we don't need to
449 force_gimple_operand it. */
453 edge pe
= loop_preheader_edge (loop
);
454 bsi_insert_on_edge (pe
, stmts
);
457 stmt
= create_phi_node (vb
, loop
->header
);
458 SSA_NAME_DEF_STMT (vb
) = stmt
;
459 add_phi_arg (&stmt
, initial
, loop_preheader_edge (loop
));
460 add_phi_arg (&stmt
, va
, loop_latch_edge (loop
));
/* NOTE(review): garbled by extraction -- embedded line numbers and
   wraps remain; the return statements, local declarations
   (this_offset, decl), braces and the final fallback return are
   missing.  Kept byte-identical; not safely rewritable from here.
   Visible tokens show: returns REF itself when it is a VAR_DECL;
   for a COMPONENT_REF, recurses on operand 0, adds bit_position of the
   FIELD_DECL operand into *OFFSET via int_const_binop, and bails out
   (path lost) when the offset is not a host integer or overflows;
   prints the accumulated offset under TDF_DETAILS.  */
464 /* Function vect_get_base_decl_and_bit_offset
466 Get the decl from which the data reference REF is based,
467 and compute the OFFSET from it in bits on the way.
468 FORNOW: Handle only component-refs that consist of
469 VAR_DECLs (no ARRAY_REF or INDIRECT_REF). */
472 vect_get_base_decl_and_bit_offset (tree ref
, tree
*offset
)
475 if (TREE_CODE (ref
) == VAR_DECL
)
478 if (TREE_CODE (ref
) == COMPONENT_REF
)
481 tree oprnd0
= TREE_OPERAND (ref
, 0);
482 tree oprnd1
= TREE_OPERAND (ref
, 1);
484 this_offset
= bit_position (oprnd1
);
485 if (!host_integerp (this_offset
,1))
488 decl
= vect_get_base_decl_and_bit_offset (oprnd0
, offset
);
492 *offset
= int_const_binop (PLUS_EXPR
, *offset
, this_offset
, 1);
494 if (!host_integerp (*offset
,1) || TREE_OVERFLOW (*offset
))
497 if (vect_debug_details (NULL
))
499 print_generic_expr (dump_file
, ref
, TDF_SLIM
);
500 fprintf (dump_file
, " --> total offset for ref: ");
501 print_generic_expr (dump_file
, *offset
, TDF_SLIM
);
508 /* TODO: extend to handle more cases. */
513 /* Function vect_force_dr_alignment_p.
515 Returns whether the alignment of a DECL can be forced to be aligned
516 on ALIGNMENT bit boundary. */
519 vect_can_force_dr_alignment_p (tree decl
, unsigned int alignment
)
521 if (TREE_CODE (decl
) != VAR_DECL
)
524 if (DECL_EXTERNAL (decl
))
527 if (TREE_STATIC (decl
))
528 return (alignment
<= MAX_OFILE_ALIGNMENT
);
530 /* This is not 100% correct. The absolute correct stack alignment
531 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
532 PREFERRED_STACK_BOUNDARY is honored by all translation units.
533 However, until someone implements forced stack alignment, SSE
534 isn't really usable without this. */
535 return (alignment
<= PREFERRED_STACK_BOUNDARY
);
539 /* Function vect_get_new_vect_var.
541 Returns a name for a new variable. The current naming scheme appends the
542 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
543 the name of vectorizer generated variables, and appends that to NAME if
547 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
553 if (var_kind
== vect_simple_var
)
558 prefix_len
= strlen (prefix
);
561 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
563 new_vect_var
= create_tmp_var (type
, prefix
);
/* NOTE(review): garbled by extraction -- embedded line numbers and
   wraps remain; missing lines include local declarations (access_fn,
   ok, init, step, vf, i), the early-bail paths (lines after the
   ENABLE_CHECKING tests), the second operand of the TRUNC_DIV_EXPR
   int_const_binop call (presumably vf -- confirm), and braces.  Kept
   byte-identical; not safely rewritable from here.
   Visible tokens show: fetches the data-ref from the stmt's vinfo,
   checks it is one-dimensional, extracts the access function and
   verifies it is a simple iv evolution with the array's first index,
   requires a constant 'init', computes the new index's init as
   (init - array_first_index) / vectorization_factor, uses step 1, and
   calls vect_create_iv_simple, returning the before-increment SSA
   name.  */
569 /* Function create_index_for_array_ref.
571 Create (and return) an index variable, along with it's update chain in the
572 loop. This variable will be used to access a memory location in a vector
576 STMT: The stmt that contains a memory data-ref.
577 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
578 function can be added here, or in the loop pre-header.
580 FORNOW: We are only handling array accesses with step 1. */
583 vect_create_index_for_array_ref (tree stmt
, block_stmt_iterator
*bsi
)
585 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
586 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
587 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
588 tree expr
= DR_REF (dr
);
591 loop_vec_info loop_info
= loop
->aux
;
592 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_info
);
594 tree array_first_index
;
595 tree indx_before_incr
, indx_after_incr
;
596 int loopnum
= loop
->num
;
598 #ifdef ENABLE_CHECKING
599 varray_type access_fns
= DR_ACCESS_FNS (dr
);
601 /* FORNOW: handling only one dimensional arrays. */
602 if (VARRAY_ACTIVE_SIZE (access_fns
) != 1)
605 if (!vectorization_factor
)
609 access_fn
= DR_ACCESS_FN (dr
, 0);
610 ok
= vect_is_simple_iv_evolution (loopnum
, access_fn
, &init
, &step
, true)
611 && vect_get_first_index (expr
, &array_first_index
);
613 #ifdef ENABLE_CHECKING
617 /* FORNOW: Handling only constant 'init'. */
618 if (TREE_CODE (init
) != INTEGER_CST
)
622 vf
= build_int_cst (unsigned_type_node
, vectorization_factor
, 0);
624 if (vect_debug_details (NULL
))
626 fprintf (dump_file
, "int vf = %d",vectorization_factor
);
627 fprintf (dump_file
, ", vf:");
628 print_generic_expr (dump_file
, vf
, TDF_SLIM
);
629 fprintf (dump_file
, ", init:");
630 print_generic_expr (dump_file
, init
, TDF_SLIM
);
631 fprintf (dump_file
, ", array_first_index:");
632 print_generic_expr (dump_file
, array_first_index
, TDF_SLIM
);
635 /* Calculate the 'init' of the new index.
636 init = (init - array_first_index) / vectorization_factor */
637 init
= int_const_binop (TRUNC_DIV_EXPR
,
638 int_const_binop (MINUS_EXPR
, init
, array_first_index
, 1),
641 /* Calculate the 'step' of the new index. FORNOW: always 1. */
642 step
= size_one_node
;
644 if (vect_debug_details (NULL
))
646 fprintf (dump_file
, "create iv for (");
647 print_generic_expr (dump_file
, init
, TDF_SLIM
);
648 fprintf (dump_file
, ", + ,");
649 print_generic_expr (dump_file
, step
, TDF_SLIM
);
650 fprintf (dump_file
, ")");
653 /* both init and step are guaranted to be gimple expressions,
654 so we can use vect_create_iv_simple. */
655 vect_create_iv_simple (init
, step
, NULL
, loop
, bsi
, false,
656 &indx_before_incr
, &indx_after_incr
);
658 return indx_before_incr
;
662 /* Function get_vectype_for_scalar_type.
664 Returns the vector type corresponding to SCALAR_TYPE as supported
668 get_vectype_for_scalar_type (tree scalar_type
)
670 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
671 int nbytes
= GET_MODE_SIZE (inner_mode
);
677 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
679 nunits
= UNITS_PER_SIMD_WORD
/ nbytes
;
681 return build_vector_type (scalar_type
, nunits
);
685 /* Function vect_align_data_ref.
687 Handle mislignment of a memory accesses.
689 FORNOW: Can't handle misaligned accesses.
690 Make sure that the dataref is aligned. */
693 vect_align_data_ref (tree stmt
)
695 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
696 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
698 /* FORNOW: can't handle misaligned accesses;
699 all accesses expected to be aligned. */
700 if (!aligned_access_p (dr
))
/* NOTE(review): garbled by extraction -- embedded line numbers and
   mid-token wraps remain; roughly 20 local declarations (addr_ref,
   array_type, vect_ptr_type, vect_ptr, tag, pe, addr_expr, vec_stmt,
   new_temp, idx, new_base, data_ref, i, ...), the braces, the
   ENABLE_CHECKING abort bodies, and the final return are missing.
   Kept byte-identical below; this block is too large and too damaged
   to rewrite safely from what is visible.
   Visible tokens show the documented plan being executed: align-check
   the data-ref, build an array-of-vectype pointer type, create the
   vector pointer variable, take the base address (the SSA_NAME itself
   for pointer refs, &decl otherwise), copy the stmt's memory tag onto
   the new pointer and mark all aliased SSA names in vars_to_rename,
   emit 'addr_expr = base; vect_ptr = (vectype_array *)addr_expr' on
   the preheader edge, create the index via
   vect_create_index_for_array_ref, and build '(*vect_ptr)[idx]'
   (presumably returned as DATA_REF -- confirm).  */
705 /* Function vect_create_data_ref.
707 Create a memory reference expression for vector access, to be used in a
708 vector load/store stmt.
711 STMT: a stmt that references memory. expected to be of the form
712 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
713 BSI: block_stmt_iterator where new stmts can be added.
716 1. Declare a new ptr to vector_type, and have it point to the array base.
717 For example, for vector of type V8HI:
720 2. Create a data-reference based on the new vector pointer p0, and using
721 a new index variable 'idx'. Return the expression '(*p0)[idx]'.
723 FORNOW: handle only aligned and consecutive accesses. */
726 vect_create_data_ref (tree stmt
, block_stmt_iterator
*bsi
)
733 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
734 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
738 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
739 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
740 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
741 int nvuses
, nv_may_defs
, nv_must_defs
;
743 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
745 tree base_addr
= NULL_TREE
;
746 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
750 tree scalar_ptr_type
;
752 /* FORNOW: make sure the data reference is aligned. */
753 vect_align_data_ref (stmt
);
755 addr_ref
= DR_BASE_NAME (dr
);
757 array_type
= build_array_type (vectype
, 0);
758 TYPE_ALIGN (array_type
) = TYPE_ALIGN (TREE_TYPE (addr_ref
));
759 vect_ptr_type
= build_pointer_type (array_type
);
760 scalar_ptr_type
= build_pointer_type (TREE_TYPE (addr_ref
));
762 if (vect_debug_details (NULL
))
764 fprintf (dump_file
, "create array_ref of type: ");
765 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
768 /*** create: vectype_array *p; ***/
769 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
770 get_name (addr_ref
));
771 add_referenced_tmp_var (vect_ptr
);
773 #ifdef ENABLE_CHECKING
774 if (TREE_CODE (addr_ref
) != VAR_DECL
775 && TREE_CODE (addr_ref
) != COMPONENT_REF
776 && TREE_CODE (addr_ref
) != SSA_NAME
)
780 if (vect_debug_details (NULL
))
782 if (TREE_CODE (addr_ref
) == VAR_DECL
)
783 fprintf (dump_file
, "vectorizing an array ref: ");
784 else if (TREE_CODE (addr_ref
) == SSA_NAME
)
785 fprintf (dump_file
, "vectorizing a pointer ref: ");
786 else if (TREE_CODE (addr_ref
) == COMPONENT_REF
)
787 fprintf (dump_file
, "vectorizing a record ref: ");
788 print_generic_expr (dump_file
, addr_ref
, TDF_SLIM
);
791 /* Get base address: */
792 if (TREE_CODE (addr_ref
) == SSA_NAME
)
793 base_addr
= addr_ref
;
795 base_addr
= build_fold_addr_expr (addr_ref
);
797 /* Handle aliasing: */
798 tag
= STMT_VINFO_MEMTAG (stmt_info
);
799 #ifdef ENABLE_CHECKING
803 get_var_ann (vect_ptr
)->type_mem_tag
= tag
;
805 /* Mark for renaming all aliased variables
806 (i.e, the may-aliases of the type-mem-tag) */
807 nvuses
= NUM_VUSES (vuses
);
808 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
809 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
810 for (i
= 0; i
< nvuses
; i
++)
812 tree use
= VUSE_OP (vuses
, i
);
813 if (TREE_CODE (use
) == SSA_NAME
)
814 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (use
))->uid
);
816 for (i
= 0; i
< nv_may_defs
; i
++)
818 tree def
= V_MAY_DEF_RESULT (v_may_defs
, i
);
819 if (TREE_CODE (def
) == SSA_NAME
)
820 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
822 for (i
= 0; i
< nv_must_defs
; i
++)
824 tree def
= V_MUST_DEF_OP (v_must_defs
, i
);
825 if (TREE_CODE (def
) == SSA_NAME
)
826 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
829 pe
= loop_preheader_edge (loop
);
831 /*** create: p = (vectype *)&a; ***/
834 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
835 get_name (addr_ref
));
836 add_referenced_tmp_var (addr_expr
);
837 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, base_addr
);
838 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
839 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
840 bsi_insert_on_edge (pe
, vec_stmt
);
842 /* vect_ptr = (vectype_array *)&a; */
843 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
844 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
845 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
846 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
847 bsi_insert_on_edge (pe
, vec_stmt
);
849 /*** create data ref: '(*p)[idx]' ***/
851 idx
= vect_create_index_for_array_ref (stmt
, bsi
);
853 new_base
= build_fold_indirect_ref (new_temp
);
854 data_ref
= build4 (ARRAY_REF
, vectype
, new_base
, idx
, NULL_TREE
, NULL_TREE
);
856 if (vect_debug_details (NULL
))
858 fprintf (dump_file
, "created new data-ref: ");
859 print_generic_expr (dump_file
, data_ref
, TDF_SLIM
);
866 /* Function vect_create_destination_var.
868 Create a new temporary of type VECTYPE. */
871 vect_create_destination_var (tree scalar_dest
, tree vectype
)
874 const char *new_name
;
876 #ifdef ENABLE_CHECKING
877 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
881 new_name
= get_name (scalar_dest
);
884 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, new_name
);
885 add_referenced_tmp_var (vec_dest
);
891 /* Function vect_init_vector.
893 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
894 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
895 used in the vectorization of STMT. */
898 vect_init_vector (tree stmt
, tree vector_var
)
900 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
901 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
904 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
909 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
910 add_referenced_tmp_var (new_var
);
912 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
913 new_temp
= make_ssa_name (new_var
, init_stmt
);
914 TREE_OPERAND (init_stmt
, 0) = new_temp
;
916 pe
= loop_preheader_edge (loop
);
917 bsi_insert_on_edge (pe
, init_stmt
);
919 if (vect_debug_details (NULL
))
921 fprintf (dump_file
, "created new init_stmt: ");
922 print_generic_expr (dump_file
, init_stmt
, TDF_SLIM
);
925 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
/* NOTE(review): garbled by extraction -- embedded line numbers and
   wraps remain; missing lines include local declarations (def_stmt,
   vec_stmt, vec_oprnd, vec_cst, vec_inv, def, bb, t, i), the braces,
   the case labels of the final switch (the three 'def = ...'
   assignments presumably correspond to PHI_NODE / MODIFY_EXPR-like /
   NOP-or-empty-stmt cases -- confirm), the abort bodies, and the
   Case 2.1 return.  Kept byte-identical; not safely rewritable.
   Visible tokens show the three documented cases: (1) INTEGER_CST /
   REAL_CST -> build {cst,...,cst} and vect_init_vector; (2.1) def
   inside the loop -> take operand 0 of STMT_VINFO_VEC_STMT; (2.2)
   loop-header PHI -> reduction/induction, abort (FORNOW); (2.3) loop
   invariant -> build a CONSTRUCTOR {inv,...,inv} and
   vect_init_vector.  */
930 /* Function vect_get_vec_def_for_operand.
932 OP is an operand in STMT. This function returns a (vector) def that will be
933 used in the vectorized stmt for STMT.
935 In the case that OP is an SSA_NAME which is defined in the loop, then
936 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
938 In case OP is an invariant or constant, a new stmt that creates a vector def
939 needs to be introduced. */
942 vect_get_vec_def_for_operand (tree op
, tree stmt
)
947 stmt_vec_info def_stmt_info
= NULL
;
948 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
949 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
950 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
951 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
958 if (vect_debug_details (NULL
))
960 fprintf (dump_file
, "vect_get_vec_def_for_operand: ");
961 print_generic_expr (dump_file
, op
, TDF_SLIM
);
964 /** ===> Case 1: operand is a constant. **/
966 if (TREE_CODE (op
) == INTEGER_CST
|| TREE_CODE (op
) == REAL_CST
)
968 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
971 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
972 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
973 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
977 /* Build a tree with vector elements. */
978 if (vect_debug_details (NULL
))
979 fprintf (dump_file
, "Create vector_cst. nunits = %d", nunits
);
981 for (i
= nunits
- 1; i
>= 0; --i
)
983 t
= tree_cons (NULL_TREE
, op
, t
);
985 vec_cst
= build_vector (vectype
, t
);
986 return vect_init_vector (stmt
, vec_cst
);
989 #ifdef ENABLE_CHECKING
990 if (TREE_CODE (op
) != SSA_NAME
)
994 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
996 def_stmt
= SSA_NAME_DEF_STMT (op
);
997 def_stmt_info
= vinfo_for_stmt (def_stmt
);
999 if (vect_debug_details (NULL
))
1001 fprintf (dump_file
, "vect_get_vec_def_for_operand: def_stmt: ");
1002 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
1006 /** ==> Case 2.1: operand is defined inside the loop. **/
1010 /* Get the def from the vectorized stmt. */
1012 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1013 #ifdef ENABLE_CHECKING
1017 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
1022 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1023 it is a reduction/induction. **/
1025 bb
= bb_for_stmt (def_stmt
);
1026 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
1028 if (vect_debug_details (NULL
))
1029 fprintf (dump_file
, "reduction/induction - unsupported.");
1030 abort (); /* FORNOW no support for reduction/induction. */
1034 /** ==> Case 2.3: operand is defined outside the loop -
1035 it is a loop invariant. */
1037 switch (TREE_CODE (def_stmt
))
1040 def
= PHI_RESULT (def_stmt
);
1043 def
= TREE_OPERAND (def_stmt
, 0);
1046 def
= TREE_OPERAND (def_stmt
, 0);
1047 #ifdef ENABLE_CHECKING
1048 if (!IS_EMPTY_STMT (def_stmt
))
1054 if (vect_debug_details (NULL
))
1056 fprintf (dump_file
, "unsupported defining stmt: ");
1057 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
1062 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1064 if (vect_debug_details (NULL
))
1065 fprintf (dump_file
, "Create vector_inv.");
1067 for (i
= nunits
- 1; i
>= 0; --i
)
1069 t
= tree_cons (NULL_TREE
, def
, t
);
1072 vec_inv
= build_constructor (vectype
, t
);
1073 return vect_init_vector (stmt
, vec_inv
);
1077 /* Function vect_finish_stmt_generation.
1079 Insert a new stmt. */
1082 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
1084 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
1086 if (vect_debug_details (NULL
))
1088 fprintf (dump_file
, "add new stmt: ");
1089 print_generic_expr (dump_file
, vec_stmt
, TDF_SLIM
);
1092 /* Make sure bsi points to the stmt that is being vectorized. */
1094 /* Assumption: any stmts created for the vectorization of smtmt S are
1095 inserted before S. BSI may point to S or some new stmt before it. */
1097 while (stmt
!= bsi_stmt (*bsi
) && !bsi_end_p (*bsi
))
1099 #ifdef ENABLE_CHECKING
1100 if (stmt
!= bsi_stmt (*bsi
))
1106 /* Function vectorizable_assignment.
1108 Check if STMT performs an assignment (copy) that can be vectorized.
1109 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1110 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1111 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1114 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1120 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1121 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1122 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1125 /* Is vectorizable assignment? */
1127 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1130 scalar_dest
= TREE_OPERAND (stmt
, 0);
1131 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1134 op
= TREE_OPERAND (stmt
, 1);
1135 if (!vect_is_simple_use (op
, loop
, NULL
))
1137 if (vect_debug_details (NULL
))
1138 fprintf (dump_file
, "use not simple.");
1142 if (!vec_stmt
) /* transformation not required. */
1144 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
1149 if (vect_debug_details (NULL
))
1150 fprintf (dump_file
, "transform assignment.");
1153 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1156 op
= TREE_OPERAND (stmt
, 1);
1157 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
);
1159 /* Arguments are ready. create the new vector stmt. */
1160 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
1161 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1162 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1163 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1169 /* Function vectorizable_operation.
1171 Check if STMT performs a binary or unary operation that can be vectorized.
1172 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1173 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1174 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1177 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1182 tree op0
, op1
= NULL
;
1183 tree vec_oprnd0
, vec_oprnd1
=NULL
;
1184 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1185 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1186 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1188 enum tree_code code
;
1189 enum machine_mode vec_mode
;
1195 /* Is STMT a vectorizable binary/unary operation? */
1196 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1199 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1202 operation
= TREE_OPERAND (stmt
, 1);
1203 code
= TREE_CODE (operation
);
1204 optab
= optab_for_tree_code (code
, vectype
);
1206 /* Support only unary or binary operations. */
1207 op_type
= TREE_CODE_LENGTH (code
);
1208 if (op_type
!= unary_op
&& op_type
!= binary_op
)
1210 if (vect_debug_details (NULL
))
1211 fprintf (dump_file
, "num. args = %d (not unary/binary op).", op_type
);
1215 for (i
= 0; i
< op_type
; i
++)
1217 op
= TREE_OPERAND (operation
, i
);
1218 if (!vect_is_simple_use (op
, loop
, NULL
))
1220 if (vect_debug_details (NULL
))
1221 fprintf (dump_file
, "use not simple.");
1226 /* Supportable by target? */
1229 if (vect_debug_details (NULL
))
1230 fprintf (dump_file
, "no optab.");
1233 vec_mode
= TYPE_MODE (vectype
);
1234 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1236 if (vect_debug_details (NULL
))
1237 fprintf (dump_file
, "op not supported by target.");
1241 if (!vec_stmt
) /* transformation not required. */
1243 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
1249 if (vect_debug_details (NULL
))
1250 fprintf (dump_file
, "transform binary/unary operation.");
1253 scalar_dest
= TREE_OPERAND (stmt
, 0);
1254 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1257 op0
= TREE_OPERAND (operation
, 0);
1258 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
1260 if (op_type
== binary_op
)
1262 op1
= TREE_OPERAND (operation
, 1);
1263 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
1266 /* Arguments are ready. create the new vector stmt. */
1268 if (op_type
== binary_op
)
1269 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1270 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
1272 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1273 build1 (code
, vectype
, vec_oprnd0
));
1274 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1275 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1276 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1282 /* Function vectorizable_store.
1284 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1291 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1297 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1298 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1299 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1300 enum machine_mode vec_mode
;
1302 /* Is vectorizable store? */
1304 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1307 scalar_dest
= TREE_OPERAND (stmt
, 0);
1308 if (TREE_CODE (scalar_dest
) != ARRAY_REF
1309 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
1312 op
= TREE_OPERAND (stmt
, 1);
1313 if (!vect_is_simple_use (op
, loop
, NULL
))
1315 if (vect_debug_details (NULL
))
1316 fprintf (dump_file
, "use not simple.");
1320 vec_mode
= TYPE_MODE (vectype
);
1321 /* FORNOW. In some cases can vectorize even if data-type not supported
1322 (e.g. - array initialization with 0). */
1323 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
1326 if (!STMT_VINFO_DATA_REF (stmt_info
))
1329 if (!vec_stmt
) /* transformation not required. */
1331 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
1337 if (vect_debug_details (NULL
))
1338 fprintf (dump_file
, "transform store");
1340 /* Handle use - get the vectorized def from the defining stmt. */
1341 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
);
1344 data_ref
= vect_create_data_ref (stmt
, bsi
);
1346 /* Arguments are ready. create the new vector stmt. */
1347 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
1348 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1354 /* vectorizable_load.
1356 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1358 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1359 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1360 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1363 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1366 tree vec_dest
= NULL
;
1367 tree data_ref
= NULL
;
1369 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1370 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1372 enum machine_mode vec_mode
;
1374 /* Is vectorizable load? */
1376 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1379 scalar_dest
= TREE_OPERAND (stmt
, 0);
1380 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1383 op
= TREE_OPERAND (stmt
, 1);
1384 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
1387 if (!STMT_VINFO_DATA_REF (stmt_info
))
1390 vec_mode
= TYPE_MODE (vectype
);
1391 /* FORNOW. In some cases can vectorize even if data-type not supported
1392 (e.g. - data copies). */
1393 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
1396 if (!vec_stmt
) /* transformation not required. */
1398 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
1404 if (vect_debug_details (NULL
))
1405 fprintf (dump_file
, "transform load.");
1408 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1411 op
= TREE_OPERAND (stmt
, 1);
1412 data_ref
= vect_create_data_ref (stmt
, bsi
);
1414 /* Arguments are ready. create the new vector stmt. */
1415 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1416 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1417 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1418 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1424 /* Function vect_transform_stmt.
1426 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1429 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
1431 bool is_store
= false;
1432 tree vec_stmt
= NULL_TREE
;
1433 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1435 switch (STMT_VINFO_TYPE (stmt_info
))
1437 case op_vec_info_type
:
1438 if (!vectorizable_operation (stmt
, bsi
, &vec_stmt
))
1442 case assignment_vec_info_type
:
1443 if (!vectorizable_assignment (stmt
, bsi
, &vec_stmt
))
1447 case load_vec_info_type
:
1448 if (!vectorizable_load (stmt
, bsi
, &vec_stmt
))
1452 case store_vec_info_type
:
1453 if (!vectorizable_store (stmt
, bsi
, &vec_stmt
))
1458 if (vect_debug_details (NULL
))
1459 fprintf (dump_file
, "stmt not supported.");
1463 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
1469 /* Function vect_transform_loop_bound.
1471 Create a new exit condition for the loop. */
1474 vect_transform_loop_bound (loop_vec_info loop_vinfo
)
1476 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1477 edge exit_edge
= loop
->exit_edges
[0];
1478 block_stmt_iterator loop_exit_bsi
= bsi_last (exit_edge
->src
);
1479 tree indx_before_incr
, indx_after_incr
;
1480 tree orig_cond_expr
;
1481 HOST_WIDE_INT old_N
= 0;
1484 tree new_loop_bound
;
1488 #ifdef ENABLE_CHECKING
1489 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
1492 old_N
= LOOP_VINFO_NITERS (loop_vinfo
);
1493 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1495 #ifdef ENABLE_CHECKING
1497 assuming number-of-iterations divides by the vectorization factor. */
1502 orig_cond_expr
= LOOP_VINFO_EXIT_COND (loop_vinfo
);
1503 #ifdef ENABLE_CHECKING
1504 if (!orig_cond_expr
)
1507 if (orig_cond_expr
!= bsi_stmt (loop_exit_bsi
))
1510 /* both init and step are guaranted to be gimple expressions,
1511 so we can use vect_create_iv_simple. */
1512 vect_create_iv_simple (integer_zero_node
, integer_one_node
, NULL_TREE
, loop
,
1513 &loop_exit_bsi
, false, &indx_before_incr
, &indx_after_incr
);
1515 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1516 to point to the exit condition. */
1517 bsi_next (&loop_exit_bsi
);
1518 if (bsi_stmt (loop_exit_bsi
) != orig_cond_expr
)
1521 /* new loop exit test: */
1522 lb_type
= TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr
, 0), 1));
1523 new_loop_bound
= build_int_cst (lb_type
, old_N
/vf
, 0);
1525 if (exit_edge
->flags
& EDGE_TRUE_VALUE
) /* 'then' edge exits the loop. */
1526 cond
= build2 (GE_EXPR
, boolean_type_node
, indx_after_incr
, new_loop_bound
);
1527 else /* 'then' edge loops back. */
1528 cond
= build2 (LT_EXPR
, boolean_type_node
, indx_after_incr
, new_loop_bound
);
1530 cond_stmt
= build3 (COND_EXPR
, TREE_TYPE (orig_cond_expr
), cond
,
1531 TREE_OPERAND (orig_cond_expr
, 1), TREE_OPERAND (orig_cond_expr
, 2));
1533 bsi_insert_before (&loop_exit_bsi
, cond_stmt
, BSI_SAME_STMT
);
1535 /* remove old loop exit test: */
1536 bsi_remove (&loop_exit_bsi
);
1538 if (vect_debug_details (NULL
))
1539 print_generic_expr (dump_file
, cond_stmt
, TDF_SLIM
);
1543 /* Function vect_transform_loop.
1545 The analysis phase has determined that the loop is vectorizable.
1546 Vectorize the loop - created vectorized stmts to replace the scalar
1547 stmts in the loop, and update the loop exit condition. */
1550 vect_transform_loop (loop_vec_info loop_vinfo
,
1551 struct loops
*loops ATTRIBUTE_UNUSED
)
1553 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1554 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1555 int nbbs
= loop
->num_nodes
;
1556 block_stmt_iterator si
;
1558 #ifdef ENABLE_CHECKING
1559 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1562 if (vect_debug_details (NULL
))
1563 fprintf (dump_file
, "\n<<vec_transform_loop>>\n");
1565 /* 1) Make sure the loop header has exactly two entries
1566 2) Make sure we have a preheader basic block. */
1568 if (!loop
->header
->pred
->pred_next
1569 || loop
->header
->pred
->pred_next
->pred_next
)
1572 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
1575 /* FORNOW: the vectorizer supports only loops which body consist
1576 of one basic block (header + empty latch). When the vectorizer will
1577 support more involved loop forms, the order by which the BBs are
1578 traversed need to be reconsidered. */
1580 for (i
= 0; i
< nbbs
; i
++)
1582 basic_block bb
= bbs
[i
];
1584 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
1586 tree stmt
= bsi_stmt (si
);
1587 stmt_vec_info stmt_info
;
1589 #ifdef ENABLE_CHECKING
1593 if (vect_debug_details (NULL
))
1595 fprintf (dump_file
, "------>vectorizing statement: ");
1596 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
1598 stmt_info
= vinfo_for_stmt (stmt
);
1599 #ifdef ENABLE_CHECKING
1603 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1608 #ifdef ENABLE_CHECKING
1609 /* FORNOW: Verify that all stmts operate on the same number of
1610 units and no inner unrolling is necessary. */
1611 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1612 if (GET_MODE_NUNITS (TYPE_MODE (vectype
)) != vectorization_factor
)
1615 /* -------- vectorize statement ------------ */
1616 if (vect_debug_details (NULL
))
1617 fprintf (dump_file
, "transform statement.");
1619 is_store
= vect_transform_stmt (stmt
, &si
);
1622 /* free the attached stmt_vec_info and remove the stmt. */
1623 stmt_ann_t ann
= stmt_ann (stmt
);
1625 set_stmt_info (ann
, NULL
);
1634 vect_transform_loop_bound (loop_vinfo
);
1636 if (vect_debug_details (loop
))
1637 fprintf (dump_file
,"Success! loop vectorized.");
1638 if (vect_debug_stats (loop
))
1639 fprintf (dump_file
, "LOOP VECTORIZED.");
1643 /* Function vect_is_simple_use.
1646 LOOP - the loop that is being vectorized.
1647 OPERAND - operand of a stmt in LOOP.
1648 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1650 Returns whether a stmt with OPERAND can be vectorized.
1651 Supportable operands are constants, loop invariants, and operands that are
1652 defined by the current iteration of the loop. Unsupportable opernads are
1653 those that are defined by a previous iteration of the loop (as is the case
1654 in reduction/induction computations). */
1657 vect_is_simple_use (tree operand
, struct loop
*loop
, tree
*def
)
1665 if (TREE_CODE (operand
) == INTEGER_CST
|| TREE_CODE (operand
) == REAL_CST
)
1668 if (TREE_CODE (operand
) != SSA_NAME
)
1671 def_stmt
= SSA_NAME_DEF_STMT (operand
);
1672 if (def_stmt
== NULL_TREE
)
1674 if (vect_debug_details (NULL
))
1675 fprintf (dump_file
, "no def_stmt.");
1679 /* empty stmt is expected only in case of a function argument.
1680 (Otherwise - we expect a phi_node or a modify_expr). */
1681 if (IS_EMPTY_STMT (def_stmt
))
1683 tree arg
= TREE_OPERAND (def_stmt
, 0);
1684 if (TREE_CODE (arg
) == INTEGER_CST
|| TREE_CODE (arg
) == REAL_CST
)
1686 if (vect_debug_details (NULL
))
1688 fprintf (dump_file
, "Unexpected empty stmt: ");
1689 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
1694 /* phi_node inside the loop indicates an induction/reduction pattern.
1695 This is not supported yet. */
1696 bb
= bb_for_stmt (def_stmt
);
1697 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
1699 if (vect_debug_details (NULL
))
1700 fprintf (dump_file
, "reduction/induction - unsupported.");
1701 return false; /* FORNOW: not supported yet. */
1704 /* Expecting a modify_expr or a phi_node. */
1705 if (TREE_CODE (def_stmt
) == MODIFY_EXPR
1706 || TREE_CODE (def_stmt
) == PHI_NODE
)
1717 /* Function vect_analyze_operations.
1719 Scan the loop stmts and make sure they are all vectorizable. */
1722 vect_analyze_operations (loop_vec_info loop_vinfo
)
1724 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1725 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1726 int nbbs
= loop
->num_nodes
;
1727 block_stmt_iterator si
;
1728 int vectorization_factor
= 0;
1733 if (vect_debug_details (NULL
))
1734 fprintf (dump_file
, "\n<<vect_analyze_operations>>\n");
1736 for (i
= 0; i
< nbbs
; i
++)
1738 basic_block bb
= bbs
[i
];
1740 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
1742 tree stmt
= bsi_stmt (si
);
1744 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1747 if (vect_debug_details (NULL
))
1749 fprintf (dump_file
, "==> examining statement: ");
1750 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
1752 #ifdef ENABLE_CHECKING
1756 /* skip stmts which do not need to be vectorized.
1757 this is expected to include:
1758 - the COND_EXPR which is the loop exit condition
1759 - any LABEL_EXPRs in the loop
1760 - computations that are used only for array indexing or loop
1763 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1765 if (vect_debug_details (NULL
))
1766 fprintf (dump_file
, "irrelevant.");
1770 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
1772 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1774 fprintf (dump_file
, "not vectorized: vector stmt in loop:");
1775 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
1780 if (STMT_VINFO_DATA_REF (stmt_info
))
1781 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
1782 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1783 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
1785 scalar_type
= TREE_TYPE (stmt
);
1787 if (vect_debug_details (NULL
))
1789 fprintf (dump_file
, "get vectype for scalar type: ");
1790 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
1793 vectype
= get_vectype_for_scalar_type (scalar_type
);
1796 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1798 fprintf (dump_file
, "not vectorized: unsupported data-type ");
1799 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
1804 if (vect_debug_details (NULL
))
1806 fprintf (dump_file
, "vectype: ");
1807 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
1809 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
1811 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
1812 || vectorizable_assignment (stmt
, NULL
, NULL
)
1813 || vectorizable_load (stmt
, NULL
, NULL
)
1814 || vectorizable_store (stmt
, NULL
, NULL
));
1818 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1820 fprintf (dump_file
, "not vectorized: stmt not supported: ");
1821 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
1826 nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
1827 if (vect_debug_details (NULL
))
1828 fprintf (dump_file
, "nunits = %d", nunits
);
1830 if (vectorization_factor
)
1832 /* FORNOW: don't allow mixed units.
1833 This restriction will be relaxed in the future. */
1834 if (nunits
!= vectorization_factor
)
1836 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1837 fprintf (dump_file
, "not vectorized: mixed data-types");
1842 vectorization_factor
= nunits
;
1846 /* TODO: Analyze cost. Decide if worth while to vectorize. */
1847 if (!vectorization_factor
)
1849 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1850 fprintf (dump_file
, "not vectorized: unsupported data-type");
1853 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
1855 /* FORNOW: handle only cases where the loop bound divides by the
1856 vectorization factor. */
1858 if (vect_debug_details (NULL
))
1860 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
1861 vectorization_factor
, LOOP_VINFO_NITERS (loop_vinfo
));
1863 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
1865 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1866 fprintf (dump_file
, "not vectorized: Unknown loop bound.");
1870 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1871 && LOOP_VINFO_NITERS (loop_vinfo
) % vectorization_factor
!= 0)
1873 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
1874 fprintf (dump_file
, "not vectorized: loop bound doesn't divided by %d.",
1875 vectorization_factor
);
1883 /* Function exist_non_indexing_operands_for_use_p
1885 USE is one of the uses attached to STMT. Check if USE is
1886 used in STMT for anything other than indexing an array. */
1889 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
1892 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1894 /* USE corresponds to some operand in STMT. If there is no data
1895 reference in STMT, then any operand that corresponds to USE
1896 is not indexing an array. */
1897 if (!STMT_VINFO_DATA_REF (stmt_info
))
1900 /* STMT has a data_ref. FORNOW this means that its of one of
1901 the following forms:
1904 (This should have been verified in analyze_data_refs).
1906 'var' in the second case corresponds to a def, not a use,
1907 so USE cannot correspond to any operands that are not used
1910 Therefore, all we need to check is if STMT falls into the
1911 first case, and whether var corresponds to USE. */
1913 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
1916 operand
= TREE_OPERAND (stmt
, 1);
1918 if (TREE_CODE (operand
) != SSA_NAME
)
1928 /* Function vect_is_simple_iv_evolution.
1930 FORNOW: A simple evolution of an induction variables in the loop is
1931 considered a polynomial evolution with constant step. */
1934 vect_is_simple_iv_evolution (unsigned loop_nb
, tree access_fn
, tree
* init
,
1935 tree
* step
, bool strict
)
1940 tree evolution_part
= evolution_part_in_loop_num (access_fn
, loop_nb
);
1942 /* When there is no evolution in this loop, the evolution function
1944 if (evolution_part
== NULL_TREE
)
1947 /* When the evolution is a polynomial of degree >= 2
1948 the evolution function is not "simple". */
1949 if (tree_is_chrec (evolution_part
))
1952 step_expr
= evolution_part
;
1953 init_expr
= initial_condition (access_fn
);
1955 if (vect_debug_details (NULL
))
1957 fprintf (dump_file
, "step: ");
1958 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
1959 fprintf (dump_file
, ", init: ");
1960 print_generic_expr (dump_file
, init_expr
, TDF_SLIM
);
1966 if (TREE_CODE (step_expr
) != INTEGER_CST
)
1968 if (vect_debug_details (NULL
))
1969 fprintf (dump_file
, "step unknown.");
1974 if (!integer_onep (step_expr
))
1976 if (vect_debug_details (NULL
))
1977 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
1985 /* Function vect_analyze_scalar_cycles.
1987 Examine the cross iteration def-use cycles of scalar variables, by
1988 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
1989 cycles that they represent do not impede vectorization.
1991 FORNOW: Reduction as in the following loop, is not supported yet:
1995 The cross-iteration cycle corresponding to variable 'sum' will be
1996 considered too complicated and will impede vectorization.
1998 FORNOW: Induction as in the following loop, is not supported yet:
2003 However, the following loop *is* vectorizable:
2008 In both loops there exists a def-use cycle for the variable i:
2009 loop: i_2 = PHI (i_0, i_1)
2014 The evolution of the above cycle is considered simple enough,
2015 however, we also check that the cycle does not need to be
2016 vectorized, i.e - we check that the variable that this cycle
2017 defines is only used for array indexing or in stmts that do not
2018 need to be vectorized. This is not the case in loop2, but it
2019 *is* the case in loop3. */
2022 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
2025 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2026 basic_block bb
= loop
->header
;
2029 if (vect_debug_details (NULL
))
2030 fprintf (dump_file
, "\n<<vect_analyze_scalar_cycles>>\n");
2032 for (phi
= phi_nodes (bb
); phi
; phi
= TREE_CHAIN (phi
))
2034 tree access_fn
= NULL
;
2036 if (vect_debug_details (NULL
))
2038 fprintf (dump_file
, "Analyze phi: ");
2039 print_generic_expr (dump_file
, phi
, TDF_SLIM
);
2042 /* Skip virtual phi's. The data dependences that are associated with
2043 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2045 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2047 if (vect_debug_details (NULL
))
2048 fprintf (dump_file
, "virtual phi. skip.");
2052 /* Analyze the evolution function. */
2054 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2055 those of loop induction variables; This property is verified here.
2057 Furthermore, if that induction variable is used in an operation
2058 that needs to be vectorized (i.e, is not solely used to index
2059 arrays and check the exit condition) - we do not support its
2060 vectorization yet. This property is verified in vect_is_simple_use,
2061 during vect_analyze_operations. */
2063 access_fn
= instantiate_parameters
2065 analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
2069 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2070 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
2074 if (vect_debug_details (NULL
))
2076 fprintf (dump_file
, "Access function of PHI: ");
2077 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
2080 if (!vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
,
2083 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2084 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
2093 /* Function vect_analyze_data_ref_dependence.
2095 Return TRUE if there (might) exist a dependence between a memory-reference
2096 DRA and a memory-reference DRB. */
2099 vect_analyze_data_ref_dependence (struct data_reference
*dra
,
2100 struct data_reference
*drb
,
2104 struct data_dependence_relation
*ddr
;
2106 if (!array_base_name_differ_p (dra
, drb
, &differ_p
))
2108 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2111 "not vectorized: can't determine dependence between: ");
2112 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
2113 fprintf (dump_file
, " and ");
2114 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
2122 ddr
= initialize_data_dependence_relation (dra
, drb
);
2123 compute_affine_dependence (ddr
);
2125 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
2128 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2131 "not vectorized: possible dependence between data-refs ");
2132 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
2133 fprintf (dump_file
, " and ");
2134 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
2141 /* Function vect_analyze_data_ref_dependences.
2143 Examine all the data references in the loop, and make sure there do not
2144 exist any data dependences between them.
2146 TODO: dependences which distance is greater than the vectorization factor
2150 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
2153 varray_type loop_write_refs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2154 varray_type loop_read_refs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2155 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2157 /* Examine store-store (output) dependences. */
2159 if (vect_debug_details (NULL
))
2160 fprintf (dump_file
, "\n<<vect_analyze_dependences>>\n");
2162 if (vect_debug_details (NULL
))
2163 fprintf (dump_file
, "compare all store-store pairs.");
2165 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_refs
); i
++)
2167 for (j
= i
+ 1; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
2169 struct data_reference
*dra
=
2170 VARRAY_GENERIC_PTR (loop_write_refs
, i
);
2171 struct data_reference
*drb
=
2172 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
2173 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
2178 /* Examine load-store (true/anti) dependences. */
2180 if (vect_debug_details (NULL
))
2181 fprintf (dump_file
, "compare all load-store pairs.");
2183 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_refs
); i
++)
2185 for (j
= 0; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
2187 struct data_reference
*dra
= VARRAY_GENERIC_PTR (loop_read_refs
, i
);
2188 struct data_reference
*drb
=
2189 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
2190 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
2199 /* Function vect_get_first_index.
2201 REF is a data reference.
2202 If it is an ARRAY_REF: if its lower bound is simple enough,
2203 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2204 If it is not an ARRAY_REF: REF has no "first index";
2205 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2208 vect_get_first_index (tree ref
, tree
*array_first_index
)
2212 if (TREE_CODE (ref
) != ARRAY_REF
)
2213 *array_first_index
= size_zero_node
;
2216 array_start
= array_ref_low_bound (ref
);
2217 if (!host_integerp (array_start
,0))
2219 if (vect_debug_details (NULL
))
2221 fprintf (dump_file
, "array min val not simple integer cst.");
2222 print_generic_expr (dump_file
, array_start
, TDF_DETAILS
);
2226 *array_first_index
= array_start
;
2233 /* Function vect_compute_data_ref_alignment
2235 Compute the misalignment of the data reference DR.
2237 FOR NOW: No analysis is actually performed. Misalignment is calculated
2238 only for trivial cases. TODO. */
2241 vect_compute_data_ref_alignment (struct data_reference
*dr
,
2242 loop_vec_info loop_vinfo ATTRIBUTE_UNUSED
)
2244 tree stmt
= DR_STMT (dr
);
2245 tree ref
= DR_REF (dr
);
2247 tree access_fn
= DR_ACCESS_FN (dr
, 0); /* FORNOW: single access_fn. */
2251 tree array_first_index
;
2252 tree array_base
= DR_BASE_NAME (dr
);
2253 tree base_decl
= NULL_TREE
;
2254 tree bit_offset
= size_zero_node
;
2255 tree offset
= size_zero_node
;
2256 tree unit_bits
= build_int_cst (unsigned_type_node
, BITS_PER_UNIT
, 0);
2260 if (vect_debug_details (NULL
))
2261 fprintf (dump_file
, "vect_compute_data_ref_alignment:");
2263 /* Initialize misalignment to unknown. */
2264 DR_MISALIGNMENT (dr
) = -1;
2266 scalar_type
= TREE_TYPE (ref
);
2267 vectype
= get_vectype_for_scalar_type (scalar_type
);
2270 if (vect_debug_details (NULL
))
2272 fprintf (dump_file
, "no vectype for stmt: ");
2273 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2274 fprintf (dump_file
, "scalar_type: ");
2275 print_generic_expr (dump_file
, scalar_type
, TDF_DETAILS
);
2280 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base
))) < TYPE_ALIGN (vectype
))
2282 base_decl
= vect_get_base_decl_and_bit_offset (array_base
, &bit_offset
);
2285 if (vect_debug_details (NULL
))
2286 fprintf (dump_file
, "Unknown alignment for access");
2290 offset
= int_const_binop (TRUNC_DIV_EXPR
, bit_offset
, unit_bits
, 1);
2291 bit_offset
= int_const_binop (TRUNC_MOD_EXPR
, bit_offset
, unit_bits
, 1);
2292 if (!integer_zerop (bit_offset
))
2294 if (vect_debug_details (NULL
))
2296 fprintf (dump_file
, "bit offset alignment: ");
2297 print_generic_expr (dump_file
, bit_offset
, TDF_SLIM
);
2303 (DECL_ALIGN (base_decl
) < TYPE_ALIGN (vectype
)
2304 && !vect_can_force_dr_alignment_p (base_decl
, TYPE_ALIGN (vectype
))))
2306 if (vect_debug_details (NULL
))
2308 fprintf (dump_file
, "can't force alignment of ref: ");
2309 print_generic_expr (dump_file
, array_base
, TDF_SLIM
);
2314 if (DECL_ALIGN (base_decl
) < TYPE_ALIGN (vectype
))
2316 /* Force the alignment of the decl.
2317 NOTE: This is the only change to the code we make during
2318 the analysis phase, before deciding to vectorize the loop. */
2319 if (vect_debug_details (NULL
))
2320 fprintf (dump_file
, "force alignment");
2321 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
2322 DECL_USER_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
2326 /* The misalignement is:
2327 (base_alignment + offset + index_access_fn_init) % alignment.
2328 At this point we already guaranteed that base_alignment == 0,
2329 and computed the offset.
2330 It remains to check the first index accessed. */
2332 if (!vect_get_first_index (ref
, &array_first_index
))
2334 if (vect_debug_details (NULL
))
2335 fprintf (dump_file
, "no first_index for array.");
2339 /* Check the index of the array_ref. */
2341 init
= initial_condition (access_fn
);
2343 /* FORNOW: In order to simplify the handling of alignment, we make sure
2344 that the first location at which the array is accessed ('init') is on an
2345 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2346 This is too conservative, since we require that
2347 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2348 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2349 This should be relaxed in the future. */
2351 if (!init
|| !host_integerp (init
,0))
2353 if (vect_debug_details (NULL
))
2354 fprintf (dump_file
, "init not simple INTEGER_CST.");
2358 /* alignment required, in bytes: */
2359 alignment
= build_int_cst (unsigned_type_node
,
2360 TYPE_ALIGN (vectype
)/BITS_PER_UNIT
, 0);
2361 /* bytes per scalar element: */
2362 nunits
= build_int_cst (unsigned_type_node
,
2363 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), 0);
2365 /* misalign = (offset + (init-array_first_index)*nunits) % alignment */
2366 if (vect_debug_details (NULL
))
2368 fprintf (dump_file
, "misalign = ( offset <");
2369 print_generic_expr (dump_file
, offset
, TDF_SLIM
);
2370 fprintf (dump_file
, "> + (init <");
2371 print_generic_expr (dump_file
, init
, TDF_SLIM
);
2372 fprintf (dump_file
, "> - first_indx <");
2373 print_generic_expr (dump_file
, array_first_index
, TDF_SLIM
);
2374 fprintf (dump_file
, ">) * nunits <");
2375 print_generic_expr (dump_file
, nunits
, TDF_SLIM
);
2376 fprintf (dump_file
, ">) mod alignment <");
2377 print_generic_expr (dump_file
, alignment
, TDF_SLIM
);
2378 fprintf (dump_file
, ">");
2381 misalign
= int_const_binop (MINUS_EXPR
, init
, array_first_index
, 0);
2382 misalign
= int_const_binop (MULT_EXPR
, misalign
, nunits
, 0);
2383 misalign
= int_const_binop (PLUS_EXPR
, misalign
, offset
, 0);
2384 misalign
= int_const_binop (TRUNC_MOD_EXPR
, misalign
, alignment
, 0);
2386 if (vect_debug_details (NULL
))
2388 fprintf (dump_file
, "misalign = ");
2389 print_generic_expr (dump_file
, misalign
, TDF_SLIM
);
2392 if (!host_integerp (misalign
,1) || TREE_OVERFLOW (misalign
))
2394 if (vect_debug_details (NULL
))
2395 fprintf (dump_file
, "unexpected misalign value");
2399 DR_MISALIGNMENT (dr
) = tree_low_cst (misalign
,1);
2401 if (vect_debug_details (NULL
))
2402 fprintf (dump_file
, "misalign = %d",DR_MISALIGNMENT (dr
));
2406 /* Function vect_compute_data_refs_alignment
2408 Compute the misalignment of data references in the loop.
2409 This pass may take place at function granularity instead of at loop
2412 FOR NOW: No analysis is actually performed. Misalignment is calculated
2413 only for trivial cases. TODO. */
2416 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
2418 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2419 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2422 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
2424 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
2425 vect_compute_data_ref_alignment (dr
, loop_vinfo
);
2428 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
2430 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
2431 vect_compute_data_ref_alignment (dr
, loop_vinfo
);
2436 /* Function vect_enhance_data_refs_alignment
2438 This pass will use loop versioning and loop peeling in order to enhance
2439 the alignment of data references in the loop.
2441 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2442 original loop is to be vectorized; Any other loops that are created by
2443 the transformations performed in this pass - are not supposed to be
2444 vectorized. This restriction will be relaxed.
2446 FOR NOW: No transformation is actually performed. TODO. */
2449 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED
)
2452 This pass will require a cost model to guide it whether to apply peeling
2453 or versioning or a combination of the two. For example, the scheme that
2454 intel uses when given a loop with several memory accesses, is as follows:
2455 choose one memory access ('p') which alignment you want to force by doing
2456 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2457 other accesses are not necessarily aligned, or (2) use loop versioning to
2458 generate one loop in which all accesses are aligned, and another loop in
2459 which only 'p' is necessarily aligned.
2461 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2462 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2463 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2465 Devising a cost model is the most critical aspect of this work. It will
2466 guide us on which access to peel for, whether to use loop versioning, how
2467 many versions to create, etc. The cost model will probably consist of
2468 generic considerations as well as target specific considerations (on
2469 powerpc for example, misaligned stores are more painful than misaligned
2472 Here is the general steps involved in alignment enhancements:
2474 -- original loop, before alignment analysis:
2475 for (i=0; i<N; i++){
2476 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2477 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2480 -- After vect_compute_data_refs_alignment:
2481 for (i=0; i<N; i++){
2482 x = q[i]; # DR_MISALIGNMENT(q) = 3
2483 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2486 -- Possibility 1: we do loop versioning:
2488 for (i=0; i<N; i++){ # loop 1A
2489 x = q[i]; # DR_MISALIGNMENT(q) = 3
2490 p[i] = y; # DR_MISALIGNMENT(p) = 0
2494 for (i=0; i<N; i++){ # loop 1B
2495 x = q[i]; # DR_MISALIGNMENT(q) = 3
2496 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2500 -- Possibility 2: we do loop peeling:
2501 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2505 for (i = 3; i < N; i++){ # loop 2A
2506 x = q[i]; # DR_MISALIGNMENT(q) = 0
2507 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2510 -- Possibility 3: combination of loop peeling and versioning:
2511 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2516 for (i = 3; i<N; i++){ # loop 3A
2517 x = q[i]; # DR_MISALIGNMENT(q) = 0
2518 p[i] = y; # DR_MISALIGNMENT(p) = 0
2522 for (i = 3; i<N; i++){ # loop 3B
2523 x = q[i]; # DR_MISALIGNMENT(q) = 0
2524 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2528 These loops are later passed to loop_transform to be vectorized. The
2529 vectorizer will use the alignment information to guide the transformation
2530 (whether to generate regular loads/stores, or with special handling for
2536 /* Function vect_analyze_data_refs_alignment
2538 Analyze the alignment of the data-references in the loop.
2539 FOR NOW: Until support for misliagned accesses is in place, only if all
2540 accesses are aligned can the loop be vectorized. This restriction will be
2544 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
2546 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2547 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2550 if (vect_debug_details (NULL
))
2551 fprintf (dump_file
, "\n<<vect_analyze_data_refs_alignment>>\n");
2554 /* This pass may take place at function granularity instead of at loop
2557 vect_compute_data_refs_alignment (loop_vinfo
);
2560 /* This pass will use loop versioning and loop peeling in order to enhance
2561 the alignment of data references in the loop.
2562 FOR NOW: we assume that whatever versioning/peeling took place, the
2563 original loop is to be vectorized. Any other loops that were created by
2564 the transformations performed in this pass - are not supposed to be
2565 vectorized. This restriction will be relaxed. */
2567 vect_enhance_data_refs_alignment (loop_vinfo
);
2570 /* Finally, check that loop can be vectorized.
2571 FOR NOW: Until support for misaligned accesses is in place, only if all
2572 accesses are aligned can the loop be vectorized. This restriction will be
2575 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
2577 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
2578 if (!aligned_access_p (dr
))
2580 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
2581 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
2582 fprintf (dump_file
, "not vectorized: unaligned store.");
2587 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
2589 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
2590 if (!aligned_access_p (dr
))
2592 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
2593 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
2594 fprintf (dump_file
, "not vectorized: unaligned load.");
2603 /* Function vect_analyze_data_ref_access.
2605 Analyze the access pattern of the data-reference DR. For now, a data access
2606 has to consecutive and aligned to be considered vectorizable. */
2609 vect_analyze_data_ref_access (struct data_reference
*dr
)
2611 varray_type access_fns
= DR_ACCESS_FNS (dr
);
2615 /* FORNOW: handle only one dimensional arrays.
2616 This restriction will be relaxed in the future. */
2617 if (VARRAY_ACTIVE_SIZE (access_fns
) != 1)
2619 if (vect_debug_details (NULL
))
2620 fprintf (dump_file
, "multi dimensional array reference.");
2623 access_fn
= DR_ACCESS_FN (dr
, 0);
2625 if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr
))->num
,
2626 access_fn
, &init
, &step
, true))
2628 if (vect_debug_details (NULL
))
2630 fprintf (dump_file
, "too complicated access function.");
2631 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
2640 /* Function vect_analyze_data_ref_accesses.
2642 Analyze the access pattern of all the data references in the loop.
2644 FORNOW: the only access pattern that is considered vectorizable is a
2645 simple step 1 (consecutive) access.
2647 FORNOW: handle only one dimensional arrays, and pointer accesses. */
2650 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
2653 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2654 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2656 if (vect_debug_details (NULL
))
2657 fprintf (dump_file
, "\n<<vect_analyze_data_ref_accesses>>\n");
2659 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
2661 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
2662 bool ok
= vect_analyze_data_ref_access (dr
);
2665 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
2666 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
2667 fprintf (dump_file
, "not vectorized: complicated access pattern.");
2672 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
2674 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
2675 bool ok
= vect_analyze_data_ref_access (dr
);
2678 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
2679 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
2680 fprintf (dump_file
, "not vectorized: complicated access pattern.");
2689 /* Function vect_analyze_pointer_ref_access.
2692 STMT - a stmt that contains a data-ref
2693 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
2695 If the data-ref access is vectorizable, return a data_reference structure
2696 that represents it (DR). Otherwise - return NULL. */
2698 static struct data_reference
*
2699 vect_analyze_pointer_ref_access (tree memref
, tree stmt
, bool is_read
)
2701 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2702 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
2703 tree access_fn
= analyze_scalar_evolution (loop
, TREE_OPERAND (memref
, 0));
2706 tree reftype
, innertype
;
2707 enum machine_mode innermode
;
2708 tree indx_access_fn
;
2709 int loopnum
= loop
->num
;
2710 struct data_reference
*dr
;
2714 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2715 fprintf (dump_file
, "not vectorized: complicated pointer access.");
2719 if (vect_debug_details (NULL
))
2721 fprintf (dump_file
, "Access function of ptr: ");
2722 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
2725 if (!vect_is_simple_iv_evolution (loopnum
, access_fn
, &init
, &step
, false))
2727 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2728 fprintf (dump_file
, "not vectorized: pointer access is not simple.");
2732 if (TREE_CODE (init
) != SSA_NAME
/* FORNOW */
2733 || !host_integerp (step
,0))
2735 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2737 "not vectorized: non constant init/step for pointer access.");
2741 step_val
= TREE_INT_CST_LOW (step
);
2743 reftype
= TREE_TYPE (TREE_OPERAND (memref
, 0));
2744 if (TREE_CODE (reftype
) != POINTER_TYPE
)
2746 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2747 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
2751 reftype
= TREE_TYPE (init
);
2752 if (TREE_CODE (reftype
) != POINTER_TYPE
)
2754 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2755 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
2759 innertype
= TREE_TYPE (reftype
);
2760 innermode
= TYPE_MODE (innertype
);
2761 if (GET_MODE_SIZE (innermode
) != step_val
)
2763 /* FORNOW: support only consecutive access */
2764 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2765 fprintf (dump_file
, "not vectorized: non consecutive access.");
2770 build_polynomial_chrec (loopnum
, integer_zero_node
, integer_one_node
);
2771 if (vect_debug_details (NULL
))
2773 fprintf (dump_file
, "Access function of ptr indx: ");
2774 print_generic_expr (dump_file
, indx_access_fn
, TDF_SLIM
);
2776 dr
= init_data_ref (stmt
, memref
, init
, indx_access_fn
, is_read
);
2781 /* Function vect_analyze_data_refs.
2783 Find all the data references in the loop.
2785 FORNOW: Handle aligned INDIRECT_REFs and one dimensional ARRAY_REFs
2786 which base is really an array (not a pointer) and which alignment
2787 can be forced. This restriction will be relaxed. */
2790 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
2792 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2793 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2794 int nbbs
= loop
->num_nodes
;
2795 block_stmt_iterator si
;
2797 struct data_reference
*dr
;
2799 if (vect_debug_details (NULL
))
2800 fprintf (dump_file
, "\n<<vect_analyze_data_refs>>\n");
2802 for (j
= 0; j
< nbbs
; j
++)
2804 basic_block bb
= bbs
[j
];
2805 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2807 bool is_read
= false;
2808 tree stmt
= bsi_stmt (si
);
2809 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2810 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
2811 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
2812 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
2813 varray_type
*datarefs
= NULL
;
2814 int nvuses
, nv_may_defs
, nv_must_defs
;
2819 /* Assumption: there exists a data-ref in stmt, if and only if
2820 it has vuses/vdefs. */
2822 if (!vuses
&& !v_may_defs
&& !v_must_defs
)
2825 nvuses
= NUM_VUSES (vuses
);
2826 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
2827 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
2829 if (nvuses
&& (nv_may_defs
|| nv_must_defs
))
2831 if (vect_debug_details (NULL
))
2833 fprintf (dump_file
, "unexpected vdefs and vuses in stmt: ");
2834 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2839 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2841 if (vect_debug_details (NULL
))
2843 fprintf (dump_file
, "unexpected vops in stmt: ");
2844 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2851 memref
= TREE_OPERAND (stmt
, 1);
2852 datarefs
= &(LOOP_VINFO_DATAREF_READS (loop_vinfo
));
2857 memref
= TREE_OPERAND (stmt
, 0);
2858 datarefs
= &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
2862 if (TREE_CODE (memref
) == INDIRECT_REF
)
2864 dr
= vect_analyze_pointer_ref_access (memref
, stmt
, is_read
);
2867 symbl
= DR_BASE_NAME (dr
);
2869 else if (TREE_CODE (memref
) == ARRAY_REF
)
2872 tree offset
= size_zero_node
;
2873 array_base
= TREE_OPERAND (memref
, 0);
2875 /* FORNOW: make sure that the array is one dimensional.
2876 This restriction will be relaxed in the future. */
2877 if (TREE_CODE (array_base
) == ARRAY_REF
)
2879 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2882 "not vectorized: multi-dimensional array.");
2883 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2888 dr
= analyze_array (stmt
, memref
, is_read
);
2890 /* Find the relevant symbol for aliasing purposes. */
2891 base
= DR_BASE_NAME (dr
);
2892 switch (TREE_CODE (base
))
2897 /* FORNOW: Disabled.
2899 symbl = TREE_OPERAND (base, 0);
2903 /* CHECKME: could have recorded more accurate information -
2904 i.e, the actual FIELD_DECL that is being referenced -
2905 but later passes expect VAR_DECL as the nmt. */
2906 symbl
= vect_get_base_decl_and_bit_offset (base
, &offset
);
2911 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2914 "not vectorized: unhandled struct/class field access ");
2915 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2922 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2924 fprintf (dump_file
, "not vectorized: unhandled data ref: ");
2925 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2930 /* Find and record the memtag assigned to this data-ref. */
2931 if (TREE_CODE (symbl
) == VAR_DECL
)
2932 STMT_VINFO_MEMTAG (stmt_info
) = symbl
;
2933 else if (TREE_CODE (symbl
) == SSA_NAME
)
2936 symbl
= SSA_NAME_VAR (symbl
);
2937 tag
= get_var_ann (symbl
)->type_mem_tag
;
2940 tree ptr
= TREE_OPERAND (memref
, 0);
2941 if (TREE_CODE (ptr
) == SSA_NAME
)
2942 tag
= get_var_ann (SSA_NAME_VAR (ptr
))->type_mem_tag
;
2946 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2947 fprintf (dump_file
, "not vectorized: no memtag for ref.");
2950 STMT_VINFO_MEMTAG (stmt_info
) = tag
;
2954 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2956 fprintf (dump_file
, "not vectorized: unsupported data-ref: ");
2957 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
2962 VARRAY_PUSH_GENERIC_PTR (*datarefs
, dr
);
2963 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
2971 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2973 /* Function vect_mark_relevant.
2975 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2978 vect_mark_relevant (varray_type worklist
, tree stmt
)
2980 stmt_vec_info stmt_info
;
2982 if (vect_debug_details (NULL
))
2983 fprintf (dump_file
, "mark relevant.");
2985 if (TREE_CODE (stmt
) == PHI_NODE
)
2987 VARRAY_PUSH_TREE (worklist
, stmt
);
2991 stmt_info
= vinfo_for_stmt (stmt
);
2995 if (vect_debug_details (NULL
))
2997 fprintf (dump_file
, "mark relevant: no stmt info!!.");
2998 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3003 if (STMT_VINFO_RELEVANT_P (stmt_info
))
3005 if (vect_debug_details (NULL
))
3006 fprintf (dump_file
, "already marked relevant.");
3010 STMT_VINFO_RELEVANT_P (stmt_info
) = 1;
3011 VARRAY_PUSH_TREE (worklist
, stmt
);
3015 /* Function vect_stmt_relevant_p.
3017 Return true if STMT in loop that is represented by LOOP_VINFO is
3018 "relevant for vectorization".
3020 A stmt is considered "relevant for vectorization" if:
3021 - it has uses outside the loop.
3022 - it has vdefs (it alters memory).
3023 - control stmts in the loop (except for the exit condition).
3025 CHECKME: what other side effects would the vectorizer allow? */
3028 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
)
3030 v_may_def_optype v_may_defs
;
3031 v_must_def_optype v_must_defs
;
3032 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3037 /* cond stmt other than loop exit cond. */
3038 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
3041 /* changing memory. */
3042 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
3043 v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
3044 if (v_may_defs
|| v_must_defs
)
3046 if (vect_debug_details (NULL
))
3047 fprintf (dump_file
, "vec_stmt_relevant_p: stmt has vdefs.");
3051 /* uses outside the loop. */
3052 df
= get_immediate_uses (stmt
);
3053 num_uses
= num_immediate_uses (df
);
3054 for (i
= 0; i
< num_uses
; i
++)
3056 tree use
= immediate_use (df
, i
);
3057 basic_block bb
= bb_for_stmt (use
);
3058 if (!flow_bb_inside_loop_p (loop
, bb
))
3060 if (vect_debug_details (NULL
))
3061 fprintf (dump_file
, "vec_stmt_relevant_p: used out of loop.");
3070 /* Function vect_mark_stmts_to_be_vectorized.
3072 Not all stmts in the loop need to be vectorized. For example:
3081 Stmt 1 and 3 do not need to be vectorized, because loop control and
3082 addressing of vectorized data-refs are handled differently.
3084 This pass detects such stmts. */
3087 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
3089 varray_type worklist
;
3090 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3091 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3092 unsigned int nbbs
= loop
->num_nodes
;
3093 block_stmt_iterator si
;
3099 stmt_vec_info stmt_info
;
3101 if (vect_debug_details (NULL
))
3102 fprintf (dump_file
, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3104 VARRAY_TREE_INIT (worklist
, 64, "work list");
3106 /* 1. Init worklist. */
3108 for (i
= 0; i
< nbbs
; i
++)
3110 basic_block bb
= bbs
[i
];
3111 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3113 stmt
= bsi_stmt (si
);
3115 if (vect_debug_details (NULL
))
3117 fprintf (dump_file
, "init: stmt relevant? ");
3118 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3121 stmt_info
= vinfo_for_stmt (stmt
);
3122 STMT_VINFO_RELEVANT_P (stmt_info
) = 0;
3124 if (vect_stmt_relevant_p (stmt
, loop_vinfo
))
3125 vect_mark_relevant (worklist
, stmt
);
3130 /* 2. Process_worklist */
3132 while (VARRAY_ACTIVE_SIZE (worklist
) > 0)
3134 stmt
= VARRAY_TOP_TREE (worklist
);
3135 VARRAY_POP (worklist
);
3137 if (vect_debug_details (NULL
))
3139 fprintf (dump_file
, "worklist: examine stmt: ");
3140 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3143 /* Examine the USES in this statement. Mark all the statements which
3144 feed this statement's uses as "relevant", unless the USE is used as
3147 if (TREE_CODE (stmt
) == PHI_NODE
)
3149 /* follow the def-use chain inside the loop. */
3150 for (j
= 0; j
< PHI_NUM_ARGS (stmt
); j
++)
3152 tree arg
= PHI_ARG_DEF (stmt
, j
);
3153 tree def_stmt
= NULL_TREE
;
3155 if (!vect_is_simple_use (arg
, loop
, &def_stmt
))
3157 if (vect_debug_details (NULL
))
3158 fprintf (dump_file
, "worklist: unsupported use.");
3159 varray_clear (worklist
);
3165 if (vect_debug_details (NULL
))
3167 fprintf (dump_file
, "worklist: def_stmt: ");
3168 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
3171 bb
= bb_for_stmt (def_stmt
);
3172 if (flow_bb_inside_loop_p (loop
, bb
))
3173 vect_mark_relevant (worklist
, def_stmt
);
3177 ann
= stmt_ann (stmt
);
3178 use_ops
= USE_OPS (ann
);
3180 for (i
= 0; i
< NUM_USES (use_ops
); i
++)
3182 tree use
= USE_OP (use_ops
, i
);
3184 /* We are only interested in uses that need to be vectorized. Uses
3185 that are used for address computation are not considered relevant.
3187 if (exist_non_indexing_operands_for_use_p (use
, stmt
))
3189 tree def_stmt
= NULL_TREE
;
3191 if (!vect_is_simple_use (use
, loop
, &def_stmt
))
3193 if (vect_debug_details (NULL
))
3194 fprintf (dump_file
, "worklist: unsupported use.");
3195 varray_clear (worklist
);
3202 if (vect_debug_details (NULL
))
3204 fprintf (dump_file
, "worklist: examine use %d: ", i
);
3205 print_generic_expr (dump_file
, use
, TDF_SLIM
);
3208 bb
= bb_for_stmt (def_stmt
);
3209 if (flow_bb_inside_loop_p (loop
, bb
))
3210 vect_mark_relevant (worklist
, def_stmt
);
3213 } /* while worklist */
3215 varray_clear (worklist
);
3220 /* Function vect_get_loop_niters.
3222 Determine how many iterations the loop is executed. */
3225 vect_get_loop_niters (struct loop
*loop
, HOST_WIDE_INT
*number_of_iterations
)
3229 if (vect_debug_details (NULL
))
3230 fprintf (dump_file
, "\n<<get_loop_niters>>\n");
3232 niters
= number_of_iterations_in_loop (loop
);
3234 if (niters
!= NULL_TREE
3235 && niters
!= chrec_dont_know
3236 && host_integerp (niters
,0))
3238 *number_of_iterations
= TREE_INT_CST_LOW (niters
);
3240 if (vect_debug_details (NULL
))
3241 fprintf (dump_file
, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC
,
3242 *number_of_iterations
);
3245 return get_loop_exit_condition (loop
);
3249 /* Function vect_analyze_loop_form.
3251 Verify the following restrictions (some may be relaxed in the future):
3252 - it's an inner-most loop
3253 - number of BBs = 2 (which are the loop header and the latch)
3254 - the loop has a pre-header
3255 - the loop has a single entry and exit
3256 - the loop exit condition is simple enough, and the number of iterations
3257 can be analyzed (a countable loop). */
3259 static loop_vec_info
3260 vect_analyze_loop_form (struct loop
*loop
)
3262 loop_vec_info loop_vinfo
;
3264 HOST_WIDE_INT number_of_iterations
= -1;
3266 if (vect_debug_details (loop
))
3267 fprintf (dump_file
, "\n<<vect_analyze_loop_form>>\n");
3269 if (loop
->level
> 1 /* FORNOW: inner-most loop */
3270 || loop
->num_exits
> 1 || loop
->num_entries
> 1 || loop
->num_nodes
!= 2
3271 || !loop
->pre_header
|| !loop
->header
|| !loop
->latch
)
3273 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3275 fprintf (dump_file
, "not vectorized: bad loop form. ");
3276 if (loop
->level
> 1)
3277 fprintf (dump_file
, "nested loop.");
3278 else if (loop
->num_exits
> 1 || loop
->num_entries
> 1)
3279 fprintf (dump_file
, "multiple entries or exits.");
3280 else if (loop
->num_nodes
!= 2 || !loop
->header
|| !loop
->latch
)
3281 fprintf (dump_file
, "too many BBs in loop.");
3282 else if (!loop
->pre_header
)
3283 fprintf (dump_file
, "no pre-header BB for loop.");
3289 /* We assume that the loop exit condition is at the end of the loop. i.e,
3290 that the loop is represented as a do-while (with a proper if-guard
3291 before the loop if needed), where the loop header contains all the
3292 executable statements, and the latch is empty. */
3293 if (!empty_block_p (loop
->latch
))
3295 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3296 fprintf (dump_file
, "not vectorized: unexpectd loop form.");
3300 if (empty_block_p (loop
->header
))
3302 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3303 fprintf (dump_file
, "not vectorized: empty loop.");
3307 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
3310 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3311 fprintf (dump_file
, "not vectorized: complicated exit condition.");
3315 if (number_of_iterations
< 0)
3317 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3318 fprintf (dump_file
, "not vectorized: unknown loop bound.");
3322 if (number_of_iterations
== 0) /* CHECKME: can this happen? */
3324 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3325 fprintf (dump_file
, "not vectorized: number of iterations = 0.");
3329 loop_vinfo
= new_loop_vec_info (loop
);
3330 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
3331 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
3337 /* Function vect_analyze_loop.
3339 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3340 for it. The different analyses will record information in the
3341 loop_vec_info struct. */
3343 static loop_vec_info
3344 vect_analyze_loop (struct loop
*loop
)
3347 loop_vec_info loop_vinfo
;
3349 if (vect_debug_details (NULL
))
3350 fprintf (dump_file
, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3352 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3354 loop_vinfo
= vect_analyze_loop_form (loop
);
3357 if (vect_debug_details (loop
))
3358 fprintf (dump_file
, "bad loop form.");
3362 /* Find all data references in the loop (which correspond to vdefs/vuses)
3363 and analyze their evolution in the loop.
3365 FORNOW: Handle only simple, one-dimensional, array references, which
3366 alignment can be forced, and aligned pointer-references. */
3368 ok
= vect_analyze_data_refs (loop_vinfo
);
3371 if (vect_debug_details (loop
))
3372 fprintf (dump_file
, "bad data references.");
3373 destroy_loop_vec_info (loop_vinfo
);
3378 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3380 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
3383 if (vect_debug_details (loop
))
3384 fprintf (dump_file
, "unexpected pattern.");
3385 if (vect_debug_details (loop
))
3386 fprintf (dump_file
, "not vectorized: unexpected pattern.");
3387 destroy_loop_vec_info (loop_vinfo
);
3392 /* Check that all cross-iteration scalar data-flow cycles are OK.
3393 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3395 ok
= vect_analyze_scalar_cycles (loop_vinfo
);
3398 if (vect_debug_details (loop
))
3399 fprintf (dump_file
, "bad scalar cycle.");
3400 destroy_loop_vec_info (loop_vinfo
);
3405 /* Analyze data dependences between the data-refs in the loop.
3406 FORNOW: fail at the first data dependence that we encounter. */
3408 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
3411 if (vect_debug_details (loop
))
3412 fprintf (dump_file
, "bad data dependence.");
3413 destroy_loop_vec_info (loop_vinfo
);
3418 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3419 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3421 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
3424 if (vect_debug_details (loop
))
3425 fprintf (dump_file
, "bad data access.");
3426 destroy_loop_vec_info (loop_vinfo
);
3431 /* Analyze the alignment of the data-refs in the loop.
3432 FORNOW: Only aligned accesses are handled. */
3434 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
3437 if (vect_debug_details (loop
))
3438 fprintf (dump_file
, "bad data alignment.");
3439 destroy_loop_vec_info (loop_vinfo
);
3444 /* Scan all the operations in the loop and make sure they are
3447 ok
= vect_analyze_operations (loop_vinfo
);
3450 if (vect_debug_details (loop
))
3451 fprintf (dump_file
, "bad operation or unsupported loop bound.");
3452 destroy_loop_vec_info (loop_vinfo
);
3456 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;
3462 /* Function need_imm_uses_for.
3464 Return whether we ought to include information for 'var'
3465 when calculating immediate uses. For this pass we only want use
3466 information for non-virtual variables. */
3469 need_imm_uses_for (tree var
)
3471 return is_gimple_reg (var
);
3475 /* Function vectorize_loops.
3477 Entry Point to loop vectorization phase. */
3480 vectorize_loops (struct loops
*loops
)
3482 unsigned int i
, loops_num
;
3483 unsigned int num_vectorized_loops
= 0;
3485 /* Does the target support SIMD? */
3486 /* FORNOW: until more sophisticated machine modelling is in place. */
3487 if (!UNITS_PER_SIMD_WORD
)
3489 if (vect_debug_details (NULL
))
3490 fprintf (dump_file
, "vectorizer: target vector size is not defined.");
3494 compute_immediate_uses (TDFA_USE_OPS
, need_imm_uses_for
);
3496 /* ----------- Analyze loops. ----------- */
3498 /* If some loop was duplicated, it gets bigger number
3499 than all previously defined loops. This fact allows us to run
3500 only over initial loops skipping newly generated ones. */
3501 loops_num
= loops
->num
;
3502 for (i
= 1; i
< loops_num
; i
++)
3504 loop_vec_info loop_vinfo
;
3505 struct loop
*loop
= loops
->parray
[i
];
3510 flow_loop_scan (loop
, LOOP_ALL
);
3512 loop_vinfo
= vect_analyze_loop (loop
);
3513 loop
->aux
= loop_vinfo
;
3515 if (!loop_vinfo
|| !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
))
3518 vect_transform_loop (loop_vinfo
, loops
);
3519 num_vectorized_loops
++;
3522 if (vect_debug_stats (NULL
) || vect_debug_details (NULL
))
3523 fprintf (dump_file
, "\nvectorized %u loops in function.\n",
3524 num_vectorized_loops
);
3526 /* ----------- Finalize. ----------- */
3529 for (i
= 1; i
< loops_num
; i
++)
3531 struct loop
*loop
= loops
->parray
[i
];
3532 loop_vec_info loop_vinfo
= loop
->aux
;
3535 destroy_loop_vec_info (loop_vinfo
);
3539 loop_commit_inserts ();
3540 rewrite_into_ssa (false);
3541 if (bitmap_first_set_bit (vars_to_rename
) >= 0)
3543 /* The rewrite of ssa names may cause violation of loop closed ssa
3544 form invariants. TODO -- avoid these rewrites completely.
3545 Information in virtual phi nodes is sufficient for it. */
3546 rewrite_into_loop_closed_ssa ();
3548 bitmap_clear (vars_to_rename
);