1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
49 static void vect_align_data_ref (tree
);
50 static tree
vect_create_destination_var (tree
, tree
);
51 static tree vect_create_data_ref_ptr
52 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
53 static tree
vect_create_index_for_vector_ref (loop_vec_info
);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
56 static tree
vect_get_vec_def_for_operand (tree
, tree
);
57 static tree
vect_init_vector (tree
, tree
);
58 static void vect_finish_stmt_generation
59 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
61 /* Utility function dealing with loop peeling (not peeling itself). */
62 static void vect_generate_tmps_on_preheader
63 (loop_vec_info
, tree
*, tree
*, tree
*);
64 static tree
vect_build_loop_niters (loop_vec_info
);
65 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
66 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
67 static void vect_update_inits_of_dr (struct data_reference
*, tree niters
);
68 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
69 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
70 static void vect_do_peeling_for_loop_bound
71 (loop_vec_info
, tree
*, struct loops
*);
74 /* Function vect_get_new_vect_var.
76 Returns a name for a new variable. The current naming scheme appends the
77 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
78 the name of vectorizer generated variables, and appends that to NAME if
82 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
88 if (var_kind
== vect_simple_var
)
93 prefix_len
= strlen (prefix
);
96 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
98 new_vect_var
= create_tmp_var (type
, prefix
);
104 /* Function vect_create_index_for_vector_ref.
106 Create (and return) an index variable, along with it's update chain in the
107 loop. This variable will be used to access a memory location in a vector
111 LOOP: The loop being vectorized.
112 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
113 function can be added here, or in the loop pre-header.
116 Return an index that will be used to index a vector array. It is expected
117 that a pointer to the first vector will be used as the base address for the
120 FORNOW: we are not trying to be efficient, just creating a new index each
121 time from scratch. At this time all vector references could use the same
124 TODO: create only one index to be used by all vector references. Record
125 the index in the LOOP_VINFO the first time this procedure is called and
126 return it on subsequent calls. The increment of this index must be placed
127 just before the conditional expression that ends the single block loop. */
130 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo
)
133 block_stmt_iterator incr_bsi
;
135 tree indx_before_incr
, indx_after_incr
;
136 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
139 /* It is assumed that the base pointer used for vectorized access contains
140 the address of the first vector. Therefore the index used for vectorized
141 access must be initialized to zero and incremented by 1. */
143 init
= integer_zero_node
;
144 step
= integer_one_node
;
146 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
147 create_iv (init
, step
, NULL_TREE
, loop
, &incr_bsi
, insert_after
,
148 &indx_before_incr
, &indx_after_incr
);
149 incr
= bsi_stmt (incr_bsi
);
150 get_stmt_operands (incr
);
151 set_stmt_info (stmt_ann (incr
), new_stmt_vec_info (incr
, loop_vinfo
));
153 return indx_before_incr
;
157 /* Function vect_create_addr_base_for_vector_ref.
159 Create an expression that computes the address of the first memory location
160 that will be accessed for a data reference.
163 STMT: The statement containing the data reference.
164 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
165 OFFSET: Optional. If supplied, it is be added to the initial address.
168 1. Return an SSA_NAME whose value is the address of the memory location of
169 the first vector of the data reference.
170 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
171 these statement(s) which define the returned SSA_NAME.
173 FORNOW: We are only handling array accesses with step 1. */
176 vect_create_addr_base_for_vector_ref (tree stmt
,
180 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
181 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
183 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
));
184 tree base_name
= build_fold_indirect_ref (data_ref_base
);
185 tree ref
= DR_REF (dr
);
186 tree scalar_type
= TREE_TYPE (ref
);
187 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
190 tree addr_base
, addr_expr
;
192 tree base_offset
= unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info
));
194 /* Create base_offset */
195 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
196 add_referenced_tmp_var (dest
);
197 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
198 append_to_statement_list_force (new_stmt
, new_stmt_list
);
202 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
203 add_referenced_tmp_var (tmp
);
204 offset
= fold (build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
,
205 STMT_VINFO_VECT_STEP (stmt_info
)));
206 base_offset
= fold (build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
207 base_offset
, offset
));
208 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
209 append_to_statement_list_force (new_stmt
, new_stmt_list
);
212 /* base + base_offset */
213 addr_base
= fold (build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
216 /* addr_expr = addr_base */
217 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
218 get_name (base_name
));
219 add_referenced_tmp_var (addr_expr
);
220 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
221 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
222 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
223 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
225 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
227 fprintf (vect_dump
, "created ");
228 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
234 /* Function vect_align_data_ref.
236 Handle mislignment of a memory accesses.
238 FORNOW: Can't handle misaligned accesses.
239 Make sure that the dataref is aligned. */
242 vect_align_data_ref (tree stmt
)
244 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
245 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
247 /* FORNOW: can't handle misaligned accesses;
248 all accesses expected to be aligned. */
249 gcc_assert (aligned_access_p (dr
));
253 /* Function vect_create_data_ref_ptr.
255 Create a memory reference expression for vector access, to be used in a
256 vector load/store stmt. The reference is based on a new pointer to vector
260 1. STMT: a stmt that references memory. Expected to be of the form
261 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
262 2. BSI: block_stmt_iterator where new stmts can be added.
263 3. OFFSET (optional): an offset to be added to the initial address accessed
264 by the data-ref in STMT.
265 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
266 pointing to the initial address.
269 1. Declare a new ptr to vector_type, and have it point to the base of the
270 data reference (initial addressed accessed by the data reference).
271 For example, for vector of type V8HI, the following code is generated:
274 vp = (v8hi *)initial_address;
276 if OFFSET is not supplied:
277 initial_address = &a[init];
278 if OFFSET is supplied:
279 initial_address = &a[init + OFFSET];
281 Return the initial_address in INITIAL_ADDRESS.
283 2. Create a data-reference in the loop based on the new vector pointer vp,
284 and using a new index variable 'idx' as follows:
288 where if ONLY_INIT is true:
291 update = idx + vector_type_size
293 Return the pointer vp'.
296 FORNOW: handle only aligned and consecutive accesses. */
299 vect_create_data_ref_ptr (tree stmt
, block_stmt_iterator
*bsi
, tree offset
,
300 tree
*initial_address
, bool only_init
)
303 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
304 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
305 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
306 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
310 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
311 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
312 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
313 int nvuses
, nv_may_defs
, nv_must_defs
;
317 tree new_stmt_list
= NULL_TREE
;
319 edge pe
= loop_preheader_edge (loop
);
325 tree type
, tmp
, size
;
327 base_name
= build_fold_indirect_ref (unshare_expr (
328 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
)));
330 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
332 tree data_ref_base
= base_name
;
333 fprintf (vect_dump
, "create array_ref of type: ");
334 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
335 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
336 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
337 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
338 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
339 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
340 fprintf (vect_dump
, " vectorizing a record based array ref: ");
341 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
342 fprintf (vect_dump
, " vectorizing a pointer ref: ");
343 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
346 /** (1) Create the new vector-pointer variable: **/
348 vect_ptr_type
= build_pointer_type (vectype
);
349 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
350 get_name (base_name
));
351 add_referenced_tmp_var (vect_ptr
);
354 /** (2) Handle aliasing information of the new vector-pointer: **/
356 tag
= STMT_VINFO_MEMTAG (stmt_info
);
358 get_var_ann (vect_ptr
)->type_mem_tag
= tag
;
360 /* Mark for renaming all aliased variables
361 (i.e, the may-aliases of the type-mem-tag). */
362 nvuses
= NUM_VUSES (vuses
);
363 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
364 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
365 for (i
= 0; i
< nvuses
; i
++)
367 tree use
= VUSE_OP (vuses
, i
);
368 if (TREE_CODE (use
) == SSA_NAME
)
369 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (use
))->uid
);
371 for (i
= 0; i
< nv_may_defs
; i
++)
373 tree def
= V_MAY_DEF_RESULT (v_may_defs
, i
);
374 if (TREE_CODE (def
) == SSA_NAME
)
375 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
377 for (i
= 0; i
< nv_must_defs
; i
++)
379 tree def
= V_MUST_DEF_RESULT (v_must_defs
, i
);
380 if (TREE_CODE (def
) == SSA_NAME
)
381 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
385 /** (3) Calculate the initial address the vector-pointer, and set
386 the vector-pointer to point to it before the loop: **/
388 /* Create: (&(base[init_val+offset]) in the loop preheader. */
389 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
391 pe
= loop_preheader_edge (loop
);
392 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
393 gcc_assert (!new_bb
);
394 *initial_address
= new_temp
;
396 /* Create: p = (vectype *) initial_base */
397 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
398 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
399 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
400 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
401 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
402 gcc_assert (!new_bb
);
403 vect_ptr_init
= TREE_OPERAND (vec_stmt
, 0);
406 /** (4) Handle the updating of the vector-pointer inside the loop: **/
408 if (only_init
) /* No update in loop is required. */
409 return vect_ptr_init
;
411 idx
= vect_create_index_for_vector_ref (loop_vinfo
);
413 /* Create: update = idx * vectype_size */
414 tmp
= create_tmp_var (integer_type_node
, "update");
415 add_referenced_tmp_var (tmp
);
416 size
= TYPE_SIZE (vect_ptr_type
);
417 type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
418 ptr_update
= create_tmp_var (type
, "update");
419 add_referenced_tmp_var (ptr_update
);
420 vectype_size
= TYPE_SIZE_UNIT (vectype
);
421 vec_stmt
= build2 (MULT_EXPR
, integer_type_node
, idx
, vectype_size
);
422 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, tmp
, vec_stmt
);
423 new_temp
= make_ssa_name (tmp
, vec_stmt
);
424 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
425 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
426 vec_stmt
= fold_convert (type
, new_temp
);
427 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, ptr_update
, vec_stmt
);
428 new_temp
= make_ssa_name (ptr_update
, vec_stmt
);
429 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
430 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
432 /* Create: data_ref_ptr = vect_ptr_init + update */
433 vec_stmt
= build2 (PLUS_EXPR
, vect_ptr_type
, vect_ptr_init
, new_temp
);
434 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
435 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
436 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
437 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
438 data_ref_ptr
= TREE_OPERAND (vec_stmt
, 0);
444 /* Function vect_create_destination_var.
446 Create a new temporary of type VECTYPE. */
449 vect_create_destination_var (tree scalar_dest
, tree vectype
)
452 const char *new_name
;
454 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
456 new_name
= get_name (scalar_dest
);
459 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, new_name
);
460 add_referenced_tmp_var (vec_dest
);
466 /* Function vect_init_vector.
468 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
469 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
470 used in the vectorization of STMT. */
473 vect_init_vector (tree stmt
, tree vector_var
)
475 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
476 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
477 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
480 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
486 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
487 add_referenced_tmp_var (new_var
);
489 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
490 new_temp
= make_ssa_name (new_var
, init_stmt
);
491 TREE_OPERAND (init_stmt
, 0) = new_temp
;
493 pe
= loop_preheader_edge (loop
);
494 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
495 gcc_assert (!new_bb
);
497 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
499 fprintf (vect_dump
, "created new init_stmt: ");
500 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
503 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
508 /* Function vect_get_vec_def_for_operand.
510 OP is an operand in STMT. This function returns a (vector) def that will be
511 used in the vectorized stmt for STMT.
513 In the case that OP is an SSA_NAME which is defined in the loop, then
514 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
516 In case OP is an invariant or constant, a new stmt that creates a vector def
517 needs to be introduced. */
520 vect_get_vec_def_for_operand (tree op
, tree stmt
)
525 stmt_vec_info def_stmt_info
= NULL
;
526 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
527 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
528 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
529 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
530 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
537 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
539 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
540 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
543 /** ===> Case 1: operand is a constant. **/
545 if (TREE_CODE (op
) == INTEGER_CST
|| TREE_CODE (op
) == REAL_CST
)
547 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
551 /* Build a tree with vector elements. */
552 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
553 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
555 for (i
= nunits
- 1; i
>= 0; --i
)
557 t
= tree_cons (NULL_TREE
, op
, t
);
559 vec_cst
= build_vector (vectype
, t
);
560 return vect_init_vector (stmt
, vec_cst
);
563 gcc_assert (TREE_CODE (op
) == SSA_NAME
);
565 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
567 def_stmt
= SSA_NAME_DEF_STMT (op
);
568 def_stmt_info
= vinfo_for_stmt (def_stmt
);
570 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
572 fprintf (vect_dump
, "vect_get_vec_def_for_operand: def_stmt: ");
573 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
577 /** ==> Case 2.1: operand is defined inside the loop. **/
581 /* Get the def from the vectorized stmt. */
583 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
584 gcc_assert (vec_stmt
);
585 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
590 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
591 it is a reduction/induction. **/
593 bb
= bb_for_stmt (def_stmt
);
594 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
596 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
597 fprintf (vect_dump
, "reduction/induction - unsupported.");
598 internal_error ("no support for reduction/induction"); /* FORNOW */
602 /** ==> Case 2.3: operand is defined outside the loop -
603 it is a loop invariant. */
605 switch (TREE_CODE (def_stmt
))
608 def
= PHI_RESULT (def_stmt
);
611 def
= TREE_OPERAND (def_stmt
, 0);
614 def
= TREE_OPERAND (def_stmt
, 0);
615 gcc_assert (IS_EMPTY_STMT (def_stmt
));
619 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
621 fprintf (vect_dump
, "unsupported defining stmt: ");
622 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
624 internal_error ("unsupported defining stmt");
627 /* Build a tree with vector elements.
628 Create 'vec_inv = {inv,inv,..,inv}' */
630 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
631 fprintf (vect_dump
, "Create vector_inv.");
633 for (i
= nunits
- 1; i
>= 0; --i
)
635 t
= tree_cons (NULL_TREE
, def
, t
);
638 vec_inv
= build_constructor (vectype
, t
);
639 return vect_init_vector (stmt
, vec_inv
);
643 /* Function vect_finish_stmt_generation.
645 Insert a new stmt. */
648 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
650 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
652 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
654 fprintf (vect_dump
, "add new stmt: ");
655 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
658 #ifdef ENABLE_CHECKING
659 /* Make sure bsi points to the stmt that is being vectorized. */
660 gcc_assert (stmt
== bsi_stmt (*bsi
));
663 #ifdef USE_MAPPED_LOCATION
664 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCUS (stmt
));
666 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
671 /* Function vectorizable_assignment.
673 Check if STMT performs an assignment (copy) that can be vectorized.
674 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
675 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
679 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
686 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
687 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
690 /* Is vectorizable assignment? */
692 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
695 scalar_dest
= TREE_OPERAND (stmt
, 0);
696 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
699 op
= TREE_OPERAND (stmt
, 1);
700 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
702 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
703 fprintf (vect_dump
, "use not simple.");
707 if (!vec_stmt
) /* transformation not required. */
709 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
714 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
715 fprintf (vect_dump
, "transform assignment.");
718 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
721 op
= TREE_OPERAND (stmt
, 1);
722 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
);
724 /* Arguments are ready. create the new vector stmt. */
725 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
726 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
727 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
728 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
734 /* Function vectorizable_operation.
736 Check if STMT performs a binary or unary operation that can be vectorized.
737 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
738 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
739 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
742 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
747 tree op0
, op1
= NULL
;
748 tree vec_oprnd0
, vec_oprnd1
=NULL
;
749 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
750 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
751 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
754 enum machine_mode vec_mode
;
760 /* Is STMT a vectorizable binary/unary operation? */
761 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
764 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
767 operation
= TREE_OPERAND (stmt
, 1);
768 code
= TREE_CODE (operation
);
769 optab
= optab_for_tree_code (code
, vectype
);
771 /* Support only unary or binary operations. */
772 op_type
= TREE_CODE_LENGTH (code
);
773 if (op_type
!= unary_op
&& op_type
!= binary_op
)
775 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
776 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
780 for (i
= 0; i
< op_type
; i
++)
782 op
= TREE_OPERAND (operation
, i
);
783 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
785 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
786 fprintf (vect_dump
, "use not simple.");
791 /* Supportable by target? */
794 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
795 fprintf (vect_dump
, "no optab.");
798 vec_mode
= TYPE_MODE (vectype
);
799 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
801 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
802 fprintf (vect_dump
, "op not supported by target.");
806 if (!vec_stmt
) /* transformation not required. */
808 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
814 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
815 fprintf (vect_dump
, "transform binary/unary operation.");
818 scalar_dest
= TREE_OPERAND (stmt
, 0);
819 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
822 op0
= TREE_OPERAND (operation
, 0);
823 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
825 if (op_type
== binary_op
)
827 op1
= TREE_OPERAND (operation
, 1);
828 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
831 /* Arguments are ready. create the new vector stmt. */
833 if (op_type
== binary_op
)
834 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
835 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
837 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
838 build1 (code
, vectype
, vec_oprnd0
));
839 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
840 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
841 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
847 /* Function vectorizable_store.
849 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
851 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
852 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
853 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
856 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
862 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
863 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
864 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
865 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
866 enum machine_mode vec_mode
;
868 enum dr_alignment_support alignment_support_cheme
;
870 /* Is vectorizable store? */
872 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
875 scalar_dest
= TREE_OPERAND (stmt
, 0);
876 if (TREE_CODE (scalar_dest
) != ARRAY_REF
877 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
880 op
= TREE_OPERAND (stmt
, 1);
881 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
883 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
884 fprintf (vect_dump
, "use not simple.");
888 vec_mode
= TYPE_MODE (vectype
);
889 /* FORNOW. In some cases can vectorize even if data-type not supported
890 (e.g. - array initialization with 0). */
891 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
894 if (!STMT_VINFO_DATA_REF (stmt_info
))
898 if (!vec_stmt
) /* transformation not required. */
900 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
906 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
907 fprintf (vect_dump
, "transform store");
909 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
910 gcc_assert (alignment_support_cheme
);
911 gcc_assert (alignment_support_cheme
= dr_aligned
); /* FORNOW */
913 /* Handle use - get the vectorized def from the defining stmt. */
914 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
);
917 /* FORNOW: make sure the data reference is aligned. */
918 vect_align_data_ref (stmt
);
919 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
920 data_ref
= build_fold_indirect_ref (data_ref
);
922 /* Arguments are ready. create the new vector stmt. */
923 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
924 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
930 /* vectorizable_load.
932 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
934 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
935 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
936 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
939 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
942 tree vec_dest
= NULL
;
943 tree data_ref
= NULL
;
945 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
946 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
947 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
954 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
955 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
956 edge pe
= loop_preheader_edge (loop
);
957 enum dr_alignment_support alignment_support_cheme
;
959 /* Is vectorizable load? */
961 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
964 scalar_dest
= TREE_OPERAND (stmt
, 0);
965 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
968 op
= TREE_OPERAND (stmt
, 1);
969 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
972 if (!STMT_VINFO_DATA_REF (stmt_info
))
975 mode
= (int) TYPE_MODE (vectype
);
977 /* FORNOW. In some cases can vectorize even if data-type not supported
978 (e.g. - data copies). */
979 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
981 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
982 fprintf (vect_dump
, "Aligned load, but unsupported type.");
986 if (!vec_stmt
) /* transformation not required. */
988 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
994 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
995 fprintf (vect_dump
, "transform load.");
997 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
998 gcc_assert (alignment_support_cheme
);
1000 if (alignment_support_cheme
== dr_aligned
1001 || alignment_support_cheme
== dr_unaligned_supported
)
1012 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1013 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1014 if (aligned_access_p (dr
))
1015 data_ref
= build_fold_indirect_ref (data_ref
);
1018 int mis
= DR_MISALIGNMENT (dr
);
1019 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
1020 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
1021 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
1023 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1024 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1025 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1026 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1028 else if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
1032 msq_init = *(floor(p1))
1033 p2 = initial_addr + VS - 1;
1034 magic = have_builtin ? builtin_result : initial_address;
1037 p2' = p2 + indx * vectype_size
1039 vec_dest = realign_load (msq, lsq, magic)
1053 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1054 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1055 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
1057 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
1058 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1059 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1060 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1061 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1062 gcc_assert (!new_bb
);
1063 msq_init
= TREE_OPERAND (new_stmt
, 0);
1066 /* <2> Create lsq = *(floor(p2')) in the loop */
1067 offset
= build_int_cst (integer_type_node
,
1068 GET_MODE_NUNITS (TYPE_MODE (vectype
)));
1069 offset
= int_const_binop (MINUS_EXPR
, offset
, integer_one_node
, 1);
1070 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1071 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
1072 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
1073 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1074 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1075 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1076 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1077 lsq
= TREE_OPERAND (new_stmt
, 0);
1081 if (targetm
.vectorize
.builtin_mask_for_load
)
1083 /* Create permutation mask, if required, in loop preheader. */
1085 params
= build_tree_list (NULL_TREE
, init_addr
);
1086 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1087 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
1088 new_stmt
= build_function_call_expr (builtin_decl
, params
);
1089 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1090 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1091 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1092 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1093 gcc_assert (!new_bb
);
1094 magic
= TREE_OPERAND (new_stmt
, 0);
1096 /* Since we have just created a CALL_EXPR, we may need to
1097 rename call-clobbered variables. */
1098 mark_call_clobbered_vars_to_rename ();
1102 /* Use current address instead of init_addr for reduced reg pressure.
1104 magic
= dataref_ptr
;
1108 /* <4> Create msq = phi <msq_init, lsq> in loop */
1109 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1110 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
1111 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
1112 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
1113 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
1114 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
1117 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1118 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1119 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
1120 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1121 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1122 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1123 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1128 *vec_stmt
= new_stmt
;
1133 /* Function vect_transform_stmt.
1135 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1138 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
1140 bool is_store
= false;
1141 tree vec_stmt
= NULL_TREE
;
1142 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1145 switch (STMT_VINFO_TYPE (stmt_info
))
1147 case op_vec_info_type
:
1148 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
1152 case assignment_vec_info_type
:
1153 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
1157 case load_vec_info_type
:
1158 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
1162 case store_vec_info_type
:
1163 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
1168 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1169 fprintf (vect_dump
, "stmt not supported.");
1173 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
1179 /* This function builds ni_name = number of iterations loop executes
1180 on the loop preheader. */
1183 vect_build_loop_niters (loop_vec_info loop_vinfo
)
1185 tree ni_name
, stmt
, var
;
1187 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1188 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
1190 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
1191 add_referenced_tmp_var (var
);
1192 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
1194 pe
= loop_preheader_edge (loop
);
1197 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1198 gcc_assert (!new_bb
);
1205 /* This function generates the following statements:
1207 ni_name = number of iterations loop executes
1208 ratio = ni_name / vf
1209 ratio_mult_vf_name = ratio * vf
1211 and places them at the loop preheader edge. */
1214 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
1216 tree
*ratio_mult_vf_name_ptr
,
1217 tree
*ratio_name_ptr
)
1225 tree ratio_mult_vf_name
;
1226 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1227 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
1228 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1229 tree log_vf
= build_int_cst (unsigned_type_node
, exact_log2 (vf
));
1231 pe
= loop_preheader_edge (loop
);
1233 /* Generate temporary variable that contains
1234 number of iterations loop executes. */
1236 ni_name
= vect_build_loop_niters (loop_vinfo
);
1238 /* Create: ratio = ni >> log2(vf) */
1240 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
1241 add_referenced_tmp_var (var
);
1242 ratio_name
= make_ssa_name (var
, NULL_TREE
);
1243 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
1244 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
1245 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
1247 pe
= loop_preheader_edge (loop
);
1248 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1249 gcc_assert (!new_bb
);
1251 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1253 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
1254 add_referenced_tmp_var (var
);
1255 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
1256 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
1257 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
1258 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
1260 pe
= loop_preheader_edge (loop
);
1261 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1262 gcc_assert (!new_bb
);
1264 *ni_name_ptr
= ni_name
;
1265 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
1266 *ratio_name_ptr
= ratio_name
;
1272 /* Function vect_update_ivs_after_vectorizer.
1274 "Advance" the induction variables of LOOP to the value they should take
1275 after the execution of LOOP. This is currently necessary because the
1276 vectorizer does not handle induction variables that are used after the
1277 loop. Such a situation occurs when the last iterations of LOOP are
1279 1. We introduced new uses after LOOP for IVs that were not originally used
1280 after LOOP: the IVs of LOOP are now used by an epilog loop.
1281 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1282 times, whereas the loop IVs should be bumped N times.
1285 - LOOP - a loop that is going to be vectorized. The last few iterations
1286 of LOOP were peeled.
1287 - NITERS - the number of iterations that LOOP executes (before it is
1288 vectorized). i.e, the number of times the ivs should be bumped.
1289 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1290 coming out from LOOP on which there are uses of the LOOP ivs
1291 (this is the path from LOOP->exit to epilog_loop->preheader).
1293 The new definitions of the ivs are placed in LOOP->exit.
1294 The phi args associated with the edge UPDATE_E in the bb
1295 UPDATE_E->dest are updated accordingly.
1297 Assumption 1: Like the rest of the vectorizer, this function assumes
1298 a single loop exit that has a single predecessor.
1300 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1301 organized in the same order.
1303 Assumption 3: The access function of the ivs is simple enough (see
1304 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1306 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1307 coming out of LOOP on which the ivs of LOOP are used (this is the path
1308 that leads to the epilog loop; other paths skip the epilog loop). This
1309 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1310 needs to have its phis updated.
1314 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
1317 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1318 basic_block exit_bb
= loop
->exit_edges
[0]->dest
;
1320 basic_block update_bb
= update_e
->dest
;
1322 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1324 /* Make sure there exists a single-predecessor exit bb: */
1325 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1);
1327 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
1329 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
1331 tree access_fn
= NULL
;
1332 tree evolution_part
;
1335 tree var
, stmt
, ni
, ni_name
;
1336 block_stmt_iterator last_bsi
;
1338 /* Skip virtual phi's. */
1339 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
1341 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1342 fprintf (vect_dump
, "virtual phi. skip.");
1346 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
1347 gcc_assert (access_fn
);
1349 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
1350 gcc_assert (evolution_part
!= NULL_TREE
);
1352 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1353 of degree >= 2 or exponential. */
1354 gcc_assert (!tree_is_chrec (evolution_part
));
1356 step_expr
= evolution_part
;
1357 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
1360 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
1361 build2 (MULT_EXPR
, TREE_TYPE (niters
),
1362 niters
, step_expr
), init_expr
);
1364 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
1365 add_referenced_tmp_var (var
);
1367 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
1369 /* Insert stmt into exit_bb. */
1370 last_bsi
= bsi_last (exit_bb
);
1372 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
1374 /* Fix phi expressions in the successor bb. */
1375 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1
, update_e
) ==
1376 PHI_ARG_DEF_FROM_EDGE (phi
, EDGE_SUCC (loop
->latch
, 0)));
1377 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
1382 /* Function vect_do_peeling_for_loop_bound
1384 Peel the last iterations of the loop represented by LOOP_VINFO.
1385 The peeled iterations form a new epilog loop. Given that the loop now
1386 iterates NITERS times, the new epilog loop iterates
1387 NITERS % VECTORIZATION_FACTOR times.
1389 The original loop will later be made to iterate
1390 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1393 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
1394 struct loops
*loops
)
1397 tree ni_name
, ratio_mult_vf_name
;
1398 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1399 struct loop
*new_loop
;
1401 #ifdef ENABLE_CHECKING
1405 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1406 fprintf (vect_dump
, "=== vect_transtorm_for_unknown_loop_bound ===");
1408 /* Generate the following variables on the preheader of original loop:
1410 ni_name = number of iteration the original loop executes
1411 ratio = ni_name / vf
1412 ratio_mult_vf_name = ratio * vf */
1413 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
1414 &ratio_mult_vf_name
, ratio
);
1416 /* Update loop info. */
1417 loop
->pre_header
= loop_preheader_edge (loop
)->src
;
1418 loop
->pre_header_edges
[0] = loop_preheader_edge (loop
);
1420 #ifdef ENABLE_CHECKING
1421 loop_num
= loop
->num
;
1423 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->exit_edges
[0],
1424 ratio_mult_vf_name
, ni_name
, false);
1425 #ifdef ENABLE_CHECKING
1426 gcc_assert (new_loop
);
1427 gcc_assert (loop_num
== loop
->num
);
1428 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
1431 /* A guard that controls whether the new_loop is to be executed or skipped
1432 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1433 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1434 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1435 is on the path where the LOOP IVs are used and need to be updated. */
1437 if (EDGE_PRED (new_loop
->pre_header
, 0)->src
== loop
->exit_edges
[0]->dest
)
1438 update_e
= EDGE_PRED (new_loop
->pre_header
, 0);
1440 update_e
= EDGE_PRED (new_loop
->pre_header
, 1);
1442 /* Update IVs of original loop as if they were advanced
1443 by ratio_mult_vf_name steps. */
1444 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
1446 /* After peeling we have to reset scalar evolution analyzer. */
1453 /* Function vect_gen_niters_for_prolog_loop
1455 Set the number of iterations for the loop represented by LOOP_VINFO
1456 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1457 and the misalignment of DR - the first data reference recorded in
1458 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1459 this loop, the data reference DR will refer to an aligned location.
1461 The following computation is generated:
1463 compute address misalignment in bytes:
1464 addr_mis = addr & (vectype_size - 1)
1466 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1468 (elem_size = element type size; an element is the scalar element
1469 whose type is the inner type of the vectype) */
1472 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
1474 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
1475 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1476 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1478 tree iters
, iters_name
;
1481 tree dr_stmt
= DR_STMT (dr
);
1482 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
1483 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1484 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
1487 tree new_stmts
= NULL_TREE
;
1489 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
1490 tree ptr_type
= TREE_TYPE (start_addr
);
1491 tree size
= TYPE_SIZE (ptr_type
);
1492 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
1493 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
1494 tree vf_minus_1
= build_int_cst (unsigned_type_node
, vf
- 1);
1495 tree niters_type
= TREE_TYPE (loop_niters
);
1496 tree elem_size_log
=
1497 build_int_cst (unsigned_type_node
, exact_log2 (vectype_align
/vf
));
1498 tree vf_tree
= build_int_cst (unsigned_type_node
, vf
);
1500 pe
= loop_preheader_edge (loop
);
1501 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
1502 gcc_assert (!new_bb
);
1504 /* Create: byte_misalign = addr & (vectype_size - 1) */
1505 byte_misalign
= build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
1507 /* Create: elem_misalign = byte_misalign / element_size */
1509 build2 (RSHIFT_EXPR
, unsigned_type_node
, byte_misalign
, elem_size_log
);
1511 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1512 iters
= build2 (MINUS_EXPR
, unsigned_type_node
, vf_tree
, elem_misalign
);
1513 iters
= build2 (BIT_AND_EXPR
, unsigned_type_node
, iters
, vf_minus_1
);
1514 iters
= fold_convert (niters_type
, iters
);
1516 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1517 /* If the loop bound is known at compile time we already verified that it is
1518 greater than vf; since the misalignment ('iters') is at most vf, there's
1519 no need to generate the MIN_EXPR in this case. */
1520 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
1521 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
1523 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
1524 add_referenced_tmp_var (var
);
1525 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
1527 /* Insert stmt on loop preheader edge. */
1528 pe
= loop_preheader_edge (loop
);
1531 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1532 gcc_assert (!new_bb
);
1539 /* Function vect_update_inits_of_dr
1541 NITERS iterations were peeled from LOOP. DR represents a data reference
1542 in LOOP. This function updates the information recorded in DR to
1543 account for the fact that the first NITERS iterations had already been
1544 executed. Specifically, it updates the OFFSET field of stmt_info. */
1547 vect_update_inits_of_dr (struct data_reference
*dr
, tree niters
)
1549 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1550 tree offset
= STMT_VINFO_VECT_INIT_OFFSET (stmt_info
);
1552 niters
= fold (build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
,
1553 STMT_VINFO_VECT_STEP (stmt_info
)));
1554 offset
= fold (build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
));
1555 STMT_VINFO_VECT_INIT_OFFSET (stmt_info
) = offset
;
1559 /* Function vect_update_inits_of_drs
1561 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1562 This function updates the information recorded for the data references in
1563 the loop to account for the fact that the first NITERS iterations had
1564 already been executed. Specifically, it updates the initial_condition of the
1565 access_function of all the data_references in the loop. */
1568 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
1571 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1572 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1574 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
1575 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
1577 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1579 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1580 vect_update_inits_of_dr (dr
, niters
);
1583 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
1585 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
1586 vect_update_inits_of_dr (dr
, niters
);
1591 /* Function vect_do_peeling_for_alignment
1593 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1594 'niters' is set to the misalignment of one of the data references in the
1595 loop, thereby forcing it to refer to an aligned location at the beginning
1596 of the execution of this loop. The data reference for which we are
1597 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1600 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
1602 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1603 tree niters_of_prolog_loop
, ni_name
;
1605 struct loop
*new_loop
;
1607 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1608 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
1610 ni_name
= vect_build_loop_niters (loop_vinfo
);
1611 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
1613 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1615 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
1616 niters_of_prolog_loop
, ni_name
, true);
1617 #ifdef ENABLE_CHECKING
1618 gcc_assert (new_loop
);
1619 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
1622 /* Update number of times loop executes. */
1623 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
1624 LOOP_VINFO_NITERS (loop_vinfo
) =
1625 build2 (MINUS_EXPR
, TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
1627 /* Update the init conditions of the access functions of all data refs. */
1628 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
1630 /* After peeling we have to reset scalar evolution analyzer. */
1637 /* Function vect_transform_loop.
1639 The analysis phase has determined that the loop is vectorizable.
1640 Vectorize the loop - created vectorized stmts to replace the scalar
1641 stmts in the loop, and update the loop exit condition. */
1644 vect_transform_loop (loop_vec_info loop_vinfo
,
1645 struct loops
*loops ATTRIBUTE_UNUSED
)
1647 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1648 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1649 int nbbs
= loop
->num_nodes
;
1650 block_stmt_iterator si
;
1653 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1655 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1656 fprintf (vect_dump
, "=== vec_transform_loop ===");
1659 /* Peel the loop if there are data refs with unknown alignment.
1660 Only one data ref with unknown store is allowed. */
1662 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo
))
1663 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
1665 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1666 compile time constant), or it is a constant that doesn't divide by the
1667 vectorization factor, then an epilog loop needs to be created.
1668 We therefore duplicate the loop: the original loop will be vectorized,
1669 and will compute the first (n/VF) iterations. The second copy of the loop
1670 will remain scalar and will compute the remaining (n%VF) iterations.
1671 (VF is the vectorization factor). */
1673 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1674 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1675 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
1676 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
1678 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
1679 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
1681 /* 1) Make sure the loop header has exactly two entries
1682 2) Make sure we have a preheader basic block. */
1684 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
1686 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
1689 /* FORNOW: the vectorizer supports only loops which body consist
1690 of one basic block (header + empty latch). When the vectorizer will
1691 support more involved loop forms, the order by which the BBs are
1692 traversed need to be reconsidered. */
1694 for (i
= 0; i
< nbbs
; i
++)
1696 basic_block bb
= bbs
[i
];
1698 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
1700 tree stmt
= bsi_stmt (si
);
1701 stmt_vec_info stmt_info
;
1704 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1706 fprintf (vect_dump
, "------>vectorizing statement: ");
1707 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1709 stmt_info
= vinfo_for_stmt (stmt
);
1710 gcc_assert (stmt_info
);
1711 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1716 #ifdef ENABLE_CHECKING
1717 /* FORNOW: Verify that all stmts operate on the same number of
1718 units and no inner unrolling is necessary. */
1720 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info
)))
1721 == vectorization_factor
);
1723 /* -------- vectorize statement ------------ */
1724 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1725 fprintf (vect_dump
, "transform statement.");
1727 is_store
= vect_transform_stmt (stmt
, &si
);
1730 /* free the attached stmt_vec_info and remove the stmt. */
1731 stmt_ann_t ann
= stmt_ann (stmt
);
1733 set_stmt_info (ann
, NULL
);
1742 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
1744 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
, LOOP_LOC (loop_vinfo
)))
1745 fprintf (vect_dump
, "LOOP VECTORIZED.");