/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include <stdlib.h>
#include "coretypes.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
49 static void vect_align_data_ref (tree
);
50 static tree
vect_create_destination_var (tree
, tree
);
51 static tree vect_create_data_ref_ptr
52 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
53 static tree
vect_create_index_for_vector_ref (loop_vec_info
);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
56 static tree
vect_get_vec_def_for_operand (tree
, tree
);
57 static tree
vect_init_vector (tree
, tree
);
58 static void vect_finish_stmt_generation
59 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
60 static void update_vuses_to_preheader (tree
, struct loop
*);
62 /* Utility function dealing with loop peeling (not peeling itself). */
63 static void vect_generate_tmps_on_preheader
64 (loop_vec_info
, tree
*, tree
*, tree
*);
65 static tree
vect_build_loop_niters (loop_vec_info
);
66 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
67 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
68 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
69 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
70 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
71 static void vect_do_peeling_for_loop_bound
72 (loop_vec_info
, tree
*, struct loops
*);
75 /* Function vect_get_new_vect_var.
77 Returns a name for a new variable. The current naming scheme appends the
78 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
79 the name of vectorizer generated variables, and appends that to NAME if
83 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
88 if (var_kind
== vect_simple_var
)
94 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
96 new_vect_var
= create_tmp_var (type
, prefix
);
102 /* Function vect_create_index_for_vector_ref.
104 Create (and return) an index variable, along with it's update chain in the
105 loop. This variable will be used to access a memory location in a vector
109 LOOP: The loop being vectorized.
110 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
111 function can be added here, or in the loop pre-header.
114 Return an index that will be used to index a vector array. It is expected
115 that a pointer to the first vector will be used as the base address for the
118 FORNOW: we are not trying to be efficient, just creating a new index each
119 time from scratch. At this time all vector references could use the same
122 TODO: create only one index to be used by all vector references. Record
123 the index in the LOOP_VINFO the first time this procedure is called and
124 return it on subsequent calls. The increment of this index must be placed
125 just before the conditional expression that ends the single block loop. */
128 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo
)
131 block_stmt_iterator incr_bsi
;
133 tree indx_before_incr
, indx_after_incr
;
134 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
137 /* It is assumed that the base pointer used for vectorized access contains
138 the address of the first vector. Therefore the index used for vectorized
139 access must be initialized to zero and incremented by 1. */
141 init
= integer_zero_node
;
142 step
= integer_one_node
;
144 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
145 create_iv (init
, step
, NULL_TREE
, loop
, &incr_bsi
, insert_after
,
146 &indx_before_incr
, &indx_after_incr
);
147 incr
= bsi_stmt (incr_bsi
);
148 get_stmt_operands (incr
);
149 set_stmt_info (stmt_ann (incr
), new_stmt_vec_info (incr
, loop_vinfo
));
151 return indx_before_incr
;
155 /* Function vect_create_addr_base_for_vector_ref.
157 Create an expression that computes the address of the first memory location
158 that will be accessed for a data reference.
161 STMT: The statement containing the data reference.
162 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
163 OFFSET: Optional. If supplied, it is be added to the initial address.
166 1. Return an SSA_NAME whose value is the address of the memory location of
167 the first vector of the data reference.
168 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
169 these statement(s) which define the returned SSA_NAME.
171 FORNOW: We are only handling array accesses with step 1. */
174 vect_create_addr_base_for_vector_ref (tree stmt
,
178 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
179 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
181 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
));
182 tree base_name
= build_fold_indirect_ref (data_ref_base
);
183 tree ref
= DR_REF (dr
);
184 tree scalar_type
= TREE_TYPE (ref
);
185 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
188 tree addr_base
, addr_expr
;
190 tree base_offset
= unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info
));
192 /* Create base_offset */
193 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
194 add_referenced_tmp_var (dest
);
195 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
196 append_to_statement_list_force (new_stmt
, new_stmt_list
);
200 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
201 add_referenced_tmp_var (tmp
);
202 offset
= fold (build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
,
203 STMT_VINFO_VECT_STEP (stmt_info
)));
204 base_offset
= fold (build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
205 base_offset
, offset
));
206 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
207 append_to_statement_list_force (new_stmt
, new_stmt_list
);
210 /* base + base_offset */
211 addr_base
= fold (build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
214 /* addr_expr = addr_base */
215 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
216 get_name (base_name
));
217 add_referenced_tmp_var (addr_expr
);
218 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
219 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
220 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
221 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
223 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
225 fprintf (vect_dump
, "created ");
226 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
232 /* Function vect_align_data_ref.
234 Handle mislignment of a memory accesses.
236 FORNOW: Can't handle misaligned accesses.
237 Make sure that the dataref is aligned. */
240 vect_align_data_ref (tree stmt
)
242 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
243 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
245 /* FORNOW: can't handle misaligned accesses;
246 all accesses expected to be aligned. */
247 gcc_assert (aligned_access_p (dr
));
251 /* Function vect_create_data_ref_ptr.
253 Create a memory reference expression for vector access, to be used in a
254 vector load/store stmt. The reference is based on a new pointer to vector
258 1. STMT: a stmt that references memory. Expected to be of the form
259 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
260 2. BSI: block_stmt_iterator where new stmts can be added.
261 3. OFFSET (optional): an offset to be added to the initial address accessed
262 by the data-ref in STMT.
263 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
264 pointing to the initial address.
267 1. Declare a new ptr to vector_type, and have it point to the base of the
268 data reference (initial addressed accessed by the data reference).
269 For example, for vector of type V8HI, the following code is generated:
272 vp = (v8hi *)initial_address;
274 if OFFSET is not supplied:
275 initial_address = &a[init];
276 if OFFSET is supplied:
277 initial_address = &a[init + OFFSET];
279 Return the initial_address in INITIAL_ADDRESS.
281 2. Create a data-reference in the loop based on the new vector pointer vp,
282 and using a new index variable 'idx' as follows:
286 where if ONLY_INIT is true:
289 update = idx + vector_type_size
291 Return the pointer vp'.
294 FORNOW: handle only aligned and consecutive accesses. */
297 vect_create_data_ref_ptr (tree stmt
, block_stmt_iterator
*bsi
, tree offset
,
298 tree
*initial_address
, bool only_init
)
301 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
302 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
303 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
310 tree new_stmt_list
= NULL_TREE
;
312 edge pe
= loop_preheader_edge (loop
);
318 tree type
, tmp
, size
;
320 base_name
= build_fold_indirect_ref (unshare_expr (
321 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info
)));
323 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
325 tree data_ref_base
= base_name
;
326 fprintf (vect_dump
, "create array_ref of type: ");
327 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
328 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
329 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
330 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
331 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
332 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
333 fprintf (vect_dump
, " vectorizing a record based array ref: ");
334 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
335 fprintf (vect_dump
, " vectorizing a pointer ref: ");
336 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
339 /** (1) Create the new vector-pointer variable: **/
341 vect_ptr_type
= build_pointer_type (vectype
);
342 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
343 get_name (base_name
));
344 add_referenced_tmp_var (vect_ptr
);
347 /** (2) Add aliasing information to the new vector-pointer:
348 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
350 tag
= STMT_VINFO_MEMTAG (stmt_info
);
352 get_var_ann (vect_ptr
)->type_mem_tag
= tag
;
353 get_var_ann (vect_ptr
)->subvars
= STMT_VINFO_SUBVARS (stmt_info
);
356 /** (3) Calculate the initial address the vector-pointer, and set
357 the vector-pointer to point to it before the loop: **/
359 /* Create: (&(base[init_val+offset]) in the loop preheader. */
360 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
362 pe
= loop_preheader_edge (loop
);
363 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
364 gcc_assert (!new_bb
);
365 *initial_address
= new_temp
;
367 /* Create: p = (vectype *) initial_base */
368 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
369 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
370 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
371 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
372 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
373 gcc_assert (!new_bb
);
374 vect_ptr_init
= TREE_OPERAND (vec_stmt
, 0);
377 /** (4) Handle the updating of the vector-pointer inside the loop: **/
379 if (only_init
) /* No update in loop is required. */
381 /* Copy the points-to information if it exists. */
382 if (STMT_VINFO_PTR_INFO (stmt_info
))
383 duplicate_ssa_name_ptr_info (vect_ptr_init
,
384 STMT_VINFO_PTR_INFO (stmt_info
));
385 return vect_ptr_init
;
388 idx
= vect_create_index_for_vector_ref (loop_vinfo
);
390 /* Create: update = idx * vectype_size */
391 tmp
= create_tmp_var (integer_type_node
, "update");
392 add_referenced_tmp_var (tmp
);
393 size
= TYPE_SIZE (vect_ptr_type
);
394 type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
395 ptr_update
= create_tmp_var (type
, "update");
396 add_referenced_tmp_var (ptr_update
);
397 vectype_size
= TYPE_SIZE_UNIT (vectype
);
398 vec_stmt
= build2 (MULT_EXPR
, integer_type_node
, idx
, vectype_size
);
399 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, tmp
, vec_stmt
);
400 new_temp
= make_ssa_name (tmp
, vec_stmt
);
401 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
402 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
403 vec_stmt
= fold_convert (type
, new_temp
);
404 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, ptr_update
, vec_stmt
);
405 new_temp
= make_ssa_name (ptr_update
, vec_stmt
);
406 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
407 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
409 /* Create: data_ref_ptr = vect_ptr_init + update */
410 vec_stmt
= build2 (PLUS_EXPR
, vect_ptr_type
, vect_ptr_init
, new_temp
);
411 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
412 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
413 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
414 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
415 data_ref_ptr
= TREE_OPERAND (vec_stmt
, 0);
417 /* Copy the points-to information if it exists. */
418 if (STMT_VINFO_PTR_INFO (stmt_info
))
419 duplicate_ssa_name_ptr_info (data_ref_ptr
, STMT_VINFO_PTR_INFO (stmt_info
));
424 /* Function vect_create_destination_var.
426 Create a new temporary of type VECTYPE. */
429 vect_create_destination_var (tree scalar_dest
, tree vectype
)
432 const char *new_name
;
434 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
436 new_name
= get_name (scalar_dest
);
439 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, new_name
);
440 add_referenced_tmp_var (vec_dest
);
446 /* Function vect_init_vector.
448 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
449 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
450 used in the vectorization of STMT. */
453 vect_init_vector (tree stmt
, tree vector_var
)
455 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
456 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
457 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
460 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
466 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
467 add_referenced_tmp_var (new_var
);
469 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
470 new_temp
= make_ssa_name (new_var
, init_stmt
);
471 TREE_OPERAND (init_stmt
, 0) = new_temp
;
473 pe
= loop_preheader_edge (loop
);
474 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
475 gcc_assert (!new_bb
);
477 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
479 fprintf (vect_dump
, "created new init_stmt: ");
480 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
483 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
488 /* Function vect_get_vec_def_for_operand.
490 OP is an operand in STMT. This function returns a (vector) def that will be
491 used in the vectorized stmt for STMT.
493 In the case that OP is an SSA_NAME which is defined in the loop, then
494 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
496 In case OP is an invariant or constant, a new stmt that creates a vector def
497 needs to be introduced. */
500 vect_get_vec_def_for_operand (tree op
, tree stmt
)
505 stmt_vec_info def_stmt_info
= NULL
;
506 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
507 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
508 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
509 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
510 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
517 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
519 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
520 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
523 /** ===> Case 1: operand is a constant. **/
525 if (TREE_CODE (op
) == INTEGER_CST
|| TREE_CODE (op
) == REAL_CST
)
527 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
531 /* Build a tree with vector elements. */
532 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
533 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
535 for (i
= nunits
- 1; i
>= 0; --i
)
537 t
= tree_cons (NULL_TREE
, op
, t
);
539 vec_cst
= build_vector (vectype
, t
);
540 return vect_init_vector (stmt
, vec_cst
);
543 gcc_assert (TREE_CODE (op
) == SSA_NAME
);
545 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
547 def_stmt
= SSA_NAME_DEF_STMT (op
);
548 def_stmt_info
= vinfo_for_stmt (def_stmt
);
550 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
552 fprintf (vect_dump
, "vect_get_vec_def_for_operand: def_stmt: ");
553 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
557 /** ==> Case 2.1: operand is defined inside the loop. **/
561 /* Get the def from the vectorized stmt. */
563 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
564 gcc_assert (vec_stmt
);
565 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
570 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
571 it is a reduction/induction. **/
573 bb
= bb_for_stmt (def_stmt
);
574 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
576 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
577 fprintf (vect_dump
, "reduction/induction - unsupported.");
578 internal_error ("no support for reduction/induction"); /* FORNOW */
582 /** ==> Case 2.3: operand is defined outside the loop -
583 it is a loop invariant. */
585 switch (TREE_CODE (def_stmt
))
588 def
= PHI_RESULT (def_stmt
);
591 def
= TREE_OPERAND (def_stmt
, 0);
594 def
= TREE_OPERAND (def_stmt
, 0);
595 gcc_assert (IS_EMPTY_STMT (def_stmt
));
599 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
601 fprintf (vect_dump
, "unsupported defining stmt: ");
602 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
604 internal_error ("unsupported defining stmt");
607 /* Build a tree with vector elements.
608 Create 'vec_inv = {inv,inv,..,inv}' */
610 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
611 fprintf (vect_dump
, "Create vector_inv.");
613 for (i
= nunits
- 1; i
>= 0; --i
)
615 t
= tree_cons (NULL_TREE
, def
, t
);
618 vec_inv
= build_constructor (vectype
, t
);
619 return vect_init_vector (stmt
, vec_inv
);
623 /* Function vect_finish_stmt_generation.
625 Insert a new stmt. */
628 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
630 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
632 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
634 fprintf (vect_dump
, "add new stmt: ");
635 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
638 #ifdef ENABLE_CHECKING
639 /* Make sure bsi points to the stmt that is being vectorized. */
640 gcc_assert (stmt
== bsi_stmt (*bsi
));
643 #ifdef USE_MAPPED_LOCATION
644 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCATION (stmt
));
646 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
651 /* Function vectorizable_assignment.
653 Check if STMT performs an assignment (copy) that can be vectorized.
654 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
655 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
656 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
659 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
665 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
666 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
667 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
670 /* Is vectorizable assignment? */
672 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
675 scalar_dest
= TREE_OPERAND (stmt
, 0);
676 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
679 op
= TREE_OPERAND (stmt
, 1);
680 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
682 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
683 fprintf (vect_dump
, "use not simple.");
687 if (!vec_stmt
) /* transformation not required. */
689 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
694 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
695 fprintf (vect_dump
, "transform assignment.");
698 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
701 op
= TREE_OPERAND (stmt
, 1);
702 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
);
704 /* Arguments are ready. create the new vector stmt. */
705 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
706 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
707 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
708 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
714 /* Function vectorizable_operation.
716 Check if STMT performs a binary or unary operation that can be vectorized.
717 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
718 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
719 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
722 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
727 tree op0
, op1
= NULL
;
728 tree vec_oprnd0
, vec_oprnd1
=NULL
;
729 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
730 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
731 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
734 enum machine_mode vec_mode
;
740 /* Is STMT a vectorizable binary/unary operation? */
741 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
744 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
747 operation
= TREE_OPERAND (stmt
, 1);
748 code
= TREE_CODE (operation
);
749 optab
= optab_for_tree_code (code
, vectype
);
751 /* Support only unary or binary operations. */
752 op_type
= TREE_CODE_LENGTH (code
);
753 if (op_type
!= unary_op
&& op_type
!= binary_op
)
755 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
756 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
760 for (i
= 0; i
< op_type
; i
++)
762 op
= TREE_OPERAND (operation
, i
);
763 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
765 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
766 fprintf (vect_dump
, "use not simple.");
771 /* Supportable by target? */
774 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
775 fprintf (vect_dump
, "no optab.");
778 vec_mode
= TYPE_MODE (vectype
);
779 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
781 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
782 fprintf (vect_dump
, "op not supported by target.");
786 if (!vec_stmt
) /* transformation not required. */
788 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
794 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
795 fprintf (vect_dump
, "transform binary/unary operation.");
798 scalar_dest
= TREE_OPERAND (stmt
, 0);
799 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
802 op0
= TREE_OPERAND (operation
, 0);
803 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
805 if (op_type
== binary_op
)
807 op1
= TREE_OPERAND (operation
, 1);
808 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
811 /* Arguments are ready. create the new vector stmt. */
813 if (op_type
== binary_op
)
814 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
815 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
817 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
818 build1 (code
, vectype
, vec_oprnd0
));
819 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
820 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
821 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
827 /* Function vectorizable_store.
829 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
831 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
832 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
833 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
836 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
842 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
843 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
844 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
845 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
846 enum machine_mode vec_mode
;
848 enum dr_alignment_support alignment_support_cheme
;
849 v_may_def_optype v_may_defs
;
852 /* Is vectorizable store? */
854 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
857 scalar_dest
= TREE_OPERAND (stmt
, 0);
858 if (TREE_CODE (scalar_dest
) != ARRAY_REF
859 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
862 op
= TREE_OPERAND (stmt
, 1);
863 if (!vect_is_simple_use (op
, loop_vinfo
, NULL
))
865 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
866 fprintf (vect_dump
, "use not simple.");
870 vec_mode
= TYPE_MODE (vectype
);
871 /* FORNOW. In some cases can vectorize even if data-type not supported
872 (e.g. - array initialization with 0). */
873 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
876 if (!STMT_VINFO_DATA_REF (stmt_info
))
880 if (!vec_stmt
) /* transformation not required. */
882 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
888 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
889 fprintf (vect_dump
, "transform store");
891 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
892 gcc_assert (alignment_support_cheme
);
893 gcc_assert (alignment_support_cheme
== dr_aligned
); /* FORNOW */
895 /* Handle use - get the vectorized def from the defining stmt. */
896 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
);
899 /* FORNOW: make sure the data reference is aligned. */
900 vect_align_data_ref (stmt
);
901 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
902 data_ref
= build_fold_indirect_ref (data_ref
);
904 /* Arguments are ready. create the new vector stmt. */
905 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
906 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
908 /* Copy the V_MAY_DEFS representing the aliasing of the original array
909 element's definition to the vector's definition then update the
910 defining statement. The original is being deleted so the same
911 SSA_NAMEs can be used. */
912 copy_virtual_operands (*vec_stmt
, stmt
);
913 v_may_defs
= STMT_V_MAY_DEF_OPS (*vec_stmt
);
914 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
916 for (i
= 0; i
< nv_may_defs
; i
++)
918 tree ssa_name
= V_MAY_DEF_RESULT (v_may_defs
, i
);
919 SSA_NAME_DEF_STMT (ssa_name
) = *vec_stmt
;
926 /* vectorizable_load.
928 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
930 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
931 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
932 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
935 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
938 tree vec_dest
= NULL
;
939 tree data_ref
= NULL
;
941 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
942 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
943 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
950 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
951 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
952 edge pe
= loop_preheader_edge (loop
);
953 enum dr_alignment_support alignment_support_cheme
;
955 /* Is vectorizable load? */
957 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
960 scalar_dest
= TREE_OPERAND (stmt
, 0);
961 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
964 op
= TREE_OPERAND (stmt
, 1);
965 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
968 if (!STMT_VINFO_DATA_REF (stmt_info
))
971 mode
= (int) TYPE_MODE (vectype
);
973 /* FORNOW. In some cases can vectorize even if data-type not supported
974 (e.g. - data copies). */
975 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
977 if (vect_print_dump_info (REPORT_DETAILS
, LOOP_LOC (loop_vinfo
)))
978 fprintf (vect_dump
, "Aligned load, but unsupported type.");
982 if (!vec_stmt
) /* transformation not required. */
984 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
990 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
991 fprintf (vect_dump
, "transform load.");
993 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
994 gcc_assert (alignment_support_cheme
);
996 if (alignment_support_cheme
== dr_aligned
997 || alignment_support_cheme
== dr_unaligned_supported
)
1008 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1009 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1010 if (aligned_access_p (dr
))
1011 data_ref
= build_fold_indirect_ref (data_ref
);
1014 int mis
= DR_MISALIGNMENT (dr
);
1015 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
1016 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
1017 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
1019 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1020 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1021 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1022 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1023 copy_virtual_operands (new_stmt
, stmt
);
1025 else if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
1029 msq_init = *(floor(p1))
1030 p2 = initial_addr + VS - 1;
1031 magic = have_builtin ? builtin_result : initial_address;
1034 p2' = p2 + indx * vectype_size
1036 vec_dest = realign_load (msq, lsq, magic)
1050 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1051 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1052 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
1054 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
1055 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1056 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1057 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1058 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1059 gcc_assert (!new_bb
);
1060 msq_init
= TREE_OPERAND (new_stmt
, 0);
1061 copy_virtual_operands (new_stmt
, stmt
);
1062 update_vuses_to_preheader (new_stmt
, loop
);
1065 /* <2> Create lsq = *(floor(p2')) in the loop */
1066 offset
= build_int_cst (integer_type_node
,
1067 GET_MODE_NUNITS (TYPE_MODE (vectype
)));
1068 offset
= int_const_binop (MINUS_EXPR
, offset
, integer_one_node
, 1);
1069 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1070 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
1071 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
1072 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1073 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1074 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1075 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1076 lsq
= TREE_OPERAND (new_stmt
, 0);
1077 copy_virtual_operands (new_stmt
, stmt
);
1081 if (targetm
.vectorize
.builtin_mask_for_load
)
1083 /* Create permutation mask, if required, in loop preheader. */
1085 params
= build_tree_list (NULL_TREE
, init_addr
);
1086 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1087 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
1088 new_stmt
= build_function_call_expr (builtin_decl
, params
);
1089 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1090 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1091 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1092 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1093 gcc_assert (!new_bb
);
1094 magic
= TREE_OPERAND (new_stmt
, 0);
1096 /* The result of the CALL_EXPR to this builtin is determined from
1097 the value of the parameter and no global variables are touched
1098 which makes the builtin a "const" function. Requiring the
1099 builtin to have the "const" attribute makes it unnecessary
1100 to call mark_call_clobbered_vars_to_rename. */
1101 gcc_assert (TREE_READONLY (builtin_decl
));
1105 /* Use current address instead of init_addr for reduced reg pressure. */
1107 magic
= dataref_ptr
;
1111 /* <4> Create msq = phi <msq_init, lsq> in loop */
1112 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1113 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
1114 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
1115 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
1116 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
1117 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
1120 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1121 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1122 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
1123 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1124 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1125 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1126 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1131 *vec_stmt
= new_stmt
;
1136 /* Function vect_transform_stmt.
1138 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1141 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
1143 bool is_store
= false;
1144 tree vec_stmt
= NULL_TREE
;
1145 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1148 switch (STMT_VINFO_TYPE (stmt_info
))
1150 case op_vec_info_type
:
1151 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
1155 case assignment_vec_info_type
:
1156 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
1160 case load_vec_info_type
:
1161 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
1165 case store_vec_info_type
:
1166 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
1171 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1172 fprintf (vect_dump
, "stmt not supported.");
1176 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
1182 /* This function builds ni_name = number of iterations loop executes
1183 on the loop preheader. */
1186 vect_build_loop_niters (loop_vec_info loop_vinfo
)
1188 tree ni_name
, stmt
, var
;
1190 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1191 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
1193 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
1194 add_referenced_tmp_var (var
);
1195 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
1197 pe
= loop_preheader_edge (loop
);
1200 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1201 gcc_assert (!new_bb
);
1208 /* This function generates the following statements:
1210 ni_name = number of iterations loop executes
1211 ratio = ni_name / vf
1212 ratio_mult_vf_name = ratio * vf
1214 and places them at the loop preheader edge. */
1217 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
1219 tree
*ratio_mult_vf_name_ptr
,
1220 tree
*ratio_name_ptr
)
1228 tree ratio_mult_vf_name
;
1229 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1230 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
1231 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1232 tree log_vf
= build_int_cst (unsigned_type_node
, exact_log2 (vf
));
1234 pe
= loop_preheader_edge (loop
);
1236 /* Generate temporary variable that contains
1237 number of iterations loop executes. */
1239 ni_name
= vect_build_loop_niters (loop_vinfo
);
1241 /* Create: ratio = ni >> log2(vf) */
1243 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
1244 add_referenced_tmp_var (var
);
1245 ratio_name
= make_ssa_name (var
, NULL_TREE
);
1246 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
1247 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
1248 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
1250 pe
= loop_preheader_edge (loop
);
1251 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1252 gcc_assert (!new_bb
);
1254 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1256 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
1257 add_referenced_tmp_var (var
);
1258 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
1259 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
1260 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
1261 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
1263 pe
= loop_preheader_edge (loop
);
1264 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1265 gcc_assert (!new_bb
);
1267 *ni_name_ptr
= ni_name
;
1268 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
1269 *ratio_name_ptr
= ratio_name
;
1275 /* Function update_vuses_to_preheader.
1278 STMT - a statement with potential VUSEs.
1279 LOOP - the loop whose preheader will contain STMT.
1281 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
1282 appears to be defined in a V_MAY_DEF in another statement in a loop.
1283 One such case is when the VUSE is at the dereference of a __restricted__
1284 pointer in a load and the V_MAY_DEF is at the dereference of a different
1285 __restricted__ pointer in a store. Vectorization may result in
1286 copy_virtual_uses being called to copy the problematic VUSE to a new
1287 statement that is being inserted in the loop preheader. This procedure
1288 is called to change the SSA_NAME in the new statement's VUSE from the
1289 SSA_NAME updated in the loop to the related SSA_NAME available on the
1290 path entering the loop.
1292 When this function is called, we have the following situation:
1297 # name1 = phi < name0 , name2>
1302 # name2 = vdef <name1>
1307 Stmt S1 was created in the loop preheader block as part of misaligned-load
1308 handling. This function fixes the name of the vuse of S1 from 'name1' to
1312 update_vuses_to_preheader (tree stmt
, struct loop
*loop
)
1314 basic_block header_bb
= loop
->header
;
1315 edge preheader_e
= loop_preheader_edge (loop
);
1316 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
1317 int nvuses
= NUM_VUSES (vuses
);
1320 for (i
= 0; i
< nvuses
; i
++)
1322 tree ssa_name
= VUSE_OP (vuses
, i
);
1323 tree def_stmt
= SSA_NAME_DEF_STMT (ssa_name
);
1324 tree name_var
= SSA_NAME_VAR (ssa_name
);
1325 basic_block bb
= bb_for_stmt (def_stmt
);
1327 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
1328 if (!IS_EMPTY_STMT (def_stmt
)
1329 && flow_bb_inside_loop_p (loop
, bb
))
1331 /* If the block containing the statement defining the SSA_NAME
1332 is in the loop then it's necessary to find the definition
1333 outside the loop using the PHI nodes of the header. */
1335 bool updated
= false;
1337 for (phi
= phi_nodes (header_bb
); phi
; phi
= TREE_CHAIN (phi
))
1339 if (SSA_NAME_VAR (PHI_RESULT (phi
)) == name_var
)
1341 SET_VUSE_OP (vuses
, i
,
1342 PHI_ARG_DEF (phi
, preheader_e
->dest_idx
));
1347 gcc_assert (updated
);
1353 /* Function vect_update_ivs_after_vectorizer.
1355 "Advance" the induction variables of LOOP to the value they should take
1356 after the execution of LOOP. This is currently necessary because the
1357 vectorizer does not handle induction variables that are used after the
1358 loop. Such a situation occurs when the last iterations of LOOP are
1360 1. We introduced new uses after LOOP for IVs that were not originally used
1361 after LOOP: the IVs of LOOP are now used by an epilog loop.
1362 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1363 times, whereas the loop IVs should be bumped N times.
1366 - LOOP - a loop that is going to be vectorized. The last few iterations
1367 of LOOP were peeled.
1368 - NITERS - the number of iterations that LOOP executes (before it is
1369 vectorized). i.e, the number of times the ivs should be bumped.
1370 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1371 coming out from LOOP on which there are uses of the LOOP ivs
1372 (this is the path from LOOP->exit to epilog_loop->preheader).
1374 The new definitions of the ivs are placed in LOOP->exit.
1375 The phi args associated with the edge UPDATE_E in the bb
1376 UPDATE_E->dest are updated accordingly.
1378 Assumption 1: Like the rest of the vectorizer, this function assumes
1379 a single loop exit that has a single predecessor.
1381 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1382 organized in the same order.
1384 Assumption 3: The access function of the ivs is simple enough (see
1385 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1387 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1388 coming out of LOOP on which the ivs of LOOP are used (this is the path
1389 that leads to the epilog loop; other paths skip the epilog loop). This
1390 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1391 needs to have its phis updated.
1395 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
1398 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1399 basic_block exit_bb
= loop
->single_exit
->dest
;
1401 basic_block update_bb
= update_e
->dest
;
1403 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1405 /* Make sure there exists a single-predecessor exit bb: */
1406 gcc_assert (single_pred_p (exit_bb
));
1408 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
1410 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
1412 tree access_fn
= NULL
;
1413 tree evolution_part
;
1416 tree var
, stmt
, ni
, ni_name
;
1417 block_stmt_iterator last_bsi
;
1419 /* Skip virtual phi's. */
1420 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
1422 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1423 fprintf (vect_dump
, "virtual phi. skip.");
1427 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
1428 gcc_assert (access_fn
);
1430 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
1431 gcc_assert (evolution_part
!= NULL_TREE
);
1433 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1434 of degree >= 2 or exponential. */
1435 gcc_assert (!tree_is_chrec (evolution_part
));
1437 step_expr
= evolution_part
;
1438 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
1441 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
1442 build2 (MULT_EXPR
, TREE_TYPE (niters
),
1443 niters
, step_expr
), init_expr
);
1445 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
1446 add_referenced_tmp_var (var
);
1448 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
1450 /* Insert stmt into exit_bb. */
1451 last_bsi
= bsi_last (exit_bb
);
1453 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
1455 /* Fix phi expressions in the successor bb. */
1456 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
1461 /* Function vect_do_peeling_for_loop_bound
1463 Peel the last iterations of the loop represented by LOOP_VINFO.
1464 The peeled iterations form a new epilog loop. Given that the loop now
1465 iterates NITERS times, the new epilog loop iterates
1466 NITERS % VECTORIZATION_FACTOR times.
1468 The original loop will later be made to iterate
1469 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1472 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
1473 struct loops
*loops
)
1476 tree ni_name
, ratio_mult_vf_name
;
1477 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1478 struct loop
*new_loop
;
1480 basic_block preheader
;
1481 #ifdef ENABLE_CHECKING
1485 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1486 fprintf (vect_dump
, "=== vect_transtorm_for_unknown_loop_bound ===");
1488 /* Generate the following variables on the preheader of original loop:
1490 ni_name = number of iteration the original loop executes
1491 ratio = ni_name / vf
1492 ratio_mult_vf_name = ratio * vf */
1493 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
1494 &ratio_mult_vf_name
, ratio
);
1496 #ifdef ENABLE_CHECKING
1497 loop_num
= loop
->num
;
1499 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->single_exit
,
1500 ratio_mult_vf_name
, ni_name
, false);
1501 #ifdef ENABLE_CHECKING
1502 gcc_assert (new_loop
);
1503 gcc_assert (loop_num
== loop
->num
);
1504 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
1507 /* A guard that controls whether the new_loop is to be executed or skipped
1508 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1509 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1510 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1511 is on the path where the LOOP IVs are used and need to be updated. */
1513 preheader
= loop_preheader_edge (new_loop
)->src
;
1514 if (EDGE_PRED (preheader
, 0)->src
== loop
->single_exit
->dest
)
1515 update_e
= EDGE_PRED (preheader
, 0);
1517 update_e
= EDGE_PRED (preheader
, 1);
1519 /* Update IVs of original loop as if they were advanced
1520 by ratio_mult_vf_name steps. */
1521 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
1523 /* After peeling we have to reset scalar evolution analyzer. */
1530 /* Function vect_gen_niters_for_prolog_loop
1532 Set the number of iterations for the loop represented by LOOP_VINFO
1533 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1534 and the misalignment of DR - the data reference recorded in
1535 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1536 this loop, the data reference DR will refer to an aligned location.
1538 The following computation is generated:
1540 If the misalignment of DR is known at compile time:
1541 addr_mis = int mis = DR_MISALIGNMENT (dr);
1542 Else, compute address misalignment in bytes:
1543 addr_mis = addr & (vectype_size - 1)
1545 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1547 (elem_size = element type size; an element is the scalar element
1548 whose type is the inner type of the vectype) */
1551 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
1553 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
1554 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1555 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1557 tree iters
, iters_name
;
1560 tree dr_stmt
= DR_STMT (dr
);
1561 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
1562 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1563 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
1564 tree vf_minus_1
= build_int_cst (unsigned_type_node
, vf
- 1);
1565 tree niters_type
= TREE_TYPE (loop_niters
);
1567 pe
= loop_preheader_edge (loop
);
1569 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
1571 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1572 int element_size
= vectype_align
/vf
;
1573 int elem_misalign
= byte_misalign
/ element_size
;
1575 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1576 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
1577 iters
= build_int_cst (niters_type
, (vf
- elem_misalign
)&(vf
-1));
1581 tree new_stmts
= NULL_TREE
;
1583 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
1584 tree ptr_type
= TREE_TYPE (start_addr
);
1585 tree size
= TYPE_SIZE (ptr_type
);
1586 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
1587 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
1588 tree elem_size_log
=
1589 build_int_cst (unsigned_type_node
, exact_log2 (vectype_align
/vf
));
1590 tree vf_tree
= build_int_cst (unsigned_type_node
, vf
);
1594 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
1595 gcc_assert (!new_bb
);
1597 /* Create: byte_misalign = addr & (vectype_size - 1) */
1599 build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
1601 /* Create: elem_misalign = byte_misalign / element_size */
1603 build2 (RSHIFT_EXPR
, unsigned_type_node
, byte_misalign
, elem_size_log
);
1605 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1606 iters
= build2 (MINUS_EXPR
, unsigned_type_node
, vf_tree
, elem_misalign
);
1607 iters
= build2 (BIT_AND_EXPR
, unsigned_type_node
, iters
, vf_minus_1
);
1608 iters
= fold_convert (niters_type
, iters
);
1611 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1612 /* If the loop bound is known at compile time we already verified that it is
1613 greater than vf; since the misalignment ('iters') is at most vf, there's
1614 no need to generate the MIN_EXPR in this case. */
1615 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
1616 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
1618 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1620 fprintf (vect_dump
, "niters for prolog loop: ");
1621 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
1624 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
1625 add_referenced_tmp_var (var
);
1626 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
1628 /* Insert stmt on loop preheader edge. */
1631 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
1632 gcc_assert (!new_bb
);
1639 /* Function vect_update_init_of_dr
1641 NITERS iterations were peeled from LOOP. DR represents a data reference
1642 in LOOP. This function updates the information recorded in DR to
1643 account for the fact that the first NITERS iterations had already been
1644 executed. Specifically, it updates the OFFSET field of stmt_info. */
1647 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
1649 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1650 tree offset
= STMT_VINFO_VECT_INIT_OFFSET (stmt_info
);
1652 niters
= fold (build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
,
1653 STMT_VINFO_VECT_STEP (stmt_info
)));
1654 offset
= fold (build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
));
1655 STMT_VINFO_VECT_INIT_OFFSET (stmt_info
) = offset
;
1659 /* Function vect_update_inits_of_drs
1661 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1662 This function updates the information recorded for the data references in
1663 the loop to account for the fact that the first NITERS iterations had
1664 already been executed. Specifically, it updates the initial_condition of the
1665 access_function of all the data_references in the loop. */
1668 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
1671 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
1672 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
1674 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
1675 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
1677 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
1679 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
1680 vect_update_init_of_dr (dr
, niters
);
1683 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
1685 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
1686 vect_update_init_of_dr (dr
, niters
);
1691 /* Function vect_do_peeling_for_alignment
1693 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1694 'niters' is set to the misalignment of one of the data references in the
1695 loop, thereby forcing it to refer to an aligned location at the beginning
1696 of the execution of this loop. The data reference for which we are
1697 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1700 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
1702 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1703 tree niters_of_prolog_loop
, ni_name
;
1705 struct loop
*new_loop
;
1707 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1708 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
1710 ni_name
= vect_build_loop_niters (loop_vinfo
);
1711 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
1713 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1715 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
1716 niters_of_prolog_loop
, ni_name
, true);
1717 #ifdef ENABLE_CHECKING
1718 gcc_assert (new_loop
);
1719 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
1722 /* Update number of times loop executes. */
1723 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
1724 LOOP_VINFO_NITERS (loop_vinfo
) = fold (build2 (MINUS_EXPR
,
1725 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
));
1727 /* Update the init conditions of the access functions of all data refs. */
1728 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
1730 /* After peeling we have to reset scalar evolution analyzer. */
1737 /* Function vect_transform_loop.
1739 The analysis phase has determined that the loop is vectorizable.
1740 Vectorize the loop - created vectorized stmts to replace the scalar
1741 stmts in the loop, and update the loop exit condition. */
1744 vect_transform_loop (loop_vec_info loop_vinfo
,
1745 struct loops
*loops ATTRIBUTE_UNUSED
)
1747 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1748 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1749 int nbbs
= loop
->num_nodes
;
1750 block_stmt_iterator si
;
1753 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1755 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1756 fprintf (vect_dump
, "=== vec_transform_loop ===");
1759 /* Peel the loop if there are data refs with unknown alignment.
1760 Only one data ref with unknown store is allowed. */
1762 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
1763 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
1765 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1766 compile time constant), or it is a constant that doesn't divide by the
1767 vectorization factor, then an epilog loop needs to be created.
1768 We therefore duplicate the loop: the original loop will be vectorized,
1769 and will compute the first (n/VF) iterations. The second copy of the loop
1770 will remain scalar and will compute the remaining (n%VF) iterations.
1771 (VF is the vectorization factor). */
1773 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1774 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1775 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
1776 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
1778 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
1779 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
1781 /* 1) Make sure the loop header has exactly two entries
1782 2) Make sure we have a preheader basic block. */
1784 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
1786 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
1789 /* FORNOW: the vectorizer supports only loops which body consist
1790 of one basic block (header + empty latch). When the vectorizer will
1791 support more involved loop forms, the order by which the BBs are
1792 traversed need to be reconsidered. */
1794 for (i
= 0; i
< nbbs
; i
++)
1796 basic_block bb
= bbs
[i
];
1798 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
1800 tree stmt
= bsi_stmt (si
);
1801 stmt_vec_info stmt_info
;
1804 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1806 fprintf (vect_dump
, "------>vectorizing statement: ");
1807 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
1809 stmt_info
= vinfo_for_stmt (stmt
);
1810 gcc_assert (stmt_info
);
1811 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1816 #ifdef ENABLE_CHECKING
1817 /* FORNOW: Verify that all stmts operate on the same number of
1818 units and no inner unrolling is necessary. */
1820 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info
)))
1821 == vectorization_factor
);
1823 /* -------- vectorize statement ------------ */
1824 if (vect_print_dump_info (REPORT_DETAILS
, UNKNOWN_LOC
))
1825 fprintf (vect_dump
, "transform statement.");
1827 is_store
= vect_transform_stmt (stmt
, &si
);
1830 /* free the attached stmt_vec_info and remove the stmt. */
1831 stmt_ann_t ann
= stmt_ann (stmt
);
1833 set_stmt_info (ann
, NULL
);
1842 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
1844 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
, LOOP_LOC (loop_vinfo
)))
1845 fprintf (vect_dump
, "LOOP VECTORIZED.");