1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
50 static void vect_align_data_ref (tree
);
51 static tree
vect_create_destination_var (tree
, tree
);
52 static tree vect_create_data_ref_ptr
53 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
56 static tree
vect_get_vec_def_for_operand (tree
, tree
, tree
*);
57 static tree
vect_init_vector (tree
, tree
);
58 static void vect_finish_stmt_generation
59 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
60 static bool vect_is_simple_cond (tree
, loop_vec_info
);
61 static void update_vuses_to_preheader (tree
, struct loop
*);
62 static tree
get_initial_def_for_reduction (tree
, tree
, tree
*);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info
, tree
*, tree
*, tree
*);
67 static tree
vect_build_loop_niters (loop_vec_info
);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
69 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
70 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
71 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
72 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info
, tree
*, struct loops
*);
75 static int vect_min_worthwhile_factor (enum tree_code
);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
86 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
99 case vect_pointer_var
:
107 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
109 new_vect_var
= create_tmp_var (type
, prefix
);
115 /* Function vect_create_addr_base_for_vector_ref.
117 Create an expression that computes the address of the first memory location
118 that will be accessed for a data reference.
121 STMT: The statement containing the data reference.
122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
123 OFFSET: Optional. If supplied, it is added to the initial address.
126 1. Return an SSA_NAME whose value is the address of the memory location of
127 the first vector of the data reference.
128 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
129 these statement(s) which define the returned SSA_NAME.
131 FORNOW: We are only handling array accesses with step 1. */
134 vect_create_addr_base_for_vector_ref (tree stmt
,
138 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
139 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
140 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
141 tree base_name
= build_fold_indirect_ref (data_ref_base
);
142 tree ref
= DR_REF (dr
);
143 tree scalar_type
= TREE_TYPE (ref
);
144 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
147 tree addr_base
, addr_expr
;
149 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
150 tree init
= unshare_expr (DR_INIT (dr
));
152 /* Create base_offset */
153 base_offset
= size_binop (PLUS_EXPR
, base_offset
, init
);
154 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
155 add_referenced_tmp_var (dest
);
156 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
157 append_to_statement_list_force (new_stmt
, new_stmt_list
);
161 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
162 add_referenced_tmp_var (tmp
);
163 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
,
165 base_offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
166 base_offset
, offset
);
167 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
168 append_to_statement_list_force (new_stmt
, new_stmt_list
);
171 /* base + base_offset */
172 addr_base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
175 /* addr_expr = addr_base */
176 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
177 get_name (base_name
));
178 add_referenced_tmp_var (addr_expr
);
179 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
180 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
181 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
182 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
184 if (vect_print_dump_info (REPORT_DETAILS
))
186 fprintf (vect_dump
, "created ");
187 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
193 /* Function vect_align_data_ref.
195 Handle misalignment of a memory access.
197 FORNOW: Can't handle misaligned accesses.
198 Make sure that the dataref is aligned. */
201 vect_align_data_ref (tree stmt
)
203 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
204 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
206 /* FORNOW: can't handle misaligned accesses;
207 all accesses expected to be aligned. */
208 gcc_assert (aligned_access_p (dr
));
212 /* Function vect_create_data_ref_ptr.
214 Create a memory reference expression for vector access, to be used in a
215 vector load/store stmt. The reference is based on a new pointer to vector
219 1. STMT: a stmt that references memory. Expected to be of the form
220 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
221 2. BSI: block_stmt_iterator where new stmts can be added.
222 3. OFFSET (optional): an offset to be added to the initial address accessed
223 by the data-ref in STMT.
224 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
225 pointing to the initial address.
228 1. Declare a new ptr to vector_type, and have it point to the base of the
229 data reference (initial address accessed by the data reference).
230 For example, for vector of type V8HI, the following code is generated:
233 vp = (v8hi *)initial_address;
235 if OFFSET is not supplied:
236 initial_address = &a[init];
237 if OFFSET is supplied:
238 initial_address = &a[init + OFFSET];
240 Return the initial_address in INITIAL_ADDRESS.
242 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create
243 a data-reference in the loop based on the new vector pointer vp. This
244 new data reference will by some means be updated each iteration of
245 the loop. Return the pointer vp'.
247 FORNOW: handle only aligned and consecutive accesses. */
250 vect_create_data_ref_ptr (tree stmt
,
251 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
252 tree offset
, tree
*initial_address
, bool only_init
)
255 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
256 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
257 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
258 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
264 tree new_stmt_list
= NULL_TREE
;
265 edge pe
= loop_preheader_edge (loop
);
268 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
270 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
272 if (vect_print_dump_info (REPORT_DETAILS
))
274 tree data_ref_base
= base_name
;
275 fprintf (vect_dump
, "create vector-pointer variable to type: ");
276 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
277 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
278 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
279 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
280 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
281 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
282 fprintf (vect_dump
, " vectorizing a record based array ref: ");
283 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
284 fprintf (vect_dump
, " vectorizing a pointer ref: ");
285 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
288 /** (1) Create the new vector-pointer variable: **/
290 vect_ptr_type
= build_pointer_type (vectype
);
291 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
292 get_name (base_name
));
293 add_referenced_tmp_var (vect_ptr
);
296 /** (2) Add aliasing information to the new vector-pointer:
297 (The points-to info (DR_PTR_INFO) may be defined later.) **/
299 tag
= DR_MEMTAG (dr
);
302 /* If tag is a variable (and NOT_A_TAG) then a new type alias
303 tag must be created with tag added to its may alias list. */
305 new_type_alias (vect_ptr
, tag
);
307 var_ann (vect_ptr
)->type_mem_tag
= tag
;
309 var_ann (vect_ptr
)->subvars
= DR_SUBVARS (dr
);
311 /** (3) Calculate the initial address of the vector-pointer, and set
312 the vector-pointer to point to it before the loop: **/
314 /* Create: (&(base[init_val+offset]) in the loop preheader. */
315 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
317 pe
= loop_preheader_edge (loop
);
318 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
319 gcc_assert (!new_bb
);
320 *initial_address
= new_temp
;
322 /* Create: p = (vectype *) initial_base */
323 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
324 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
325 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
326 TREE_OPERAND (vec_stmt
, 0) = vect_ptr_init
;
327 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
328 gcc_assert (!new_bb
);
331 /** (4) Handle the updating of the vector-pointer inside the loop: **/
333 if (only_init
) /* No update in loop is required. */
335 /* Copy the points-to information if it exists. */
336 if (DR_PTR_INFO (dr
))
337 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
338 return vect_ptr_init
;
342 block_stmt_iterator incr_bsi
;
344 tree indx_before_incr
, indx_after_incr
;
347 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
348 create_iv (vect_ptr_init
,
349 fold_convert (vect_ptr_type
, TYPE_SIZE_UNIT (vectype
)),
350 NULL_TREE
, loop
, &incr_bsi
, insert_after
,
351 &indx_before_incr
, &indx_after_incr
);
352 incr
= bsi_stmt (incr_bsi
);
353 set_stmt_info ((tree_ann_t
)stmt_ann (incr
),
354 new_stmt_vec_info (incr
, loop_vinfo
));
356 /* Copy the points-to information if it exists. */
357 if (DR_PTR_INFO (dr
))
359 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
360 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
362 merge_alias_info (vect_ptr_init
, indx_before_incr
);
363 merge_alias_info (vect_ptr_init
, indx_after_incr
);
365 return indx_before_incr
;
370 /* Function vect_create_destination_var.
372 Create a new temporary of type VECTYPE. */
375 vect_create_destination_var (tree scalar_dest
, tree vectype
)
378 const char *new_name
;
380 enum vect_var_kind kind
;
382 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
383 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
385 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
387 new_name
= get_name (scalar_dest
);
390 vec_dest
= vect_get_new_vect_var (type
, vect_simple_var
, new_name
);
391 add_referenced_tmp_var (vec_dest
);
397 /* Function vect_init_vector.
399 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401 used in the vectorization of STMT. */
404 vect_init_vector (tree stmt
, tree vector_var
)
406 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
407 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
408 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
411 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
417 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
418 add_referenced_tmp_var (new_var
);
420 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
421 new_temp
= make_ssa_name (new_var
, init_stmt
);
422 TREE_OPERAND (init_stmt
, 0) = new_temp
;
424 pe
= loop_preheader_edge (loop
);
425 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
426 gcc_assert (!new_bb
);
428 if (vect_print_dump_info (REPORT_DETAILS
))
430 fprintf (vect_dump
, "created new init_stmt: ");
431 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
434 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
439 /* Function vect_get_vec_def_for_operand.
441 OP is an operand in STMT. This function returns a (vector) def that will be
442 used in the vectorized stmt for STMT.
444 In the case that OP is an SSA_NAME which is defined in the loop, then
445 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
447 In case OP is an invariant or constant, a new stmt that creates a vector def
448 needs to be introduced. */
451 vect_get_vec_def_for_operand (tree op
, tree stmt
, tree
*scalar_def
)
456 stmt_vec_info def_stmt_info
= NULL
;
457 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
458 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
459 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
460 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
461 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
467 enum vect_def_type dt
;
470 if (vect_print_dump_info (REPORT_DETAILS
))
472 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
473 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
476 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
477 gcc_assert (is_simple_use
);
478 if (vect_print_dump_info (REPORT_DETAILS
))
482 fprintf (vect_dump
, "def = ");
483 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
487 fprintf (vect_dump
, " def_stmt = ");
488 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
494 /* Case 1: operand is a constant. */
495 case vect_constant_def
:
500 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
501 if (vect_print_dump_info (REPORT_DETAILS
))
502 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
504 for (i
= nunits
- 1; i
>= 0; --i
)
506 t
= tree_cons (NULL_TREE
, op
, t
);
508 vec_cst
= build_vector (vectype
, t
);
509 return vect_init_vector (stmt
, vec_cst
);
512 /* Case 2: operand is defined outside the loop - loop invariant. */
513 case vect_invariant_def
:
518 /* Create 'vec_inv = {inv,inv,..,inv}' */
519 if (vect_print_dump_info (REPORT_DETAILS
))
520 fprintf (vect_dump
, "Create vector_inv.");
522 for (i
= nunits
- 1; i
>= 0; --i
)
524 t
= tree_cons (NULL_TREE
, def
, t
);
527 /* FIXME: use build_constructor directly. */
528 vec_inv
= build_constructor_from_list (vectype
, t
);
529 return vect_init_vector (stmt
, vec_inv
);
532 /* Case 3: operand is defined inside the loop. */
536 *scalar_def
= def_stmt
;
538 /* Get the def from the vectorized stmt. */
539 def_stmt_info
= vinfo_for_stmt (def_stmt
);
540 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
541 gcc_assert (vec_stmt
);
542 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
546 /* Case 4: operand is defined by a loop header phi - reduction */
547 case vect_reduction_def
:
549 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
551 /* Get the def before the loop */
552 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
553 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
556 /* Case 5: operand is defined by loop-header phi - induction. */
557 case vect_induction_def
:
559 if (vect_print_dump_info (REPORT_DETAILS
))
560 fprintf (vect_dump
, "induction - unsupported.");
561 internal_error ("no support for induction"); /* FORNOW */
570 /* Function vect_finish_stmt_generation.
572 Insert a new stmt. */
575 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
577 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
579 if (vect_print_dump_info (REPORT_DETAILS
))
581 fprintf (vect_dump
, "add new stmt: ");
582 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
585 /* Make sure bsi points to the stmt that is being vectorized. */
586 gcc_assert (stmt
== bsi_stmt (*bsi
));
588 #ifdef USE_MAPPED_LOCATION
589 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCATION (stmt
));
591 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
596 #define ADJUST_IN_EPILOG 1
598 /* Function get_initial_def_for_reduction
601 STMT - a stmt that performs a reduction operation in the loop.
602 INIT_VAL - the initial value of the reduction variable
605 SCALAR_DEF - a tree that holds a value to be added to the final result
606 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
607 Return a vector variable, initialized according to the operation that STMT
608 performs. This vector will be used as the initial value of the
609 vector of partial results.
611 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
614 min/max: [init_val,init_val,..,init_val,init_val]
615 bit and/or: [init_val,init_val,..,init_val,init_val]
616 and when necessary (e.g. add/mult case) let the caller know
617 that it needs to adjust the result by init_val.
619 Option2: Initialize the vector as follows:
620 add: [0,0,...,0,init_val]
621 mult: [1,1,...,1,init_val]
622 min/max: [init_val,init_val,...,init_val]
623 bit and/or: [init_val,init_val,...,init_val]
624 and no adjustments are needed.
626 For example, for the following code:
632 STMT is 's = s + a[i]', and the reduction variable is 's'.
633 For a vector of 4 units, we want to return either [0,0,0,init_val],
634 or [0,0,0,0] and let the caller know that it needs to adjust
635 the result at the end by 'init_val'.
637 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
638 TODO: Use some cost-model to estimate which scheme is more profitable.
642 get_initial_def_for_reduction (tree stmt
, tree init_val
, tree
*scalar_def
)
644 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
645 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
646 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
648 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1));
649 tree type
= TREE_TYPE (init_val
);
651 tree vec
, t
= NULL_TREE
;
652 bool need_epilog_adjust
;
655 gcc_assert (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
));
660 if (INTEGRAL_TYPE_P (type
))
661 def
= build_int_cst (type
, 0);
663 def
= build_real (type
, dconst0
);
665 #ifdef ADJUST_IN_EPILOG
666 /* All the 'nunits' elements are set to 0. The final result will be
667 adjusted by 'init_val' at the loop epilog. */
669 need_epilog_adjust
= true;
671 /* 'nunits - 1' elements are set to 0; The last element is set to
672 'init_val'. No further adjustments at the epilog are needed. */
673 nelements
= nunits
- 1;
674 need_epilog_adjust
= false;
682 need_epilog_adjust
= false;
689 for (i
= nelements
- 1; i
>= 0; --i
)
690 t
= tree_cons (NULL_TREE
, def
, t
);
692 if (nelements
== nunits
- 1)
694 /* Set the last element of the vector. */
695 t
= tree_cons (NULL_TREE
, init_val
, t
);
698 gcc_assert (nelements
== nunits
);
700 if (TREE_CODE (init_val
) == INTEGER_CST
|| TREE_CODE (init_val
) == REAL_CST
)
701 vec
= build_vector (vectype
, t
);
703 vec
= build_constructor_from_list (vectype
, t
);
705 if (!need_epilog_adjust
)
706 *scalar_def
= NULL_TREE
;
708 *scalar_def
= init_val
;
710 return vect_init_vector (stmt
, vec
);
714 /* Function vect_create_epilog_for_reduction:
716 Create code at the loop-epilog to finalize the result of a reduction
719 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
720 into a single result, by applying the operation REDUC_CODE on the
721 partial-results-vector. For this, we need to create a new phi node at the
722 loop exit to preserve loop-closed form, as illustrated below.
724 STMT is the original scalar reduction stmt that is being vectorized.
725 REDUCTION_OP is the scalar reduction-variable.
726 REDUCTION_PHI is the phi-node that carries the reduction computation.
727 This function also sets the arguments for the REDUCTION_PHI:
728 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
729 The loop-latch argument is VECT_DEF - the vector of partial sums.
731 This function transforms this:
734 vec_def = phi <null, null> # REDUCTION_PHI
739 s_out0 = phi <s_loop> # EXIT_PHI
747 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
752 s_out0 = phi <s_loop> # EXIT_PHI
753 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
755 v_out2 = reduc_expr <v_out1>
756 s_out3 = extract_field <v_out2, 0>
763 vect_create_epilog_for_reduction (tree vect_def
, tree stmt
, tree reduction_op
,
764 enum tree_code reduc_code
, tree reduction_phi
)
766 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
767 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
768 enum machine_mode mode
= TYPE_MODE (vectype
);
769 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
770 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
772 tree scalar_dest
= TREE_OPERAND (stmt
, 0);
773 tree scalar_type
= TREE_TYPE (scalar_dest
);
775 block_stmt_iterator exit_bsi
;
780 tree new_scalar_dest
, exit_phi
;
781 tree bitsize
, bitpos
, bytesize
;
782 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1));
783 tree scalar_initial_def
;
784 tree vec_initial_def
;
786 imm_use_iterator imm_iter
;
788 bool extract_scalar_result
;
790 /*** 1. Create the reduction def-use cycle ***/
792 /* 1.1 set the loop-entry arg of the reduction-phi: */
793 /* For the case of reduction, vect_get_vec_def_for_operand returns
794 the scalar def before the loop, that defines the initial value
795 of the reduction variable. */
796 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
797 &scalar_initial_def
);
798 add_phi_arg (reduction_phi
, vec_initial_def
, loop_preheader_edge (loop
));
801 /* 1.2 set the loop-latch arg for the reduction-phi: */
802 add_phi_arg (reduction_phi
, vect_def
, loop_latch_edge (loop
));
804 if (vect_print_dump_info (REPORT_DETAILS
))
806 fprintf (vect_dump
, "transform reduction: created def-use cycle:");
807 print_generic_expr (vect_dump
, reduction_phi
, TDF_SLIM
);
808 fprintf (vect_dump
, "\n");
809 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vect_def
), TDF_SLIM
);
813 /*** 2. Create epilog code ***/
815 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
816 v_out1 = phi <v_loop> */
818 exit_bb
= loop
->single_exit
->dest
;
819 new_phi
= create_phi_node (SSA_NAME_VAR (vect_def
), exit_bb
);
820 SET_PHI_ARG_DEF (new_phi
, loop
->single_exit
->dest_idx
, vect_def
);
822 exit_bsi
= bsi_start (exit_bb
);
825 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
826 bitsize
= TYPE_SIZE (scalar_type
);
827 bytesize
= TYPE_SIZE_UNIT (scalar_type
);
829 /* 2.2 Create the reduction code. */
831 if (reduc_code
< NUM_TREE_CODES
)
834 v_out2 = reduc_expr <v_out1> */
836 if (vect_print_dump_info (REPORT_DETAILS
))
837 fprintf (vect_dump
, "Reduce using direct vector reduction.");
839 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
840 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
841 build1 (reduc_code
, vectype
, PHI_RESULT (new_phi
)));
842 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
843 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
844 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
846 extract_scalar_result
= true;
850 enum tree_code shift_code
= 0;
851 bool have_whole_vector_shift
= true;
852 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1)); /* CHECKME */
854 int element_bitsize
= tree_low_cst (bitsize
, 1);
855 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
858 /* The result of the reduction is expected to be at the least
859 significant bits of the vector. This is merely convention,
860 as it's the extraction later that really matters, and that
861 is also under our control. */
862 if (vec_shr_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
863 shift_code
= VEC_RSHIFT_EXPR
;
865 have_whole_vector_shift
= false;
867 /* Regardless of whether we have a whole vector shift, if we're
868 emulating the operation via tree-vect-generic, we don't want
869 to use it. Only the first round of the reduction is likely
870 to still be profitable via emulation. */
871 /* ??? It might be better to emit a reduction tree code here, so that
872 tree-vect-generic can expand the first round via bit tricks. */
873 if (!VECTOR_MODE_P (mode
))
874 have_whole_vector_shift
= false;
877 optab optab
= optab_for_tree_code (code
, vectype
);
878 if (optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
879 have_whole_vector_shift
= false;
882 if (have_whole_vector_shift
)
885 for (offset = VS/2; offset >= element_size; offset/=2)
887 Create: va' = vec_shift <va, offset>
888 Create: va = vop <va, va'>
891 if (vect_print_dump_info (REPORT_DETAILS
))
892 fprintf (vect_dump
, "Reduce using vector shifts");
894 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
895 new_temp
= PHI_RESULT (new_phi
);
897 for (bit_offset
= vec_size_in_bits
/2;
898 bit_offset
>= element_bitsize
;
901 tree bitpos
= size_int (bit_offset
);
903 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
904 build2 (shift_code
, vectype
, new_temp
, bitpos
));
905 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
906 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
907 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
908 if (vect_print_dump_info (REPORT_DETAILS
))
909 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
912 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
913 build2 (code
, vectype
, new_name
, new_temp
));
914 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
915 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
916 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
917 if (vect_print_dump_info (REPORT_DETAILS
))
918 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
921 extract_scalar_result
= true;
929 s = extract_field <v_out2, 0>
930 for (offset=element_size; offset<vector_size; offset+=element_size)
932 Create: s' = extract_field <v_out2, offset>
933 Create: s = op <s, s'>
936 if (vect_print_dump_info (REPORT_DETAILS
))
937 fprintf (vect_dump
, "Reduce using scalar code. ");
939 vec_temp
= PHI_RESULT (new_phi
);
940 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
942 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
945 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
946 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
948 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
949 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
950 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
951 if (vect_print_dump_info (REPORT_DETAILS
))
952 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
954 for (bit_offset
= element_bitsize
;
955 bit_offset
< vec_size_in_bits
;
956 bit_offset
+= element_bitsize
)
958 tree bitpos
= bitsize_int (bit_offset
);
959 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
962 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
963 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
965 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
966 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
967 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
968 if (vect_print_dump_info (REPORT_DETAILS
))
969 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
972 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
973 build2 (code
, scalar_type
, new_name
, new_temp
));
974 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
975 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
976 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
977 if (vect_print_dump_info (REPORT_DETAILS
))
978 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
981 extract_scalar_result
= false;
986 /* 2.3 Extract the final scalar result. Create:
987 s_out3 = extract_field <v_out2, bitpos> */
989 if (extract_scalar_result
)
993 if (vect_print_dump_info (REPORT_DETAILS
))
994 fprintf (vect_dump
, "extract scalar result");
996 /* The result is in the low order bits. */
997 if (BYTES_BIG_ENDIAN
)
998 bitpos
= size_binop (MULT_EXPR
,
999 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
1000 TYPE_SIZE (scalar_type
));
1002 bitpos
= bitsize_zero_node
;
1004 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
1005 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1006 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
, rhs
);
1007 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1008 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1009 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1010 if (vect_print_dump_info (REPORT_DETAILS
))
1011 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
1015 /* 2.4 Adjust the final result by the initial value of the reduction
1016 variable. (when such adjustment is not needed, then
1017 'scalar_initial_def' is zero).
1020 s_out = scalar_expr <s_out, scalar_initial_def> */
1022 if (scalar_initial_def
)
1024 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1025 build2 (code
, scalar_type
, new_temp
, scalar_initial_def
));
1026 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1027 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1028 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1030 if (vect_print_dump_info (REPORT_DETAILS
))
1031 print_generic_expr (vect_dump
, epilog_stmt
, TDF_SLIM
);
1035 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1037 /* Find the loop-closed-use at the loop exit of the original
1038 scalar result. (The reduction result is expected to have
1039 two immediate uses - one at the latch block, and one at the
1042 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
1044 if (!flow_bb_inside_loop_p (loop
, bb_for_stmt (USE_STMT (use_p
))))
1046 exit_phi
= USE_STMT (use_p
);
1051 orig_name
= PHI_RESULT (exit_phi
);
1053 FOR_EACH_IMM_USE_SAFE (use_p
, imm_iter
, orig_name
)
1054 SET_USE (use_p
, new_temp
);
1058 /* Function vectorizable_reduction.
1060 Check if STMT performs a reduction operation that can be vectorized.
1061 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1062 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1063 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Overview of the visible steps:
   1. Test STMT_VINFO_RELEVANT_P, STMT_VINFO_LIVE_P and the def type
      (vect_reduction_def) of STMT.
   2. Split the binary operation into op0 (expected to be defined in the
      loop) and op1 (expected to be the reduction PHI result).
   3. Query the optab for the operation itself and for the epilog
      reduction code obtained from reduction_code_for_scalar_code.
   4. When VEC_STMT is non-NULL, create the reduction phi, the vector
      stmt and the epilog code.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1066 vectorizable_reduction (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1072 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1073 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1074 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1075 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1077 enum tree_code code
, reduc_code
= 0;
1078 enum machine_mode vec_mode
;
1080 optab optab
, reduc_optab
;
1082 tree def0
, def1
, def_stmt0
, def_stmt1
;
1083 enum vect_def_type dt0
, dt1
;
1086 bool is_simple_use0
;
1087 bool is_simple_use1
;
1089 /* Is vectorizable reduction? */
1091 /* Not supportable if the reduction variable is used in the loop. */
1092 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1095 if (!STMT_VINFO_LIVE_P (stmt_info
))
1098 /* Make sure it was already recognized as a reduction pattern. */
1099 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
1102 gcc_assert (TREE_CODE (stmt
) == MODIFY_EXPR
);
1104 operation
= TREE_OPERAND (stmt
, 1);
1105 code
= TREE_CODE (operation
);
1106 op_type
= TREE_CODE_LENGTH (code
);
/* Only binary operations are considered as reductions here.  */
1108 if (op_type
!= binary_op
)
1111 op0
= TREE_OPERAND (operation
, 0);
1112 op1
= TREE_OPERAND (operation
, 1);
1113 scalar_dest
= TREE_OPERAND (stmt
, 0);
1114 scalar_type
= TREE_TYPE (scalar_dest
);
1116 /* Check the first operand. It is expected to be defined inside the loop. */
1118 vect_is_simple_use (op0
, loop_vinfo
, &def_stmt0
, &def0
, &dt0
);
/* Check the second operand.  The assertions below expect it to be the
   reduction variable (vect_reduction_def) defined by a PHI node, and expect
   vect_is_simple_reduction on that PHI to yield STMT.  */
1120 vect_is_simple_use (op1
, loop_vinfo
, &def_stmt1
, &def1
, &dt1
);
1122 gcc_assert (is_simple_use0
);
1123 gcc_assert (is_simple_use1
);
1124 gcc_assert (dt0
== vect_loop_def
);
1125 gcc_assert (dt1
== vect_reduction_def
);
1126 gcc_assert (TREE_CODE (def_stmt1
) == PHI_NODE
);
1127 gcc_assert (stmt
== vect_is_simple_reduction (loop
, def_stmt1
));
/* Test whether the reduction PHI itself is marked live.  */
1129 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1
)))
1132 /* Supportable by target? */
1134 /* check support for the operation in the loop */
1135 optab
= optab_for_tree_code (code
, vectype
);
1138 if (vect_print_dump_info (REPORT_DETAILS
))
1139 fprintf (vect_dump
, "no optab.");
1142 vec_mode
= TYPE_MODE (vectype
);
1143 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1145 if (vect_print_dump_info (REPORT_DETAILS
))
1146 fprintf (vect_dump
, "op not supported by target.");
/* Without a target handler, the vector mode size is compared against
   UNITS_PER_WORD and the vectorization factor against
   vect_min_worthwhile_factor before "proceeding using word mode".  */
1147 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1148 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1149 < vect_min_worthwhile_factor (code
))
1151 if (vect_print_dump_info (REPORT_DETAILS
))
1152 fprintf (vect_dump
, "proceeding using word mode.");
1155 /* Worthwhile without SIMD support? */
1156 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1157 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1158 < vect_min_worthwhile_factor (code
))
1160 if (vect_print_dump_info (REPORT_DETAILS
))
1161 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1165 /* check support for the epilog operation */
1166 if (!reduction_code_for_scalar_code (code
, &reduc_code
))
1168 reduc_optab
= optab_for_tree_code (reduc_code
, vectype
);
1171 if (vect_print_dump_info (REPORT_DETAILS
))
1172 fprintf (vect_dump
, "no optab for reduction.");
/* NUM_TREE_CODES replaces the reduction code when no reduction optab (here)
   or no target handler (below) is reported; reduc_code is later handed to
   vect_create_epilog_for_reduction.  */
1173 reduc_code
= NUM_TREE_CODES
;
1175 if (reduc_optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1177 if (vect_print_dump_info (REPORT_DETAILS
))
1178 fprintf (vect_dump
, "reduc op not supported by target.");
1179 reduc_code
= NUM_TREE_CODES
;
1182 if (!vec_stmt
) /* transformation not required. */
1184 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
1190 if (vect_print_dump_info (REPORT_DETAILS
))
1191 fprintf (vect_dump
, "transform reduction.");
1193 /* Create the destination vector */
1194 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1197 /* Create the reduction-phi that defines the reduction-operand. */
1198 new_phi
= create_phi_node (vec_dest
, loop
->header
);
1201 /* Prepare the operand that is defined inside the loop body */
1202 loop_vec_def
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1204 /* Create the vectorized operation that computes the partial results */
1205 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1206 build2 (code
, vectype
, loop_vec_def
, PHI_RESULT (new_phi
)));
1207 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1208 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1209 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1212 /* Finalize the reduction-phi (set its arguments) and create the
1213 epilog reduction code. */
1214 vect_create_epilog_for_reduction (new_temp
, stmt
, op1
, reduc_code
, new_phi
);
1219 /* Function vectorizable_assignment.
1221 Check if STMT performs an assignment (copy) that can be vectorized.
1222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1223 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The visible checks are, in order: STMT_VINFO_RELEVANT_P, def type
   vect_loop_def (asserted), STMT is a MODIFY_EXPR, its destination is an
   SSA_NAME, and its source operand passes vect_is_simple_use.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1227 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1233 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1234 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1235 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1238 enum vect_def_type dt
;
1240 /* Is vectorizable assignment? */
1241 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1244 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1246 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1249 scalar_dest
= TREE_OPERAND (stmt
, 0);
1250 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1253 op
= TREE_OPERAND (stmt
, 1);
1254 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1256 if (vect_print_dump_info (REPORT_DETAILS
))
1257 fprintf (vect_dump
, "use not simple.");
/* Analysis phase: with no VEC_STMT only the stmt kind is recorded.  */
1261 if (!vec_stmt
) /* transformation not required. */
1263 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
1268 if (vect_print_dump_info (REPORT_DETAILS
))
1269 fprintf (vect_dump
, "transform assignment.");
/* Handle the def: create the vector destination variable.  */
1272 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Handle the use: fetch the vector def for the source operand.  */
1275 op
= TREE_OPERAND (stmt
, 1);
1276 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1278 /* Arguments are ready. create the new vector stmt. */
1279 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
1280 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1281 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1282 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1288 /* Function vect_min_worthwhile_factor.
1290 For a loop where we could vectorize the operation indicated by CODE,
1291 return the minimum vectorization factor that makes it worthwhile
1292 to use generic vectors. */
1294 vect_min_worthwhile_factor (enum tree_code code
)
1315 /* Function vectorizable_operation.
1317 Check if STMT performs a binary or unary operation that can be vectorized.
1318 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1319 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1320 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Overview of the visible steps:
   1. Test relevance, def type (asserted vect_loop_def) and liveness.
   2. Require a MODIFY_EXPR with an SSA_NAME destination and a unary or
      binary operation whose operands all pass vect_is_simple_use.
   3. Query the optab/insn handler for the vector mode, with the same
      word-mode and "worthwhile" tests as the reduction case.
   4. Apply extra checks to LSHIFT_EXPR / RSHIFT_EXPR.
   5. When VEC_STMT is non-NULL, build the vector stmt.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1323 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1328 tree op0
, op1
= NULL
;
1329 tree vec_oprnd0
, vec_oprnd1
=NULL
;
1330 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1331 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1332 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1334 enum tree_code code
;
1335 enum machine_mode vec_mode
;
1341 enum machine_mode optab_op2_mode
;
1343 enum vect_def_type dt
;
1345 /* Is STMT a vectorizable binary/unary operation? */
1346 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1349 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1351 if (STMT_VINFO_LIVE_P (stmt_info
))
1353 /* FORNOW: not yet supported. */
1354 if (vect_print_dump_info (REPORT_DETAILS
))
1355 fprintf (vect_dump
, "value used after loop.");
1359 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1362 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1365 operation
= TREE_OPERAND (stmt
, 1);
1366 code
= TREE_CODE (operation
);
1367 optab
= optab_for_tree_code (code
, vectype
);
1369 /* Support only unary or binary operations. */
1370 op_type
= TREE_CODE_LENGTH (code
);
1371 if (op_type
!= unary_op
&& op_type
!= binary_op
)
1373 if (vect_print_dump_info (REPORT_DETAILS
))
1374 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
/* Check every operand.  DT is overwritten on each iteration, so after the
   loop it holds the def type of the last operand examined; the shift check
   further down reads it.  */
1378 for (i
= 0; i
< op_type
; i
++)
1380 op
= TREE_OPERAND (operation
, i
);
1381 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1383 if (vect_print_dump_info (REPORT_DETAILS
))
1384 fprintf (vect_dump
, "use not simple.");
1389 /* Supportable by target? */
1392 if (vect_print_dump_info (REPORT_DETAILS
))
1393 fprintf (vect_dump
, "no optab.");
1396 vec_mode
= TYPE_MODE (vectype
);
1397 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
1398 if (icode
== CODE_FOR_nothing
)
1400 if (vect_print_dump_info (REPORT_DETAILS
))
1401 fprintf (vect_dump
, "op not supported by target.");
1402 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1403 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1404 < vect_min_worthwhile_factor (code
))
1406 if (vect_print_dump_info (REPORT_DETAILS
))
1407 fprintf (vect_dump
, "proceeding using word mode.");
1410 /* Worthwhile without SIMD support? */
1411 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1412 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1413 < vect_min_worthwhile_factor (code
))
1415 if (vect_print_dump_info (REPORT_DETAILS
))
1416 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1420 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
1422 /* FORNOW: not yet supported. */
1423 if (!VECTOR_MODE_P (vec_mode
))
1426 /* Invariant argument is needed for a vector shift
1427 by a scalar shift operand. */
/* The mode of insn operand 2 tells whether the shift count is a vector or
   a scalar; a scalar count is accepted only for constant or invariant
   defs.  */
1428 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1429 if (! (VECTOR_MODE_P (optab_op2_mode
)
1430 || dt
== vect_constant_def
1431 || dt
== vect_invariant_def
))
1433 if (vect_print_dump_info (REPORT_DETAILS
))
1434 fprintf (vect_dump
, "operand mode requires invariant argument.");
1439 if (!vec_stmt
) /* transformation not required. */
1441 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
1447 if (vect_print_dump_info (REPORT_DETAILS
))
1448 fprintf (vect_dump
, "transform binary/unary operation.");
/* Handle the def: create the vector destination variable.  */
1451 scalar_dest
= TREE_OPERAND (stmt
, 0);
1452 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Handle the uses: fetch the vector defs of the operand(s).  */
1455 op0
= TREE_OPERAND (operation
, 0);
1456 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1458 if (op_type
== binary_op
)
1460 op1
= TREE_OPERAND (operation
, 1);
1462 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
1464 /* Vector shl and shr insn patterns can be defined with
1465 scalar operand 2 (shift operand). In this case, use
1466 constant or loop invariant op1 directly, without
1467 extending it to vector mode first. */
1469 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1470 if (!VECTOR_MODE_P (optab_op2_mode
))
1472 if (vect_print_dump_info (REPORT_DETAILS
))
1473 fprintf (vect_dump
, "operand 1 using scalar mode.");
/* NOTE(review): the statements that use op1 directly in the scalar-mode
   case, and the condition guarding the call below, are not visible in this
   listing.  */
1479 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1482 /* Arguments are ready. create the new vector stmt. */
1484 if (op_type
== binary_op
)
1485 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1486 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
/* NOTE(review): presumably the unary alternative of the test above; the
   else keyword is not visible in this listing.  */
1488 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1489 build1 (code
, vectype
, vec_oprnd0
));
1490 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1491 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1492 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1498 /* Function vectorizable_store.
1500 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
can be vectorized.
1502 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1503 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1504 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The visible checks are, in order: STMT is a MODIFY_EXPR, its destination
   is an ARRAY_REF or INDIRECT_REF, its source operand passes
   vect_is_simple_use, mov_optab has a handler for the vector mode, and the
   stmt has a data reference.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1507 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1513 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1514 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1515 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1516 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1517 enum machine_mode vec_mode
;
/* NOTE(review): "cheme" looks like a misspelling of "scheme"; it is an
   identifier, so it is left unchanged here.  */
1519 enum dr_alignment_support alignment_support_cheme
;
1522 enum vect_def_type dt
;
1524 /* Is vectorizable store? */
1526 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1529 scalar_dest
= TREE_OPERAND (stmt
, 0);
1530 if (TREE_CODE (scalar_dest
) != ARRAY_REF
1531 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
1534 op
= TREE_OPERAND (stmt
, 1);
1535 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1537 if (vect_print_dump_info (REPORT_DETAILS
))
1538 fprintf (vect_dump
, "use not simple.");
1542 vec_mode
= TYPE_MODE (vectype
);
1543 /* FORNOW. In some cases can vectorize even if data-type not supported
1544 (e.g. - array initialization with 0). */
1545 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
1548 if (!STMT_VINFO_DATA_REF (stmt_info
))
1552 if (!vec_stmt
) /* transformation not required. */
1554 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
1560 if (vect_print_dump_info (REPORT_DETAILS
))
1561 fprintf (vect_dump
, "transform store");
/* Only aligned stores are handled at this point: both a supported scheme
   and dr_aligned are asserted.  */
1563 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
1564 gcc_assert (alignment_support_cheme
);
1565 gcc_assert (alignment_support_cheme
== dr_aligned
); /* FORNOW */
1567 /* Handle use - get the vectorized def from the defining stmt. */
1568 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1571 /* FORNOW: make sure the data reference is aligned. */
1572 vect_align_data_ref (stmt
);
/* Build the vector destination as a dereference of the pointer returned by
   vect_create_data_ref_ptr.  */
1573 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1574 data_ref
= build_fold_indirect_ref (data_ref
);
1576 /* Arguments are ready. create the new vector stmt. */
1577 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
1578 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1580 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1581 element's definition to the vector's definition then update the
1582 defining statement. The original is being deleted so the same
1583 SSA_NAMEs can be used. */
1584 copy_virtual_operands (*vec_stmt
, stmt
);
1586 FOR_EACH_SSA_TREE_OPERAND (def
, stmt
, iter
, SSA_OP_VMAYDEF
)
1588 SSA_NAME_DEF_STMT (def
) = *vec_stmt
;
1590 /* If this virtual def has a use outside the loop and a loop peel is
1591 performed then the def may be renamed by the peel. Mark it for
1592 renaming so the later use will also be renamed. */
1593 mark_sym_for_renaming (SSA_NAME_VAR (def
));
1600 /* vectorizable_load.
1602 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
can be vectorized.
1604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1609 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1612 tree vec_dest
= NULL
;
1613 tree data_ref
= NULL
;
1615 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1616 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1617 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1624 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1625 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1626 edge pe
= loop_preheader_edge (loop
);
1627 enum dr_alignment_support alignment_support_cheme
;
1629 /* Is vectorizable load? */
1630 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1633 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1635 if (STMT_VINFO_LIVE_P (stmt_info
))
1637 /* FORNOW: not yet supported. */
1638 if (vect_print_dump_info (REPORT_DETAILS
))
1639 fprintf (vect_dump
, "value used after loop.");
1643 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1646 scalar_dest
= TREE_OPERAND (stmt
, 0);
1647 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1650 op
= TREE_OPERAND (stmt
, 1);
1651 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
1654 if (!STMT_VINFO_DATA_REF (stmt_info
))
1657 mode
= (int) TYPE_MODE (vectype
);
1659 /* FORNOW. In some cases can vectorize even if data-type not supported
1660 (e.g. - data copies). */
1661 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
1663 if (vect_print_dump_info (REPORT_DETAILS
))
1664 fprintf (vect_dump
, "Aligned load, but unsupported type.");
1668 if (!vec_stmt
) /* transformation not required. */
1670 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
1676 if (vect_print_dump_info (REPORT_DETAILS
))
1677 fprintf (vect_dump
, "transform load.");
1679 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
1680 gcc_assert (alignment_support_cheme
);
1682 if (alignment_support_cheme
== dr_aligned
1683 || alignment_support_cheme
== dr_unaligned_supported
)
1694 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1695 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1696 if (aligned_access_p (dr
))
1697 data_ref
= build_fold_indirect_ref (data_ref
);
1700 int mis
= DR_MISALIGNMENT (dr
);
1701 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
1702 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
1703 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
1705 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1706 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1707 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1708 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1709 copy_virtual_operands (new_stmt
, stmt
);
1711 else if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
1715 msq_init = *(floor(p1))
1716 p2 = initial_addr + VS - 1;
1717 magic = have_builtin ? builtin_result : initial_address;
1720 p2' = p2 + indx * vectype_size
1722 vec_dest = realign_load (msq, lsq, magic)
1736 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1737 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1738 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
1740 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
1741 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1742 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1743 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1744 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1745 gcc_assert (!new_bb
);
1746 msq_init
= TREE_OPERAND (new_stmt
, 0);
1747 copy_virtual_operands (new_stmt
, stmt
);
1748 update_vuses_to_preheader (new_stmt
, loop
);
1751 /* <2> Create lsq = *(floor(p2')) in the loop */
1752 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
1753 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1754 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
1755 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
1756 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1757 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1758 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1759 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1760 lsq
= TREE_OPERAND (new_stmt
, 0);
1761 copy_virtual_operands (new_stmt
, stmt
);
1765 if (targetm
.vectorize
.builtin_mask_for_load
)
1767 /* Create permutation mask, if required, in loop preheader. */
1769 params
= build_tree_list (NULL_TREE
, init_addr
);
1770 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1771 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
1772 new_stmt
= build_function_call_expr (builtin_decl
, params
);
1773 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1774 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1775 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1776 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1777 gcc_assert (!new_bb
);
1778 magic
= TREE_OPERAND (new_stmt
, 0);
1780 /* The result of the CALL_EXPR to this builtin is determined from
1781 the value of the parameter and no global variables are touched
1782 which makes the builtin a "const" function. Requiring the
1783 builtin to have the "const" attribute makes it unnecessary
1784 to call mark_call_clobbered. */
1785 gcc_assert (TREE_READONLY (builtin_decl
));
1789 /* Use current address instead of init_addr for reduced reg pressure. */
1791 magic
= dataref_ptr
;
1795 /* <4> Create msq = phi <msq_init, lsq> in loop */
1796 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1797 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
1798 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
1799 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
1800 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
1801 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
1804 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1805 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1806 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
1807 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1808 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1809 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1810 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1815 *vec_stmt
= new_stmt
;
1820 /* Function vectorizable_live_operation.
1822 STMT computes a value that is used outside the loop. Check if
1823 it can be supported. */
/* BSI and VEC_STMT are marked ATTRIBUTE_UNUSED: as the closing comment of
   this function says, no transformation is required for the supported
   cases.  The visible checks are: STMT_VINFO_LIVE_P, STMT is a MODIFY_EXPR
   with an SSA_NAME destination, and each operand of the operation passes
   vect_is_simple_use with an invariant or constant def type.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1826 vectorizable_live_operation (tree stmt
,
1827 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
1828 tree
*vec_stmt ATTRIBUTE_UNUSED
)
1831 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1832 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1834 enum tree_code code
;
1838 enum vect_def_type dt
;
1840 if (!STMT_VINFO_LIVE_P (stmt_info
))
1843 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1846 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1849 operation
= TREE_OPERAND (stmt
, 1);
1850 code
= TREE_CODE (operation
);
1852 op_type
= TREE_CODE_LENGTH (code
);
1854 /* FORNOW: support only if all uses are invariant. This means
1855 that the scalar operations can remain in place, unvectorized.
1856 The original last scalar value that they compute will be used. */
1858 for (i
= 0; i
< op_type
; i
++)
1860 op
= TREE_OPERAND (operation
, i
);
1861 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1863 if (vect_print_dump_info (REPORT_DETAILS
))
1864 fprintf (vect_dump
, "use not simple.");
/* Operands defined in the loop do not satisfy the FORNOW restriction
   above.  */
1868 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
)
1872 /* No transformation is required for the cases we currently support. */
1877 /* Function vect_is_simple_cond.
1880 LOOP_VINFO - the loop_vec_info of the loop that is being vectorized.
1881 COND - Condition that is checked for simple use.
1883 Returns whether a COND can be vectorized. Checks whether
1884 condition operands are supportable using vect_is_simple_use. */
/* NOTE(review): the declarations of lhs, rhs and def, and the return
   statements, are not visible in this listing.  */
1887 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
1891 enum vect_def_type dt
;
/* COND must be a comparison.  */
1893 if (!COMPARISON_CLASS_P (cond
))
1896 lhs
= TREE_OPERAND (cond
, 0);
1897 rhs
= TREE_OPERAND (cond
, 1);
/* Each comparison operand is either an SSA_NAME, which is checked with
   vect_is_simple_use, or is tested against INTEGER_CST and REAL_CST.  */
1899 if (TREE_CODE (lhs
) == SSA_NAME
)
1901 tree lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
1902 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
1905 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
)
/* Same test for the right-hand operand.  */
1908 if (TREE_CODE (rhs
) == SSA_NAME
)
1910 tree rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
1911 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
1914 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
)
1920 /* vectorizable_condition.
1922 Check if STMT is conditional modify expression that can be vectorized.
1923 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1924 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
at BSI.
1927 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Overview of the visible steps:
   1. Test relevance, def type (asserted vect_loop_def) and liveness.
   2. Require a MODIFY_EXPR whose right-hand side is a COND_EXPR with a
      condition accepted by vect_is_simple_cond.
   3. Check the then/else clauses: SSA_NAMEs go through
      vect_is_simple_use; other values are tested against INTEGER_CST and
      REAL_CST.
   4. Build a VEC_COND_EXPR from the vector defs of the four operands.
   NOTE(review): several declarations, braces and return statements are
   not visible in this listing.  */
1930 vectorizable_condition (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1932 tree scalar_dest
= NULL_TREE
;
1933 tree vec_dest
= NULL_TREE
;
1934 tree op
= NULL_TREE
;
1935 tree cond_expr
, then_clause
, else_clause
;
1936 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1937 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1938 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
1939 tree vec_compare
, vec_cond_expr
;
1941 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1942 enum machine_mode vec_mode
;
1944 enum vect_def_type dt
;
1946 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1949 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1951 if (STMT_VINFO_LIVE_P (stmt_info
))
1953 /* FORNOW: not yet supported. */
1954 if (vect_print_dump_info (REPORT_DETAILS
))
1955 fprintf (vect_dump
, "value used after loop.");
1959 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1962 op
= TREE_OPERAND (stmt
, 1);
1964 if (TREE_CODE (op
) != COND_EXPR
)
/* Split the COND_EXPR into its condition and its two value operands.  */
1967 cond_expr
= TREE_OPERAND (op
, 0);
1968 then_clause
= TREE_OPERAND (op
, 1);
1969 else_clause
= TREE_OPERAND (op
, 2);
1971 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
1974 if (TREE_CODE (then_clause
) == SSA_NAME
)
1976 tree then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
1977 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
1978 &then_def_stmt
, &def
, &dt
))
1981 else if (TREE_CODE (then_clause
) != INTEGER_CST
1982 && TREE_CODE (then_clause
) != REAL_CST
)
1985 if (TREE_CODE (else_clause
) == SSA_NAME
)
1987 tree else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
1988 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
1989 &else_def_stmt
, &def
, &dt
))
1992 else if (TREE_CODE (else_clause
) != INTEGER_CST
1993 && TREE_CODE (else_clause
) != REAL_CST
)
1997 vec_mode
= TYPE_MODE (vectype
);
/* Record the stmt kind and report whether the target can expand the
   conditional expression in VEC_MODE.
   NOTE(review): presumably guarded by a test for a NULL VEC_STMT as in the
   sibling functions; the guard is not visible in this listing.  */
2001 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
2002 return expand_vec_cond_expr_p (op
, vec_mode
);
/* Handle the def: create the vector destination variable.  */
2008 scalar_dest
= TREE_OPERAND (stmt
, 0);
2009 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2011 /* Handle cond expr. */
/* NOTE(review): the assignment targets of the next two calls are not
   visible in this listing; presumably vec_cond_lhs and vec_cond_rhs, which
   are read when vec_compare is built below.  */
2013 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
2015 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
2016 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
2017 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
2019 /* Arguments are ready. create the new vector stmt. */
/* The vector comparison reuses the tree code of the scalar condition.  */
2020 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
2021 vec_cond_lhs
, vec_cond_rhs
);
2022 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
2023 vec_compare
, vec_then_clause
, vec_else_clause
);
2025 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_cond_expr
);
2026 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
2027 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
2028 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2033 /* Function vect_transform_stmt.
2035 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Relevant stmts are dispatched on STMT_VINFO_TYPE to the matching
   vectorizable_* routine, and the resulting vector stmt is recorded in
   STMT_VINFO_VEC_STMT.  Live stmts are handled afterwards: reductions go
   to vectorizable_reduction, the remaining case to
   vectorizable_live_operation.  IS_STORE is initialised to false.
   NOTE(review): the declaration of done, the break/default lines, the
   braces and the return statement are not visible in this listing.  */
2038 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
2040 bool is_store
= false;
2041 tree vec_stmt
= NULL_TREE
;
2042 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2045 if (STMT_VINFO_RELEVANT_P (stmt_info
))
2047 switch (STMT_VINFO_TYPE (stmt_info
))
2049 case op_vec_info_type
:
2050 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
2054 case assignment_vec_info_type
:
2055 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
2059 case load_vec_info_type
:
2060 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
2064 case store_vec_info_type
:
2065 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
2070 case condition_vec_info_type
:
2071 done
= vectorizable_condition (stmt
, bsi
, &vec_stmt
);
/* NOTE(review): presumably the default case of the switch; its label is not
   visible in this listing.  */
2076 if (vect_print_dump_info (REPORT_DETAILS
))
2077 fprintf (vect_dump
, "stmt not supported.");
2081 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Stmts whose value is used after the loop.  */
2084 if (STMT_VINFO_LIVE_P (stmt_info
))
2086 switch (STMT_VINFO_TYPE (stmt_info
))
2088 case reduc_vec_info_type
:
2089 done
= vectorizable_reduction (stmt
, bsi
, &vec_stmt
);
2094 done
= vectorizable_live_operation (stmt
, bsi
, &vec_stmt
);
/* No vectorized stmt may have been recorded for STMT before this one is
   stored.  */
2100 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info
));
2101 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
2109 /* This function builds ni_name = number of iterations loop executes
2110 on the loop preheader. */
/* LOOP_VINFO supplies both the loop and the iteration-count expression
   (LOOP_VINFO_NITERS), which is unshared before use.
   NOTE(review): the declaration of pe, the braces and the return statement
   are not visible in this listing.  */
2113 vect_build_loop_niters (loop_vec_info loop_vinfo
)
2115 tree ni_name
, stmt
, var
;
2117 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2118 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
/* Create a temporary named "niters" with the type of NI and turn NI into
   an operand based on it; force_gimple_operand hands back any stmt it
   produced through STMT.  */
2120 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
2121 add_referenced_tmp_var (var
);
2122 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
2124 pe
= loop_preheader_edge (loop
);
/* Inserting on the preheader edge is asserted not to create a new basic
   block.  */
2127 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2128 gcc_assert (!new_bb
);
2135 /* This function generates the following statements:
2137 ni_name = number of iterations loop executes
2138 ratio = ni_name / vf
2139 ratio_mult_vf_name = ratio * vf
2141 and places them at the loop preheader edge. */
/* The three names are handed back through the pointer parameters.  Both
   the division and the multiplication by VF are emitted as shifts by
   log2 (VF).
   NOTE(review): the ni_name_ptr parameter line, several declarations, the
   braces and the return statement are not visible in this listing;
   *ni_name_ptr is written at the end of the function.  */
2144 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
2146 tree
*ratio_mult_vf_name_ptr
,
2147 tree
*ratio_name_ptr
)
2155 tree ratio_mult_vf_name
;
2156 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2157 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
2158 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2161 pe
= loop_preheader_edge (loop
);
2163 /* Generate temporary variable that contains
2164 number of iterations loop executes. */
2166 ni_name
= vect_build_loop_niters (loop_vinfo
);
/* Shift count shared by both shifts below.  Assumes VF is a power of two,
   since exact_log2 is applied to it - TODO confirm.  */
2167 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
2169 /* Create: ratio = ni >> log2(vf) */
2171 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
2172 add_referenced_tmp_var (var
);
2173 ratio_name
= make_ssa_name (var
, NULL_TREE
);
2174 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
2175 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
2176 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
/* Insert on the preheader edge; this is asserted not to create a new basic
   block.  */
2178 pe
= loop_preheader_edge (loop
);
2179 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2180 gcc_assert (!new_bb
);
2182 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2184 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
2185 add_referenced_tmp_var (var
);
2186 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
2187 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
2188 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
2189 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
2191 pe
= loop_preheader_edge (loop
);
2192 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2193 gcc_assert (!new_bb
);
/* Hand the generated names back to the caller.  */
2195 *ni_name_ptr
= ni_name
;
2196 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
2197 *ratio_name_ptr
= ratio_name
;
2203 /* Function update_vuses_to_preheader.
2206 STMT - a statement with potential VUSEs.
2207 LOOP - the loop whose preheader will contain STMT.
2209 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2210 appears to be defined in a V_MAY_DEF in another statement in a loop.
2211 One such case is when the VUSE is at the dereference of a __restricted__
2212 pointer in a load and the V_MAY_DEF is at the dereference of a different
2213 __restricted__ pointer in a store. Vectorization may result in
2214 copy_virtual_operands being called to copy the problematic VUSE to a new
2215 statement that is being inserted in the loop preheader. This procedure
2216 is called to change the SSA_NAME in the new statement's VUSE from the
2217 SSA_NAME updated in the loop to the related SSA_NAME available on the
2218 path entering the loop.
2220 When this function is called, we have the following situation:
2225 # name1 = phi < name0 , name2>
2230 # name2 = vdef <name1>
2235 Stmt S1 was created in the loop preheader block as part of misaligned-load
2236 handling. This function fixes the name of the vuse of S1 from 'name1' to
'name0'. */
2240 update_vuses_to_preheader (tree stmt
, struct loop
*loop
)
2242 basic_block header_bb
= loop
->header
;
2243 edge preheader_e
= loop_preheader_edge (loop
);
2245 use_operand_p use_p
;
2247 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_VUSE
)
2249 tree ssa_name
= USE_FROM_PTR (use_p
);
2250 tree def_stmt
= SSA_NAME_DEF_STMT (ssa_name
);
2251 tree name_var
= SSA_NAME_VAR (ssa_name
);
2252 basic_block bb
= bb_for_stmt (def_stmt
);
2254 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2255 if (!IS_EMPTY_STMT (def_stmt
)
2256 && flow_bb_inside_loop_p (loop
, bb
))
2258 /* If the block containing the statement defining the SSA_NAME
2259 is in the loop then it's necessary to find the definition
2260 outside the loop using the PHI nodes of the header. */
2262 bool updated
= false;
2264 for (phi
= phi_nodes (header_bb
); phi
; phi
= TREE_CHAIN (phi
))
2266 if (SSA_NAME_VAR (PHI_RESULT (phi
)) == name_var
)
2268 SET_USE (use_p
, PHI_ARG_DEF (phi
, preheader_e
->dest_idx
));
2273 gcc_assert (updated
);
2279 /* Function vect_update_ivs_after_vectorizer.
2281 "Advance" the induction variables of LOOP to the value they should take
2282 after the execution of LOOP. This is currently necessary because the
2283 vectorizer does not handle induction variables that are used after the
2284 loop. Such a situation occurs when the last iterations of LOOP are
2286 1. We introduced new uses after LOOP for IVs that were not originally used
2287 after LOOP: the IVs of LOOP are now used by an epilog loop.
2288 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2289 times, whereas the loop IVs should be bumped N times.
2292 - LOOP - a loop that is going to be vectorized. The last few iterations
2293 of LOOP were peeled.
2294 - NITERS - the number of iterations that LOOP executes (before it is
2295 vectorized). i.e, the number of times the ivs should be bumped.
2296 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2297 coming out from LOOP on which there are uses of the LOOP ivs
2298 (this is the path from LOOP->exit to epilog_loop->preheader).
2300 The new definitions of the ivs are placed in LOOP->exit.
2301 The phi args associated with the edge UPDATE_E in the bb
2302 UPDATE_E->dest are updated accordingly.
2304 Assumption 1: Like the rest of the vectorizer, this function assumes
2305 a single loop exit that has a single predecessor.
2307 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2308 organized in the same order.
2310 Assumption 3: The access function of the ivs is simple enough (see
2311 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2313 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2314 coming out of LOOP on which the ivs of LOOP are used (this is the path
2315 that leads to the epilog loop; other paths skip the epilog loop). This
2316 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2317 needs to have its phis updated.
2321 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
2324 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2325 basic_block exit_bb
= loop
->single_exit
->dest
;
2327 basic_block update_bb
= update_e
->dest
;
2329 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2331 /* Make sure there exists a single-predecessor exit bb: */
2332 gcc_assert (single_pred_p (exit_bb
));
2334 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
2336 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
2338 tree access_fn
= NULL
;
2339 tree evolution_part
;
2342 tree var
, stmt
, ni
, ni_name
;
2343 block_stmt_iterator last_bsi
;
2345 if (vect_print_dump_info (REPORT_DETAILS
))
2347 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
2348 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2351 /* Skip virtual phi's. */
2352 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2354 if (vect_print_dump_info (REPORT_DETAILS
))
2355 fprintf (vect_dump
, "virtual phi. skip.");
2359 /* Skip reduction phis. */
2360 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
2362 if (vect_print_dump_info (REPORT_DETAILS
))
2363 fprintf (vect_dump
, "reduc phi. skip.");
2367 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
2368 gcc_assert (access_fn
);
2370 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
2371 gcc_assert (evolution_part
!= NULL_TREE
);
2373 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2374 of degree >= 2 or exponential. */
2375 gcc_assert (!tree_is_chrec (evolution_part
));
2377 step_expr
= evolution_part
;
2378 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
2381 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
2382 build2 (MULT_EXPR
, TREE_TYPE (niters
),
2383 niters
, step_expr
), init_expr
);
2385 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
2386 add_referenced_tmp_var (var
);
2388 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
2390 /* Insert stmt into exit_bb. */
2391 last_bsi
= bsi_last (exit_bb
);
2393 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
2395 /* Fix phi expressions in the successor bb. */
2396 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
2401 /* Function vect_do_peeling_for_loop_bound
2403 Peel the last iterations of the loop represented by LOOP_VINFO.
2404 The peeled iterations form a new epilog loop. Given that the loop now
2405 iterates NITERS times, the new epilog loop iterates
2406 NITERS % VECTORIZATION_FACTOR times.
2408 The original loop will later be made to iterate
2409 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2412 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
2413 struct loops
*loops
)
2415 tree ni_name
, ratio_mult_vf_name
;
2416 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2417 struct loop
*new_loop
;
2419 basic_block preheader
;
2422 if (vect_print_dump_info (REPORT_DETAILS
))
2423 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
2425 initialize_original_copy_tables ();
2427 /* Generate the following variables on the preheader of original loop:
2429 ni_name = number of iteration the original loop executes
2430 ratio = ni_name / vf
2431 ratio_mult_vf_name = ratio * vf */
2432 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
2433 &ratio_mult_vf_name
, ratio
);
2435 loop_num
= loop
->num
;
2436 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->single_exit
,
2437 ratio_mult_vf_name
, ni_name
, false);
2438 gcc_assert (new_loop
);
2439 gcc_assert (loop_num
== loop
->num
);
2440 #ifdef ENABLE_CHECKING
2441 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
2444 /* A guard that controls whether the new_loop is to be executed or skipped
2445 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2446 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2447 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2448 is on the path where the LOOP IVs are used and need to be updated. */
2450 preheader
= loop_preheader_edge (new_loop
)->src
;
2451 if (EDGE_PRED (preheader
, 0)->src
== loop
->single_exit
->dest
)
2452 update_e
= EDGE_PRED (preheader
, 0);
2454 update_e
= EDGE_PRED (preheader
, 1);
2456 /* Update IVs of original loop as if they were advanced
2457 by ratio_mult_vf_name steps. */
2458 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
2460 /* After peeling we have to reset scalar evolution analyzer. */
2463 free_original_copy_tables ();
2467 /* Function vect_gen_niters_for_prolog_loop
2469 Set the number of iterations for the loop represented by LOOP_VINFO
2470 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2471 and the misalignment of DR - the data reference recorded in
2472 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2473 this loop, the data reference DR will refer to an aligned location.
2475 The following computation is generated:
2477 If the misalignment of DR is known at compile time:
2478 addr_mis = int mis = DR_MISALIGNMENT (dr);
2479 Else, compute address misalignment in bytes:
2480 addr_mis = addr & (vectype_size - 1)
2482 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2484 (elem_size = element type size; an element is the scalar element
2485 whose type is the inner type of the vectype) */
2488 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
2490 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
2491 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2492 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2494 tree iters
, iters_name
;
2497 tree dr_stmt
= DR_STMT (dr
);
2498 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
2499 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2500 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
2501 tree niters_type
= TREE_TYPE (loop_niters
);
2503 pe
= loop_preheader_edge (loop
);
2505 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
2507 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
2508 int element_size
= vectype_align
/vf
;
2509 int elem_misalign
= byte_misalign
/ element_size
;
2511 if (vect_print_dump_info (REPORT_DETAILS
))
2512 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
2513 iters
= build_int_cst (niters_type
, (vf
- elem_misalign
)&(vf
-1));
2517 tree new_stmts
= NULL_TREE
;
2519 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
2520 tree ptr_type
= TREE_TYPE (start_addr
);
2521 tree size
= TYPE_SIZE (ptr_type
);
2522 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
2523 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
2524 tree elem_size_log
=
2525 build_int_cst (type
, exact_log2 (vectype_align
/vf
));
2526 tree vf_minus_1
= build_int_cst (type
, vf
- 1);
2527 tree vf_tree
= build_int_cst (type
, vf
);
2531 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
2532 gcc_assert (!new_bb
);
2534 /* Create: byte_misalign = addr & (vectype_size - 1) */
2536 build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
2538 /* Create: elem_misalign = byte_misalign / element_size */
2540 build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
2542 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2543 iters
= build2 (MINUS_EXPR
, type
, vf_tree
, elem_misalign
);
2544 iters
= build2 (BIT_AND_EXPR
, type
, iters
, vf_minus_1
);
2545 iters
= fold_convert (niters_type
, iters
);
2548 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2549 /* If the loop bound is known at compile time we already verified that it is
2550 greater than vf; since the misalignment ('iters') is at most vf, there's
2551 no need to generate the MIN_EXPR in this case. */
2552 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
2553 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
2555 if (vect_print_dump_info (REPORT_DETAILS
))
2557 fprintf (vect_dump
, "niters for prolog loop: ");
2558 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
2561 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
2562 add_referenced_tmp_var (var
);
2563 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
2565 /* Insert stmt on loop preheader edge. */
2568 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2569 gcc_assert (!new_bb
);
2576 /* Function vect_update_init_of_dr
2578 NITERS iterations were peeled from LOOP. DR represents a data reference
2579 in LOOP. This function updates the information recorded in DR to
2580 account for the fact that the first NITERS iterations had already been
2581 executed. Specifically, it updates the OFFSET field of DR. */
2584 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
2586 tree offset
= DR_OFFSET (dr
);
2588 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
2589 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
2590 DR_OFFSET (dr
) = offset
;
2594 /* Function vect_update_inits_of_drs
2596 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2597 This function updates the information recorded for the data references in
2598 the loop to account for the fact that the first NITERS iterations had
2599 already been executed. Specifically, it updates the initial_condition of the
2600 access_function of all the data_references in the loop. */
2603 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
2606 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2608 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
2609 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
2611 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
2613 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
2614 vect_update_init_of_dr (dr
, niters
);
2619 /* Function vect_do_peeling_for_alignment
2621 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2622 'niters' is set to the misalignment of one of the data references in the
2623 loop, thereby forcing it to refer to an aligned location at the beginning
2624 of the execution of this loop. The data reference for which we are
2625 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2628 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
2630 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2631 tree niters_of_prolog_loop
, ni_name
;
2633 struct loop
*new_loop
;
2635 if (vect_print_dump_info (REPORT_DETAILS
))
2636 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
2638 initialize_original_copy_tables ();
2640 ni_name
= vect_build_loop_niters (loop_vinfo
);
2641 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
2643 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2645 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
2646 niters_of_prolog_loop
, ni_name
, true);
2647 gcc_assert (new_loop
);
2648 #ifdef ENABLE_CHECKING
2649 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
2652 /* Update number of times loop executes. */
2653 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
2654 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
2655 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
2657 /* Update the init conditions of the access functions of all data refs. */
2658 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
2660 /* After peeling we have to reset scalar evolution analyzer. */
2663 free_original_copy_tables ();
2667 /* Function vect_create_cond_for_align_checks.
2669 Create a conditional expression that represents the alignment checks for
2670 all of data references (array element references) whose alignment must be
2674 LOOP_VINFO - two fields of the loop information are used.
2675 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2676 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2679 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2681 The returned value is the conditional expression to be used in the if
2682 statement that controls which version of the loop gets executed at runtime.
2684 The algorithm makes two assumptions:
2685 1) The number of bytes "n" in a vector is a power of 2.
2686 2) An address "a" is aligned if a%n is zero and that this
2687 test can be done as a&(n-1) == 0. For example, for 16
2688 byte vectors the test is a&0xf == 0. */
2691 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
2692 tree
*cond_expr_stmt_list
)
2694 VEC(tree
,heap
) *may_misalign_stmts
2695 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
2697 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
2701 tree int_ptrsize_type
;
2703 tree or_tmp_name
= NULL_TREE
;
2704 tree and_tmp
, and_tmp_name
, and_stmt
;
2707 /* Check that mask is one less than a power of 2, i.e., mask is
2708 all zeros followed by all ones. */
2709 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
2711 /* CHECKME: what is the best integer or unsigned type to use to hold a
2712 cast from a pointer value? */
2713 psize
= TYPE_SIZE (ptr_type_node
);
2715 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
2717 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2718 of the first vector of the i'th data reference. */
2720 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, ref_stmt
); i
++)
2722 tree new_stmt_list
= NULL_TREE
;
2724 tree addr_tmp
, addr_tmp_name
, addr_stmt
;
2725 tree or_tmp
, new_or_tmp_name
, or_stmt
;
2727 /* create: addr_tmp = (int)(address_of_first_vector) */
2728 addr_base
= vect_create_addr_base_for_vector_ref (ref_stmt
,
2732 if (new_stmt_list
!= NULL_TREE
)
2733 append_to_statement_list_force (new_stmt_list
, cond_expr_stmt_list
);
2735 sprintf (tmp_name
, "%s%d", "addr2int", i
);
2736 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
2737 add_referenced_tmp_var (addr_tmp
);
2738 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL_TREE
);
2739 addr_stmt
= fold_convert (int_ptrsize_type
, addr_base
);
2740 addr_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
2741 addr_tmp_name
, addr_stmt
);
2742 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
2743 append_to_statement_list_force (addr_stmt
, cond_expr_stmt_list
);
2745 /* The addresses are OR together. */
2747 if (or_tmp_name
!= NULL_TREE
)
2749 /* create: or_tmp = or_tmp | addr_tmp */
2750 sprintf (tmp_name
, "%s%d", "orptrs", i
);
2751 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
2752 add_referenced_tmp_var (or_tmp
);
2753 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL_TREE
);
2754 or_stmt
= build2 (MODIFY_EXPR
, void_type_node
, new_or_tmp_name
,
2755 build2 (BIT_IOR_EXPR
, int_ptrsize_type
,
2758 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
2759 append_to_statement_list_force (or_stmt
, cond_expr_stmt_list
);
2760 or_tmp_name
= new_or_tmp_name
;
2763 or_tmp_name
= addr_tmp_name
;
2767 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
2769 /* create: and_tmp = or_tmp & mask */
2770 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
2771 add_referenced_tmp_var (and_tmp
);
2772 and_tmp_name
= make_ssa_name (and_tmp
, NULL_TREE
);
2774 and_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
2776 build2 (BIT_AND_EXPR
, int_ptrsize_type
,
2777 or_tmp_name
, mask_cst
));
2778 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
2779 append_to_statement_list_force (and_stmt
, cond_expr_stmt_list
);
2781 /* Make and_tmp the left operand of the conditional test against zero.
2782 if and_tmp has a non-zero bit then some address is unaligned. */
2783 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
2784 return build2 (EQ_EXPR
, boolean_type_node
,
2785 and_tmp_name
, ptrsize_zero
);
2789 /* Function vect_transform_loop.
2791 The analysis phase has determined that the loop is vectorizable.
2792 Vectorize the loop - created vectorized stmts to replace the scalar
2793 stmts in the loop, and update the loop exit condition. */
2796 vect_transform_loop (loop_vec_info loop_vinfo
,
2797 struct loops
*loops ATTRIBUTE_UNUSED
)
2799 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2800 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2801 int nbbs
= loop
->num_nodes
;
2802 block_stmt_iterator si
;
2805 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2809 if (vect_print_dump_info (REPORT_DETAILS
))
2810 fprintf (vect_dump
, "=== vec_transform_loop ===");
2812 /* If the loop has data references that may or may not be aligned then
2813 two versions of the loop need to be generated, one which is vectorized
2814 and one which isn't. A test is then generated to control which of the
2815 loops is executed. The test checks for the alignment of all of the
2816 data references that may or may not be aligned. */
2818 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
2822 tree cond_expr_stmt_list
= NULL_TREE
;
2823 basic_block condition_bb
;
2824 block_stmt_iterator cond_exp_bsi
;
2825 basic_block merge_bb
;
2826 basic_block new_exit_bb
;
2828 tree orig_phi
, new_phi
, arg
;
2830 cond_expr
= vect_create_cond_for_align_checks (loop_vinfo
,
2831 &cond_expr_stmt_list
);
2832 initialize_original_copy_tables ();
2833 nloop
= loop_version (loops
, loop
, cond_expr
, &condition_bb
, true);
2834 free_original_copy_tables();
2836 /** Loop versioning violates an assumption we try to maintain during
2837 vectorization - that the loop exit block has a single predecessor.
2838 After versioning, the exit block of both loop versions is the same
2839 basic block (i.e. it has two predecessors). Just in order to simplify
2840 following transformations in the vectorizer, we fix this situation
2841 here by adding a new (empty) block on the exit-edge of the loop,
2842 with the proper loop-exit phis to maintain loop-closed-form. **/
2844 merge_bb
= loop
->single_exit
->dest
;
2845 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
2846 new_exit_bb
= split_edge (loop
->single_exit
);
2847 add_bb_to_loop (new_exit_bb
, loop
->outer
);
2848 new_exit_e
= loop
->single_exit
;
2849 e
= EDGE_SUCC (new_exit_bb
, 0);
2851 for (orig_phi
= phi_nodes (merge_bb
); orig_phi
;
2852 orig_phi
= PHI_CHAIN (orig_phi
))
2854 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
2856 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
2857 add_phi_arg (new_phi
, arg
, new_exit_e
);
2858 SET_PHI_ARG_DEF (orig_phi
, e
->dest_idx
, PHI_RESULT (new_phi
));
2861 /** end loop-exit-fixes after versioning **/
2863 update_ssa (TODO_update_ssa
);
2864 cond_exp_bsi
= bsi_last (condition_bb
);
2865 bsi_insert_before (&cond_exp_bsi
, cond_expr_stmt_list
, BSI_SAME_STMT
);
2868 /* CHECKME: we wouldn't need this if we calles update_ssa once
2870 bitmap_zero (vect_vnames_to_rename
);
2872 /* Peel the loop if there are data refs with unknown alignment.
2873 Only one data ref with unknown store is allowed. */
2875 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
2876 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
2878 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2879 compile time constant), or it is a constant that doesn't divide by the
2880 vectorization factor, then an epilog loop needs to be created.
2881 We therefore duplicate the loop: the original loop will be vectorized,
2882 and will compute the first (n/VF) iterations. The second copy of the loop
2883 will remain scalar and will compute the remaining (n%VF) iterations.
2884 (VF is the vectorization factor). */
2886 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2887 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2888 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
2889 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
2891 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
2892 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
2894 /* 1) Make sure the loop header has exactly two entries
2895 2) Make sure we have a preheader basic block. */
2897 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
2899 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
2902 /* FORNOW: the vectorizer supports only loops which body consist
2903 of one basic block (header + empty latch). When the vectorizer will
2904 support more involved loop forms, the order by which the BBs are
2905 traversed need to be reconsidered. */
2907 for (i
= 0; i
< nbbs
; i
++)
2909 basic_block bb
= bbs
[i
];
2911 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
2913 tree stmt
= bsi_stmt (si
);
2914 stmt_vec_info stmt_info
;
2917 if (vect_print_dump_info (REPORT_DETAILS
))
2919 fprintf (vect_dump
, "------>vectorizing statement: ");
2920 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
2922 stmt_info
= vinfo_for_stmt (stmt
);
2923 gcc_assert (stmt_info
);
2924 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
2925 && !STMT_VINFO_LIVE_P (stmt_info
))
2930 /* FORNOW: Verify that all stmts operate on the same number of
2931 units and no inner unrolling is necessary. */
2933 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
2934 == (unsigned HOST_WIDE_INT
) vectorization_factor
);
2936 /* -------- vectorize statement ------------ */
2937 if (vect_print_dump_info (REPORT_DETAILS
))
2938 fprintf (vect_dump
, "transform statement.");
2940 is_store
= vect_transform_stmt (stmt
, &si
);
2943 /* Free the attached stmt_vec_info and remove the stmt. */
2944 stmt_ann_t ann
= stmt_ann (stmt
);
2946 set_stmt_info ((tree_ann_t
)ann
, NULL
);
2947 bsi_remove (&si
, true);
2955 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
2957 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename
, 0, j
, bi
)
2958 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j
)));
2960 /* The memory tags and pointers in vectorized statements need to
2961 have their SSA forms updated. FIXME, why can't this be delayed
2962 until all the loops have been transformed? */
2963 update_ssa (TODO_update_ssa
);
2965 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
2966 fprintf (vect_dump
, "LOOP VECTORIZED.");