1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
50 static void vect_align_data_ref (tree
);
51 static tree
vect_create_destination_var (tree
, tree
);
52 static tree vect_create_data_ref_ptr
53 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
56 static tree
vect_get_vec_def_for_operand (tree
, tree
, tree
*);
57 static tree
vect_init_vector (tree
, tree
);
58 static void vect_finish_stmt_generation
59 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
60 static bool vect_is_simple_cond (tree
, loop_vec_info
);
61 static void update_vuses_to_preheader (tree
, struct loop
*);
62 static void vect_create_epilog_for_reduction (tree
, tree
, enum tree_code
, tree
);
63 static tree
get_initial_def_for_reduction (tree
, tree
, tree
*);
65 /* Utility function dealing with loop peeling (not peeling itself). */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info
, tree
*, tree
*, tree
*);
68 static tree
vect_build_loop_niters (loop_vec_info
);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
70 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
71 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
72 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
73 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
74 static void vect_do_peeling_for_loop_bound
75 (loop_vec_info
, tree
*, struct loops
*);
76 static int vect_min_worthwhile_factor (enum tree_code
);
79 /* Function vect_get_new_vect_var.
81 Returns a name for a new variable. The current naming scheme appends the
82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83 the name of vectorizer generated variables, and appends that to NAME if
87 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
100 case vect_pointer_var
:
108 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
110 new_vect_var
= create_tmp_var (type
, prefix
);
116 /* Function vect_create_addr_base_for_vector_ref.
118 Create an expression that computes the address of the first memory location
119 that will be accessed for a data reference.
122 STMT: The statement containing the data reference.
123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124 OFFSET: Optional. If supplied, it is added to the initial address.
127 1. Return an SSA_NAME whose value is the address of the memory location of
128 the first vector of the data reference.
129 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130 these statement(s) which define the returned SSA_NAME.
132 FORNOW: We are only handling array accesses with step 1. */
135 vect_create_addr_base_for_vector_ref (tree stmt
,
139 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
140 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
141 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
142 tree base_name
= build_fold_indirect_ref (data_ref_base
);
143 tree ref
= DR_REF (dr
);
144 tree scalar_type
= TREE_TYPE (ref
);
145 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
148 tree addr_base
, addr_expr
;
150 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
151 tree init
= unshare_expr (DR_INIT (dr
));
153 /* Create base_offset */
154 base_offset
= size_binop (PLUS_EXPR
, base_offset
, init
);
155 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
156 add_referenced_tmp_var (dest
);
157 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
158 append_to_statement_list_force (new_stmt
, new_stmt_list
);
162 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
163 add_referenced_tmp_var (tmp
);
164 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
,
166 base_offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
167 base_offset
, offset
);
168 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
169 append_to_statement_list_force (new_stmt
, new_stmt_list
);
172 /* base + base_offset */
173 addr_base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
176 /* addr_expr = addr_base */
177 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
178 get_name (base_name
));
179 add_referenced_tmp_var (addr_expr
);
180 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
181 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
182 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
183 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
185 if (vect_print_dump_info (REPORT_DETAILS
))
187 fprintf (vect_dump
, "created ");
188 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
194 /* Function vect_align_data_ref.
196 Handle misalignment of a memory access.
198 FORNOW: Can't handle misaligned accesses.
199 Make sure that the dataref is aligned. */
202 vect_align_data_ref (tree stmt
)
204 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
205 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
207 /* FORNOW: can't handle misaligned accesses;
208 all accesses expected to be aligned. */
209 gcc_assert (aligned_access_p (dr
));
213 /* Function vect_create_data_ref_ptr.
215 Create a memory reference expression for vector access, to be used in a
216 vector load/store stmt. The reference is based on a new pointer to vector
220 1. STMT: a stmt that references memory. Expected to be of the form
221 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
222 2. BSI: block_stmt_iterator where new stmts can be added.
223 3. OFFSET (optional): an offset to be added to the initial address accessed
224 by the data-ref in STMT.
225 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
226 pointing to the initial address.
229 1. Declare a new ptr to vector_type, and have it point to the base of the
230 data reference (initial addressed accessed by the data reference).
231 For example, for vector of type V8HI, the following code is generated:
234 vp = (v8hi *)initial_address;
236 if OFFSET is not supplied:
237 initial_address = &a[init];
238 if OFFSET is supplied:
239 initial_address = &a[init + OFFSET];
241 Return the initial_address in INITIAL_ADDRESS.
243 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create
244 a data-reference in the loop based on the new vector pointer vp. This
245 new data reference will by some means be updated each iteration of
246 the loop. Return the pointer vp'.
248 FORNOW: handle only aligned and consecutive accesses. */
251 vect_create_data_ref_ptr (tree stmt
,
252 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
253 tree offset
, tree
*initial_address
, bool only_init
)
256 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
257 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
258 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
259 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
265 tree new_stmt_list
= NULL_TREE
;
266 edge pe
= loop_preheader_edge (loop
);
269 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
271 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
273 if (vect_print_dump_info (REPORT_DETAILS
))
275 tree data_ref_base
= base_name
;
276 fprintf (vect_dump
, "create vector-pointer variable to type: ");
277 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
278 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
279 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
280 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
281 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
282 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
283 fprintf (vect_dump
, " vectorizing a record based array ref: ");
284 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
285 fprintf (vect_dump
, " vectorizing a pointer ref: ");
286 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
289 /** (1) Create the new vector-pointer variable: **/
291 vect_ptr_type
= build_pointer_type (vectype
);
292 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
293 get_name (base_name
));
294 add_referenced_tmp_var (vect_ptr
);
297 /** (2) Add aliasing information to the new vector-pointer:
298 (The points-to info (DR_PTR_INFO) may be defined later.) **/
300 tag
= DR_MEMTAG (dr
);
303 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
304 tag must be created with tag added to its may alias list. */
306 new_type_alias (vect_ptr
, tag
);
308 var_ann (vect_ptr
)->symbol_mem_tag
= tag
;
310 var_ann (vect_ptr
)->subvars
= DR_SUBVARS (dr
);
312 /** (3) Calculate the initial address the vector-pointer, and set
313 the vector-pointer to point to it before the loop: **/
315 /* Create: (&(base[init_val+offset]) in the loop preheader. */
316 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
318 pe
= loop_preheader_edge (loop
);
319 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
320 gcc_assert (!new_bb
);
321 *initial_address
= new_temp
;
323 /* Create: p = (vectype *) initial_base */
324 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
325 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
326 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
327 TREE_OPERAND (vec_stmt
, 0) = vect_ptr_init
;
328 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
329 gcc_assert (!new_bb
);
332 /** (4) Handle the updating of the vector-pointer inside the loop: **/
334 if (only_init
) /* No update in loop is required. */
336 /* Copy the points-to information if it exists. */
337 if (DR_PTR_INFO (dr
))
338 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
339 return vect_ptr_init
;
343 block_stmt_iterator incr_bsi
;
345 tree indx_before_incr
, indx_after_incr
;
348 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
349 create_iv (vect_ptr_init
,
350 fold_convert (vect_ptr_type
, TYPE_SIZE_UNIT (vectype
)),
351 NULL_TREE
, loop
, &incr_bsi
, insert_after
,
352 &indx_before_incr
, &indx_after_incr
);
353 incr
= bsi_stmt (incr_bsi
);
354 set_stmt_info ((tree_ann_t
)stmt_ann (incr
),
355 new_stmt_vec_info (incr
, loop_vinfo
));
357 /* Copy the points-to information if it exists. */
358 if (DR_PTR_INFO (dr
))
360 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
361 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
363 merge_alias_info (vect_ptr_init
, indx_before_incr
);
364 merge_alias_info (vect_ptr_init
, indx_after_incr
);
366 return indx_before_incr
;
371 /* Function vect_create_destination_var.
373 Create a new temporary of type VECTYPE. */
376 vect_create_destination_var (tree scalar_dest
, tree vectype
)
379 const char *new_name
;
381 enum vect_var_kind kind
;
383 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
384 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
386 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
388 new_name
= get_name (scalar_dest
);
391 vec_dest
= vect_get_new_vect_var (type
, vect_simple_var
, new_name
);
392 add_referenced_tmp_var (vec_dest
);
398 /* Function vect_init_vector.
400 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
401 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
402 used in the vectorization of STMT. */
405 vect_init_vector (tree stmt
, tree vector_var
)
407 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
408 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
409 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
412 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
418 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
419 add_referenced_tmp_var (new_var
);
421 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
422 new_temp
= make_ssa_name (new_var
, init_stmt
);
423 TREE_OPERAND (init_stmt
, 0) = new_temp
;
425 pe
= loop_preheader_edge (loop
);
426 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
427 gcc_assert (!new_bb
);
429 if (vect_print_dump_info (REPORT_DETAILS
))
431 fprintf (vect_dump
, "created new init_stmt: ");
432 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
435 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
440 /* Function vect_get_vec_def_for_operand.
442 OP is an operand in STMT. This function returns a (vector) def that will be
443 used in the vectorized stmt for STMT.
445 In the case that OP is an SSA_NAME which is defined in the loop, then
446 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
448 In case OP is an invariant or constant, a new stmt that creates a vector def
449 needs to be introduced. */
452 vect_get_vec_def_for_operand (tree op
, tree stmt
, tree
*scalar_def
)
457 stmt_vec_info def_stmt_info
= NULL
;
458 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
459 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
460 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
461 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
462 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
468 enum vect_def_type dt
;
471 if (vect_print_dump_info (REPORT_DETAILS
))
473 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
474 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
477 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
478 gcc_assert (is_simple_use
);
479 if (vect_print_dump_info (REPORT_DETAILS
))
483 fprintf (vect_dump
, "def = ");
484 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
488 fprintf (vect_dump
, " def_stmt = ");
489 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
495 /* Case 1: operand is a constant. */
496 case vect_constant_def
:
501 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
502 if (vect_print_dump_info (REPORT_DETAILS
))
503 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
505 for (i
= nunits
- 1; i
>= 0; --i
)
507 t
= tree_cons (NULL_TREE
, op
, t
);
509 vec_cst
= build_vector (vectype
, t
);
510 return vect_init_vector (stmt
, vec_cst
);
513 /* Case 2: operand is defined outside the loop - loop invariant. */
514 case vect_invariant_def
:
519 /* Create 'vec_inv = {inv,inv,..,inv}' */
520 if (vect_print_dump_info (REPORT_DETAILS
))
521 fprintf (vect_dump
, "Create vector_inv.");
523 for (i
= nunits
- 1; i
>= 0; --i
)
525 t
= tree_cons (NULL_TREE
, def
, t
);
528 /* FIXME: use build_constructor directly. */
529 vec_inv
= build_constructor_from_list (vectype
, t
);
530 return vect_init_vector (stmt
, vec_inv
);
533 /* Case 3: operand is defined inside the loop. */
537 *scalar_def
= def_stmt
;
539 /* Get the def from the vectorized stmt. */
540 def_stmt_info
= vinfo_for_stmt (def_stmt
);
541 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
542 gcc_assert (vec_stmt
);
543 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
547 /* Case 4: operand is defined by a loop header phi - reduction */
548 case vect_reduction_def
:
550 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
552 /* Get the def before the loop */
553 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
554 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
557 /* Case 5: operand is defined by loop-header phi - induction. */
558 case vect_induction_def
:
560 if (vect_print_dump_info (REPORT_DETAILS
))
561 fprintf (vect_dump
, "induction - unsupported.");
562 internal_error ("no support for induction"); /* FORNOW */
571 /* Function vect_finish_stmt_generation.
573 Insert a new stmt. */
576 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
578 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
580 if (vect_print_dump_info (REPORT_DETAILS
))
582 fprintf (vect_dump
, "add new stmt: ");
583 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
586 /* Make sure bsi points to the stmt that is being vectorized. */
587 gcc_assert (stmt
== bsi_stmt (*bsi
));
589 #ifdef USE_MAPPED_LOCATION
590 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCATION (stmt
));
592 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
597 #define ADJUST_IN_EPILOG 1
599 /* Function get_initial_def_for_reduction
602 STMT - a stmt that performs a reduction operation in the loop.
603 INIT_VAL - the initial value of the reduction variable
606 SCALAR_DEF - a tree that holds a value to be added to the final result
607 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
608 Return a vector variable, initialized according to the operation that STMT
609 performs. This vector will be used as the initial value of the
610 vector of partial results.
612 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
615 min/max: [init_val,init_val,..,init_val,init_val]
616 bit and/or: [init_val,init_val,..,init_val,init_val]
617 and when necessary (e.g. add/mult case) let the caller know
618 that it needs to adjust the result by init_val.
620 Option2: Initialize the vector as follows:
621 add: [0,0,...,0,init_val]
622 mult: [1,1,...,1,init_val]
623 min/max: [init_val,init_val,...,init_val]
624 bit and/or: [init_val,init_val,...,init_val]
625 and no adjustments are needed.
627 For example, for the following code:
633 STMT is 's = s + a[i]', and the reduction variable is 's'.
634 For a vector of 4 units, we want to return either [0,0,0,init_val],
635 or [0,0,0,0] and let the caller know that it needs to adjust
636 the result at the end by 'init_val'.
638 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
639 TODO: Use some cost-model to estimate which scheme is more profitable.
643 get_initial_def_for_reduction (tree stmt
, tree init_val
, tree
*scalar_def
)
645 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
646 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
647 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
649 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1));
650 tree type
= TREE_TYPE (init_val
);
652 tree vec
, t
= NULL_TREE
;
653 bool need_epilog_adjust
;
656 gcc_assert (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
));
663 if (INTEGRAL_TYPE_P (type
))
664 def
= build_int_cst (type
, 0);
666 def
= build_real (type
, dconst0
);
668 #ifdef ADJUST_IN_EPILOG
669 /* All the 'nunits' elements are set to 0. The final result will be
670 adjusted by 'init_val' at the loop epilog. */
672 need_epilog_adjust
= true;
674 /* 'nunits - 1' elements are set to 0; The last element is set to
675 'init_val'. No further adjustments at the epilog are needed. */
676 nelements
= nunits
- 1;
677 need_epilog_adjust
= false;
685 need_epilog_adjust
= false;
692 for (i
= nelements
- 1; i
>= 0; --i
)
693 t
= tree_cons (NULL_TREE
, def
, t
);
695 if (nelements
== nunits
- 1)
697 /* Set the last element of the vector. */
698 t
= tree_cons (NULL_TREE
, init_val
, t
);
701 gcc_assert (nelements
== nunits
);
703 if (TREE_CODE (init_val
) == INTEGER_CST
|| TREE_CODE (init_val
) == REAL_CST
)
704 vec
= build_vector (vectype
, t
);
706 vec
= build_constructor_from_list (vectype
, t
);
708 if (!need_epilog_adjust
)
709 *scalar_def
= NULL_TREE
;
711 *scalar_def
= init_val
;
713 return vect_init_vector (stmt
, vec
);
717 /* Function vect_create_epilog_for_reduction
719 Create code at the loop-epilog to finalize the result of a reduction
722 VECT_DEF is a vector of partial results.
723 REDUC_CODE is the tree-code for the epilog reduction.
724 STMT is the scalar reduction stmt that is being vectorized.
725 REDUCTION_PHI is the phi-node that carries the reduction computation.
728 1. Creates the reduction def-use cycle: sets the arguments for
730 The loop-entry argument is the vectorized initial-value of the reduction.
731 The loop-latch argument is VECT_DEF - the vector of partial sums.
732 2. "Reduces" the vector of partial results VECT_DEF into a single result,
733 by applying the operation specified by REDUC_CODE if available, or by
734 other means (whole-vector shifts or a scalar loop).
735 The function also creates a new phi node at the loop exit to preserve
736 loop-closed form, as illustrated below.
738 The flow at the entry to this function:
741 vec_def = phi <null, null> # REDUCTION_PHI
742 VECT_DEF = vector_stmt # vectorized form of STMT
743 s_loop = scalar_stmt # (scalar) STMT
745 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
749 The above is transformed by this function into:
752 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
753 VECT_DEF = vector_stmt # vectorized form of STMT
754 s_loop = scalar_stmt # (scalar) STMT
756 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
757 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
758 v_out2 = reduce <v_out1>
759 s_out3 = extract_field <v_out2, 0>
760 s_out4 = adjust_result <s_out3>
766 vect_create_epilog_for_reduction (tree vect_def
, tree stmt
,
767 enum tree_code reduc_code
, tree reduction_phi
)
769 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
771 enum machine_mode mode
;
772 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
773 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
778 block_stmt_iterator exit_bsi
;
783 tree new_scalar_dest
, exit_phi
;
784 tree bitsize
, bitpos
, bytesize
;
785 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1));
786 tree scalar_initial_def
;
787 tree vec_initial_def
;
789 imm_use_iterator imm_iter
;
791 bool extract_scalar_result
;
794 tree operation
= TREE_OPERAND (stmt
, 1);
797 op_type
= TREE_CODE_LENGTH (TREE_CODE (operation
));
798 reduction_op
= TREE_OPERAND (operation
, op_type
-1);
799 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
800 mode
= TYPE_MODE (vectype
);
802 /*** 1. Create the reduction def-use cycle ***/
804 /* 1.1 set the loop-entry arg of the reduction-phi: */
805 /* For the case of reduction, vect_get_vec_def_for_operand returns
806 the scalar def before the loop, that defines the initial value
807 of the reduction variable. */
808 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
809 &scalar_initial_def
);
810 add_phi_arg (reduction_phi
, vec_initial_def
, loop_preheader_edge (loop
));
812 /* 1.2 set the loop-latch arg for the reduction-phi: */
813 add_phi_arg (reduction_phi
, vect_def
, loop_latch_edge (loop
));
815 if (vect_print_dump_info (REPORT_DETAILS
))
817 fprintf (vect_dump
, "transform reduction: created def-use cycle:");
818 print_generic_expr (vect_dump
, reduction_phi
, TDF_SLIM
);
819 fprintf (vect_dump
, "\n");
820 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vect_def
), TDF_SLIM
);
824 /*** 2. Create epilog code
825 The reduction epilog code operates across the elements of the vector
826 of partial results computed by the vectorized loop.
827 The reduction epilog code consists of:
828 step 1: compute the scalar result in a vector (v_out2)
829 step 2: extract the scalar result (s_out3) from the vector (v_out2)
830 step 3: adjust the scalar result (s_out3) if needed.
832 Step 1 can be accomplished using one of the following three schemes:
833 (scheme 1) using reduc_code, if available.
834 (scheme 2) using whole-vector shifts, if available.
835 (scheme 3) using a scalar loop. In this case steps 1+2 above are
838 The overall epilog code looks like this:
840 s_out0 = phi <s_loop> # original EXIT_PHI
841 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
842 v_out2 = reduce <v_out1> # step 1
843 s_out3 = extract_field <v_out2, 0> # step 2
844 s_out4 = adjust_result <s_out3> # step 3
846 (step 3 is optional, and steps 1 and 2 may be combined).
847 Lastly, the uses of s_out0 are replaced by s_out4.
851 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
852 v_out1 = phi <v_loop> */
854 exit_bb
= loop
->single_exit
->dest
;
855 new_phi
= create_phi_node (SSA_NAME_VAR (vect_def
), exit_bb
);
856 SET_PHI_ARG_DEF (new_phi
, loop
->single_exit
->dest_idx
, vect_def
);
857 exit_bsi
= bsi_start (exit_bb
);
859 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
860 (i.e. when reduc_code is not available) and in the final adjustment code
861 (if needed). Also get the original scalar reduction variable as
862 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
863 represents a reduction pattern), the tree-code and scalar-def are
864 taken from the original stmt that the pattern-stmt (STMT) replaces.
865 Otherwise (it is a regular reduction) - the tree-code and scalar-def
866 are taken from STMT. */
868 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
871 /* Regular reduction */
876 /* Reduction pattern */
877 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
878 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
879 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
881 code
= TREE_CODE (TREE_OPERAND (orig_stmt
, 1));
882 scalar_dest
= TREE_OPERAND (orig_stmt
, 0);
883 scalar_type
= TREE_TYPE (scalar_dest
);
884 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
885 bitsize
= TYPE_SIZE (scalar_type
);
886 bytesize
= TYPE_SIZE_UNIT (scalar_type
);
888 /* 2.3 Create the reduction code, using one of the three schemes described
891 if (reduc_code
< NUM_TREE_CODES
)
894 v_out2 = reduc_expr <v_out1> */
896 if (vect_print_dump_info (REPORT_DETAILS
))
897 fprintf (vect_dump
, "Reduce using direct vector reduction.");
899 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
900 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
901 build1 (reduc_code
, vectype
, PHI_RESULT (new_phi
)));
902 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
903 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
904 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
906 extract_scalar_result
= true;
910 enum tree_code shift_code
= 0;
911 bool have_whole_vector_shift
= true;
913 int element_bitsize
= tree_low_cst (bitsize
, 1);
914 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
917 if (vec_shr_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
918 shift_code
= VEC_RSHIFT_EXPR
;
920 have_whole_vector_shift
= false;
922 /* Regardless of whether we have a whole vector shift, if we're
923 emulating the operation via tree-vect-generic, we don't want
924 to use it. Only the first round of the reduction is likely
925 to still be profitable via emulation. */
926 /* ??? It might be better to emit a reduction tree code here, so that
927 tree-vect-generic can expand the first round via bit tricks. */
928 if (!VECTOR_MODE_P (mode
))
929 have_whole_vector_shift
= false;
932 optab optab
= optab_for_tree_code (code
, vectype
);
933 if (optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
934 have_whole_vector_shift
= false;
937 if (have_whole_vector_shift
)
940 for (offset = VS/2; offset >= element_size; offset/=2)
942 Create: va' = vec_shift <va, offset>
943 Create: va = vop <va, va'>
946 if (vect_print_dump_info (REPORT_DETAILS
))
947 fprintf (vect_dump
, "Reduce using vector shifts");
949 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
950 new_temp
= PHI_RESULT (new_phi
);
952 for (bit_offset
= vec_size_in_bits
/2;
953 bit_offset
>= element_bitsize
;
956 tree bitpos
= size_int (bit_offset
);
958 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
959 build2 (shift_code
, vectype
, new_temp
, bitpos
));
960 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
961 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
962 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
964 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
965 build2 (code
, vectype
, new_name
, new_temp
));
966 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
967 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
968 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
971 extract_scalar_result
= true;
978 s = extract_field <v_out2, 0>
979 for (offset = element_size;
980 offset < vector_size;
981 offset += element_size;)
983 Create: s' = extract_field <v_out2, offset>
984 Create: s = op <s, s'>
987 if (vect_print_dump_info (REPORT_DETAILS
))
988 fprintf (vect_dump
, "Reduce using scalar code. ");
990 vec_temp
= PHI_RESULT (new_phi
);
991 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
992 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
994 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
995 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
, rhs
);
996 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
997 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
998 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1000 for (bit_offset
= element_bitsize
;
1001 bit_offset
< vec_size_in_bits
;
1002 bit_offset
+= element_bitsize
)
1004 tree bitpos
= bitsize_int (bit_offset
);
1005 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
1008 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1009 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1011 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1012 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
1013 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1015 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1016 build2 (code
, scalar_type
, new_name
, new_temp
));
1017 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1018 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1019 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1022 extract_scalar_result
= false;
1026 /* 2.4 Extract the final scalar result. Create:
1027 s_out3 = extract_field <v_out2, bitpos> */
1029 if (extract_scalar_result
)
1033 if (vect_print_dump_info (REPORT_DETAILS
))
1034 fprintf (vect_dump
, "extract scalar result");
1036 if (BYTES_BIG_ENDIAN
)
1037 bitpos
= size_binop (MULT_EXPR
,
1038 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
1039 TYPE_SIZE (scalar_type
));
1041 bitpos
= bitsize_zero_node
;
1043 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
1044 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1045 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
, rhs
);
1046 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1047 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1048 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1051 /* 2.5 Adjust the final result by the initial value of the reduction
1052 variable. (When such adjustment is not needed, then
1053 'scalar_initial_def' is zero).
1056 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1058 if (scalar_initial_def
)
1060 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1061 build2 (code
, scalar_type
, new_temp
, scalar_initial_def
));
1062 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1063 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1064 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1067 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1069 /* Find the loop-closed-use at the loop exit of the original scalar result.
1070 (The reduction result is expected to have two immediate uses - one at the
1071 latch block, and one at the loop exit). */
1073 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
1075 if (!flow_bb_inside_loop_p (loop
, bb_for_stmt (USE_STMT (use_p
))))
1077 exit_phi
= USE_STMT (use_p
);
1081 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1082 gcc_assert (exit_phi
);
1083 /* Replace the uses: */
1084 orig_name
= PHI_RESULT (exit_phi
);
1085 FOR_EACH_IMM_USE_SAFE (use_p
, imm_iter
, orig_name
)
1086 SET_USE (use_p
, new_temp
);
1090 /* Function vectorizable_reduction.
1092 Check if STMT performs a reduction operation that can be vectorized.
1093 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1094 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1095 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1097 This function also handles reduction idioms (patterns) that have been
1098 recognized in advance during vect_pattern_recog. In this case, STMT may be
1100 X = pattern_expr (arg0, arg1, ..., X)
1101 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1102 sequence that had been detected and replaced by the pattern-stmt (STMT).
1104 In some cases of reduction patterns, the type of the reduction variable X is
1105 different than the type of the other arguments of STMT.
1106 In such cases, the vectype that is used when transforming STMT into a vector
1107 stmt is different than the vectype that is used to determine the
1108 vectorization factor, because it consists of a different number of elements
1109 than the actual number of elements that are being operated upon in parallel.
1111 For example, consider an accumulation of shorts into an int accumulator.
1112 On some targets it's possible to vectorize this pattern operating on 8
1113 shorts at a time (hence, the vectype for purposes of determining the
1114 vectorization factor should be V8HI); on the other hand, the vectype that
1115 is used to create the vector form is actually V4SI (the type of the result).
1117 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1118 indicates what is the actual level of parallelism (V8HI in the example), so
1119 that the right vectorization factor would be derived. This vectype
1120 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1121 be used to create the vectorized stmt. The right vectype for the vectorized
1122 stmt is obtained from the type of the result X:
1123 get_vectype_for_scalar_type (TREE_TYPE (X))
1125 This means that, contrary to "regular" reductions (or "regular" stmts in
1126 general), the following equation:
1127 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1128 does *NOT* necessarily hold for reduction patterns. */
1131 vectorizable_reduction (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1136 tree loop_vec_def0
, loop_vec_def1
;
1137 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1138 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1139 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1140 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1142 enum tree_code code
, orig_code
, epilog_reduc_code
= 0;
1143 enum machine_mode vec_mode
;
1145 optab optab
, reduc_optab
;
1148 enum vect_def_type dt
;
1153 stmt_vec_info orig_stmt_info
;
1154 tree expr
= NULL_TREE
;
1157 /* 1. Is vectorizable reduction? */
1159 /* Not supportable if the reduction variable is used in the loop. */
1160 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1163 if (!STMT_VINFO_LIVE_P (stmt_info
))
1166 /* Make sure it was already recognized as a reduction computation. */
1167 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
1170 /* 2. Has this been recognized as a reduction pattern?
1172 Check if STMT represents a pattern that has been recognized
1173 in earlier analysis stages. For stmts that represent a pattern,
1174 the STMT_VINFO_RELATED_STMT field records the last stmt in
1175 the original sequence that constitutes the pattern. */
1177 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1180 orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
1181 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) == stmt
);
1182 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
));
1183 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info
));
1186 /* 3. Check the operands of the operation. The first operands are defined
1187 inside the loop body. The last operand is the reduction variable,
1188 which is defined by the loop-header-phi. */
1190 gcc_assert (TREE_CODE (stmt
) == MODIFY_EXPR
);
1192 operation
= TREE_OPERAND (stmt
, 1);
1193 code
= TREE_CODE (operation
);
1194 op_type
= TREE_CODE_LENGTH (code
);
1196 if (op_type
!= binary_op
&& op_type
!= ternary_op
)
1198 scalar_dest
= TREE_OPERAND (stmt
, 0);
1199 scalar_type
= TREE_TYPE (scalar_dest
);
1201 /* All uses but the last are expected to be defined in the loop.
1202 The last use is the reduction variable. */
1203 for (i
= 0; i
< op_type
-1; i
++)
1205 op
= TREE_OPERAND (operation
, i
);
1206 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1207 gcc_assert (is_simple_use
);
1208 gcc_assert (dt
== vect_loop_def
|| dt
== vect_invariant_def
||
1209 dt
== vect_constant_def
);
1212 op
= TREE_OPERAND (operation
, i
);
1213 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1214 gcc_assert (is_simple_use
);
1215 gcc_assert (dt
== vect_reduction_def
);
1216 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
1218 gcc_assert (orig_stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1220 gcc_assert (stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1222 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
1225 /* 4. Supportable by target? */
1227 /* 4.1. check support for the operation in the loop */
1228 optab
= optab_for_tree_code (code
, vectype
);
1231 if (vect_print_dump_info (REPORT_DETAILS
))
1232 fprintf (vect_dump
, "no optab.");
1235 vec_mode
= TYPE_MODE (vectype
);
1236 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1238 if (vect_print_dump_info (REPORT_DETAILS
))
1239 fprintf (vect_dump
, "op not supported by target.");
1240 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1241 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1242 < vect_min_worthwhile_factor (code
))
1244 if (vect_print_dump_info (REPORT_DETAILS
))
1245 fprintf (vect_dump
, "proceeding using word mode.");
1248 /* Worthwhile without SIMD support? */
1249 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1250 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1251 < vect_min_worthwhile_factor (code
))
1253 if (vect_print_dump_info (REPORT_DETAILS
))
1254 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1258 /* 4.2. Check support for the epilog operation.
1260 If STMT represents a reduction pattern, then the type of the
1261 reduction variable may be different than the type of the rest
1262 of the arguments. For example, consider the case of accumulation
1263 of shorts into an int accumulator; The original code:
1264 S1: int_a = (int) short_a;
1265 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1268 STMT: int_acc = widen_sum <short_a, int_acc>
1271 1. The tree-code that is used to create the vector operation in the
1272 epilog code (that reduces the partial results) is not the
1273 tree-code of STMT, but is rather the tree-code of the original
1274 stmt from the pattern that STMT is replacing. I.e, in the example
1275 above we want to use 'widen_sum' in the loop, but 'plus' in the
1277 2. The type (mode) we use to check available target support
1278 for the vector operation to be created in the *epilog*, is
1279 determined by the type of the reduction variable (in the example
1280 above we'd check this: plus_optab[vect_int_mode]).
1281 However the type (mode) we use to check available target support
1282 for the vector operation to be created *inside the loop*, is
1283 determined by the type of the other arguments to STMT (in the
1284 example we'd check this: widen_sum_optab[vect_short_mode]).
1286 This is contrary to "regular" reductions, in which the types of all
1287 the arguments are the same as the type of the reduction variable.
1288 For "regular" reductions we can therefore use the same vector type
1289 (and also the same tree-code) when generating the epilog code and
1290 when generating the code inside the loop. */
1294 /* This is a reduction pattern: get the vectype from the type of the
1295 reduction variable, and get the tree-code from orig_stmt. */
1296 orig_code
= TREE_CODE (TREE_OPERAND (orig_stmt
, 1));
1297 vectype
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1298 vec_mode
= TYPE_MODE (vectype
);
1302 /* Regular reduction: use the same vectype and tree-code as used for
1303 the vector code inside the loop can be used for the epilog code. */
1307 if (!reduction_code_for_scalar_code (orig_code
, &epilog_reduc_code
))
1309 reduc_optab
= optab_for_tree_code (epilog_reduc_code
, vectype
);
1312 if (vect_print_dump_info (REPORT_DETAILS
))
1313 fprintf (vect_dump
, "no optab for reduction.");
1314 epilog_reduc_code
= NUM_TREE_CODES
;
1316 if (reduc_optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1318 if (vect_print_dump_info (REPORT_DETAILS
))
1319 fprintf (vect_dump
, "reduc op not supported by target.");
1320 epilog_reduc_code
= NUM_TREE_CODES
;
1323 if (!vec_stmt
) /* transformation not required. */
1325 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
1331 if (vect_print_dump_info (REPORT_DETAILS
))
1332 fprintf (vect_dump
, "transform reduction.");
1334 /* Create the destination vector */
1335 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1337 /* Create the reduction-phi that defines the reduction-operand. */
1338 new_phi
= create_phi_node (vec_dest
, loop
->header
);
1340 /* Prepare the operand that is defined inside the loop body */
1341 op
= TREE_OPERAND (operation
, 0);
1342 loop_vec_def0
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1343 if (op_type
== binary_op
)
1344 expr
= build2 (code
, vectype
, loop_vec_def0
, PHI_RESULT (new_phi
));
1345 else if (op_type
== ternary_op
)
1347 op
= TREE_OPERAND (operation
, 1);
1348 loop_vec_def1
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1349 expr
= build3 (code
, vectype
, loop_vec_def0
, loop_vec_def1
,
1350 PHI_RESULT (new_phi
));
1353 /* Create the vectorized operation that computes the partial results */
1354 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, expr
);
1355 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1356 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1357 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1359 /* Finalize the reduction-phi (set it's arguments) and create the
1360 epilog reduction code. */
1361 vect_create_epilog_for_reduction (new_temp
, stmt
, epilog_reduc_code
, new_phi
);
1366 /* Function vectorizable_assignment.
1368 Check if STMT performs an assignment (copy) that can be vectorized.
1369 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1370 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1371 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1374 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1380 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1381 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1382 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1385 enum vect_def_type dt
;
1387 /* Is vectorizable assignment? */
1388 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1391 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1393 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1396 scalar_dest
= TREE_OPERAND (stmt
, 0);
1397 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1400 op
= TREE_OPERAND (stmt
, 1);
1401 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1403 if (vect_print_dump_info (REPORT_DETAILS
))
1404 fprintf (vect_dump
, "use not simple.");
1408 if (!vec_stmt
) /* transformation not required. */
1410 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
1415 if (vect_print_dump_info (REPORT_DETAILS
))
1416 fprintf (vect_dump
, "transform assignment.");
1419 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1422 op
= TREE_OPERAND (stmt
, 1);
1423 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1425 /* Arguments are ready. create the new vector stmt. */
1426 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
1427 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1428 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1429 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1435 /* Function vect_min_worthwhile_factor.
1437 For a loop where we could vectorize the operation indicated by CODE,
1438 return the minimum vectorization factor that makes it worthwhile
1439 to use generic vectors. */
1441 vect_min_worthwhile_factor (enum tree_code code
)
1462 /* Function vectorizable_operation.
1464 Check if STMT performs a binary or unary operation that can be vectorized.
1465 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1466 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1470 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1475 tree op0
, op1
= NULL
;
1476 tree vec_oprnd0
, vec_oprnd1
=NULL
;
1477 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1478 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1479 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1481 enum tree_code code
;
1482 enum machine_mode vec_mode
;
1488 enum machine_mode optab_op2_mode
;
1490 enum vect_def_type dt
;
1492 /* Is STMT a vectorizable binary/unary operation? */
1493 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1496 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1498 if (STMT_VINFO_LIVE_P (stmt_info
))
1500 /* FORNOW: not yet supported. */
1501 if (vect_print_dump_info (REPORT_DETAILS
))
1502 fprintf (vect_dump
, "value used after loop.");
1506 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1509 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1512 operation
= TREE_OPERAND (stmt
, 1);
1513 code
= TREE_CODE (operation
);
1514 optab
= optab_for_tree_code (code
, vectype
);
1516 /* Support only unary or binary operations. */
1517 op_type
= TREE_CODE_LENGTH (code
);
1518 if (op_type
!= unary_op
&& op_type
!= binary_op
)
1520 if (vect_print_dump_info (REPORT_DETAILS
))
1521 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
1525 for (i
= 0; i
< op_type
; i
++)
1527 op
= TREE_OPERAND (operation
, i
);
1528 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1530 if (vect_print_dump_info (REPORT_DETAILS
))
1531 fprintf (vect_dump
, "use not simple.");
1536 /* Supportable by target? */
1539 if (vect_print_dump_info (REPORT_DETAILS
))
1540 fprintf (vect_dump
, "no optab.");
1543 vec_mode
= TYPE_MODE (vectype
);
1544 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
1545 if (icode
== CODE_FOR_nothing
)
1547 if (vect_print_dump_info (REPORT_DETAILS
))
1548 fprintf (vect_dump
, "op not supported by target.");
1549 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1550 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1551 < vect_min_worthwhile_factor (code
))
1553 if (vect_print_dump_info (REPORT_DETAILS
))
1554 fprintf (vect_dump
, "proceeding using word mode.");
1557 /* Worthwhile without SIMD support? */
1558 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1559 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1560 < vect_min_worthwhile_factor (code
))
1562 if (vect_print_dump_info (REPORT_DETAILS
))
1563 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1567 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
1569 /* FORNOW: not yet supported. */
1570 if (!VECTOR_MODE_P (vec_mode
))
1573 /* Invariant argument is needed for a vector shift
1574 by a scalar shift operand. */
1575 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1576 if (! (VECTOR_MODE_P (optab_op2_mode
)
1577 || dt
== vect_constant_def
1578 || dt
== vect_invariant_def
))
1580 if (vect_print_dump_info (REPORT_DETAILS
))
1581 fprintf (vect_dump
, "operand mode requires invariant argument.");
1586 if (!vec_stmt
) /* transformation not required. */
1588 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
1594 if (vect_print_dump_info (REPORT_DETAILS
))
1595 fprintf (vect_dump
, "transform binary/unary operation.");
1598 scalar_dest
= TREE_OPERAND (stmt
, 0);
1599 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1602 op0
= TREE_OPERAND (operation
, 0);
1603 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1605 if (op_type
== binary_op
)
1607 op1
= TREE_OPERAND (operation
, 1);
1609 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
1611 /* Vector shl and shr insn patterns can be defined with
1612 scalar operand 2 (shift operand). In this case, use
1613 constant or loop invariant op1 directly, without
1614 extending it to vector mode first. */
1616 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1617 if (!VECTOR_MODE_P (optab_op2_mode
))
1619 if (vect_print_dump_info (REPORT_DETAILS
))
1620 fprintf (vect_dump
, "operand 1 using scalar mode.");
1626 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1629 /* Arguments are ready. create the new vector stmt. */
1631 if (op_type
== binary_op
)
1632 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1633 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
1635 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1636 build1 (code
, vectype
, vec_oprnd0
));
1637 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1638 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1639 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1645 /* Function vectorizable_store.
1647 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1649 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1650 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1651 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1654 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1660 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1661 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1662 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1663 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1664 enum machine_mode vec_mode
;
1666 enum dr_alignment_support alignment_support_cheme
;
1669 enum vect_def_type dt
;
1671 /* Is vectorizable store? */
1673 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1676 scalar_dest
= TREE_OPERAND (stmt
, 0);
1677 if (TREE_CODE (scalar_dest
) != ARRAY_REF
1678 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
1681 op
= TREE_OPERAND (stmt
, 1);
1682 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1684 if (vect_print_dump_info (REPORT_DETAILS
))
1685 fprintf (vect_dump
, "use not simple.");
1689 vec_mode
= TYPE_MODE (vectype
);
1690 /* FORNOW. In some cases can vectorize even if data-type not supported
1691 (e.g. - array initialization with 0). */
1692 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
1695 if (!STMT_VINFO_DATA_REF (stmt_info
))
1699 if (!vec_stmt
) /* transformation not required. */
1701 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
1707 if (vect_print_dump_info (REPORT_DETAILS
))
1708 fprintf (vect_dump
, "transform store");
1710 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
1711 gcc_assert (alignment_support_cheme
);
1712 gcc_assert (alignment_support_cheme
== dr_aligned
); /* FORNOW */
1714 /* Handle use - get the vectorized def from the defining stmt. */
1715 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1718 /* FORNOW: make sure the data reference is aligned. */
1719 vect_align_data_ref (stmt
);
1720 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1721 data_ref
= build_fold_indirect_ref (data_ref
);
1723 /* Arguments are ready. create the new vector stmt. */
1724 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
1725 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1727 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1728 element's definition to the vector's definition then update the
1729 defining statement. The original is being deleted so the same
1730 SSA_NAMEs can be used. */
1731 copy_virtual_operands (*vec_stmt
, stmt
);
1733 FOR_EACH_SSA_TREE_OPERAND (def
, stmt
, iter
, SSA_OP_VMAYDEF
)
1735 SSA_NAME_DEF_STMT (def
) = *vec_stmt
;
1737 /* If this virtual def has a use outside the loop and a loop peel is
1738 performed then the def may be renamed by the peel. Mark it for
1739 renaming so the later use will also be renamed. */
1740 mark_sym_for_renaming (SSA_NAME_VAR (def
));
1747 /* vectorizable_load.
1749 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1751 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1752 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1753 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1756 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1759 tree vec_dest
= NULL
;
1760 tree data_ref
= NULL
;
1762 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1763 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1764 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1771 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1772 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1773 edge pe
= loop_preheader_edge (loop
);
1774 enum dr_alignment_support alignment_support_cheme
;
1776 /* Is vectorizable load? */
1777 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1780 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1782 if (STMT_VINFO_LIVE_P (stmt_info
))
1784 /* FORNOW: not yet supported. */
1785 if (vect_print_dump_info (REPORT_DETAILS
))
1786 fprintf (vect_dump
, "value used after loop.");
1790 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1793 scalar_dest
= TREE_OPERAND (stmt
, 0);
1794 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1797 op
= TREE_OPERAND (stmt
, 1);
1798 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
1801 if (!STMT_VINFO_DATA_REF (stmt_info
))
1804 mode
= (int) TYPE_MODE (vectype
);
1806 /* FORNOW. In some cases can vectorize even if data-type not supported
1807 (e.g. - data copies). */
1808 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
1810 if (vect_print_dump_info (REPORT_DETAILS
))
1811 fprintf (vect_dump
, "Aligned load, but unsupported type.");
1815 if (!vec_stmt
) /* transformation not required. */
1817 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
1823 if (vect_print_dump_info (REPORT_DETAILS
))
1824 fprintf (vect_dump
, "transform load.");
1826 alignment_support_cheme
= vect_supportable_dr_alignment (dr
);
1827 gcc_assert (alignment_support_cheme
);
1829 if (alignment_support_cheme
== dr_aligned
1830 || alignment_support_cheme
== dr_unaligned_supported
)
1841 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1842 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1843 if (aligned_access_p (dr
))
1844 data_ref
= build_fold_indirect_ref (data_ref
);
1847 int mis
= DR_MISALIGNMENT (dr
);
1848 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
1849 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
1850 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
1852 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1853 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1854 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1855 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1856 copy_virtual_operands (new_stmt
, stmt
);
1858 else if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
1862 msq_init = *(floor(p1))
1863 p2 = initial_addr + VS - 1;
1864 magic = have_builtin ? builtin_result : initial_address;
1867 p2' = p2 + indx * vectype_size
1869 vec_dest = realign_load (msq, lsq, magic)
1883 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1884 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1885 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
1887 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
1888 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1889 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1890 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1891 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1892 gcc_assert (!new_bb
);
1893 msq_init
= TREE_OPERAND (new_stmt
, 0);
1894 copy_virtual_operands (new_stmt
, stmt
);
1895 update_vuses_to_preheader (new_stmt
, loop
);
1898 /* <2> Create lsq = *(floor(p2')) in the loop */
1899 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
1900 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1901 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
1902 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
1903 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1904 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1905 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1906 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1907 lsq
= TREE_OPERAND (new_stmt
, 0);
1908 copy_virtual_operands (new_stmt
, stmt
);
1912 if (targetm
.vectorize
.builtin_mask_for_load
)
1914 /* Create permutation mask, if required, in loop preheader. */
1916 params
= build_tree_list (NULL_TREE
, init_addr
);
1917 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1918 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
1919 new_stmt
= build_function_call_expr (builtin_decl
, params
);
1920 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1921 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1922 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1923 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1924 gcc_assert (!new_bb
);
1925 magic
= TREE_OPERAND (new_stmt
, 0);
1927 /* The result of the CALL_EXPR to this builtin is determined from
1928 the value of the parameter and no global variables are touched
1929 which makes the builtin a "const" function. Requiring the
1930 builtin to have the "const" attribute makes it unnecessary
1931 to call mark_call_clobbered. */
1932 gcc_assert (TREE_READONLY (builtin_decl
));
1936 /* Use current address instead of init_addr for reduced reg pressure.
1938 magic
= dataref_ptr
;
1942 /* <4> Create msq = phi <msq_init, lsq> in loop */
1943 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1944 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
1945 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
1946 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
1947 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
1948 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
1951 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1952 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1953 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
1954 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1955 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1956 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1957 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1962 *vec_stmt
= new_stmt
;
1967 /* Function vectorizable_live_operation.
1969 STMT computes a value that is used outside the loop. Check if
1970 it can be supported. */
1973 vectorizable_live_operation (tree stmt
,
1974 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
1975 tree
*vec_stmt ATTRIBUTE_UNUSED
)
1978 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1979 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1981 enum tree_code code
;
1985 enum vect_def_type dt
;
1987 if (!STMT_VINFO_LIVE_P (stmt_info
))
1990 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1993 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1996 operation
= TREE_OPERAND (stmt
, 1);
1997 code
= TREE_CODE (operation
);
1999 op_type
= TREE_CODE_LENGTH (code
);
2001 /* FORNOW: support only if all uses are invariant. This means
2002 that the scalar operations can remain in place, unvectorized.
2003 The original last scalar value that they compute will be used. */
2005 for (i
= 0; i
< op_type
; i
++)
2007 op
= TREE_OPERAND (operation
, i
);
2008 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2010 if (vect_print_dump_info (REPORT_DETAILS
))
2011 fprintf (vect_dump
, "use not simple.");
2015 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
)
2019 /* No transformation is required for the cases we currently support. */
2024 /* Function vect_is_simple_cond.
2027 LOOP - the loop that is being vectorized.
2028 COND - Condition that is checked for simple use.
2030 Returns whether a COND can be vectorized. Checks whether
2031 condition operands are supportable using vec_is_simple_use. */
2034 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
2038 enum vect_def_type dt
;
2040 if (!COMPARISON_CLASS_P (cond
))
2043 lhs
= TREE_OPERAND (cond
, 0);
2044 rhs
= TREE_OPERAND (cond
, 1);
2046 if (TREE_CODE (lhs
) == SSA_NAME
)
2048 tree lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
2049 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
2052 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
)
2055 if (TREE_CODE (rhs
) == SSA_NAME
)
2057 tree rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
2058 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
2061 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
)
2067 /* vectorizable_condition.
2069 Check if STMT is conditional modify expression that can be vectorized.
2070 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2071 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
2074 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2077 vectorizable_condition (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2079 tree scalar_dest
= NULL_TREE
;
2080 tree vec_dest
= NULL_TREE
;
2081 tree op
= NULL_TREE
;
2082 tree cond_expr
, then_clause
, else_clause
;
2083 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2084 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2085 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
2086 tree vec_compare
, vec_cond_expr
;
2088 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2089 enum machine_mode vec_mode
;
2091 enum vect_def_type dt
;
2093 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2096 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2098 if (STMT_VINFO_LIVE_P (stmt_info
))
2100 /* FORNOW: not yet supported. */
2101 if (vect_print_dump_info (REPORT_DETAILS
))
2102 fprintf (vect_dump
, "value used after loop.");
2106 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2109 op
= TREE_OPERAND (stmt
, 1);
2111 if (TREE_CODE (op
) != COND_EXPR
)
2114 cond_expr
= TREE_OPERAND (op
, 0);
2115 then_clause
= TREE_OPERAND (op
, 1);
2116 else_clause
= TREE_OPERAND (op
, 2);
2118 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
2121 if (TREE_CODE (then_clause
) == SSA_NAME
)
2123 tree then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
2124 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
2125 &then_def_stmt
, &def
, &dt
))
2128 else if (TREE_CODE (then_clause
) != INTEGER_CST
2129 && TREE_CODE (then_clause
) != REAL_CST
)
2132 if (TREE_CODE (else_clause
) == SSA_NAME
)
2134 tree else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
2135 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
2136 &else_def_stmt
, &def
, &dt
))
2139 else if (TREE_CODE (else_clause
) != INTEGER_CST
2140 && TREE_CODE (else_clause
) != REAL_CST
)
2144 vec_mode
= TYPE_MODE (vectype
);
2148 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
2149 return expand_vec_cond_expr_p (op
, vec_mode
);
2155 scalar_dest
= TREE_OPERAND (stmt
, 0);
2156 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2158 /* Handle cond expr. */
2160 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
2162 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
2163 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
2164 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
2166 /* Arguments are ready. create the new vector stmt. */
2167 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
2168 vec_cond_lhs
, vec_cond_rhs
);
2169 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
2170 vec_compare
, vec_then_clause
, vec_else_clause
);
2172 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_cond_expr
);
2173 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
2174 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
2175 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2180 /* Function vect_transform_stmt.
2182 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2185 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
2187 bool is_store
= false;
2188 tree vec_stmt
= NULL_TREE
;
2189 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2190 tree orig_stmt_in_pattern
;
2193 if (STMT_VINFO_RELEVANT_P (stmt_info
))
2195 switch (STMT_VINFO_TYPE (stmt_info
))
2197 case op_vec_info_type
:
2198 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
2202 case assignment_vec_info_type
:
2203 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
2207 case load_vec_info_type
:
2208 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
2212 case store_vec_info_type
:
2213 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
2218 case condition_vec_info_type
:
2219 done
= vectorizable_condition (stmt
, bsi
, &vec_stmt
);
2224 if (vect_print_dump_info (REPORT_DETAILS
))
2225 fprintf (vect_dump
, "stmt not supported.");
2229 gcc_assert (vec_stmt
);
2230 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
2231 orig_stmt_in_pattern
= STMT_VINFO_RELATED_STMT (stmt_info
);
2232 if (orig_stmt_in_pattern
)
2234 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt_in_pattern
);
2235 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2237 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
2239 /* STMT was inserted by the vectorizer to replace a computation
2240 idiom. ORIG_STMT_IN_PATTERN is a stmt in the original
2241 sequence that computed this idiom. We need to record a pointer
2242 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more
2243 detail in the documentation of vect_pattern_recog. */
2245 STMT_VINFO_VEC_STMT (stmt_vinfo
) = vec_stmt
;
2250 if (STMT_VINFO_LIVE_P (stmt_info
))
2252 switch (STMT_VINFO_TYPE (stmt_info
))
2254 case reduc_vec_info_type
:
2255 done
= vectorizable_reduction (stmt
, bsi
, &vec_stmt
);
2260 done
= vectorizable_live_operation (stmt
, bsi
, &vec_stmt
);
2266 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info
));
2267 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
2275 /* This function builds ni_name = number of iterations loop executes
2276 on the loop preheader. */
2279 vect_build_loop_niters (loop_vec_info loop_vinfo
)
2281 tree ni_name
, stmt
, var
;
2283 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2284 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
2286 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
2287 add_referenced_tmp_var (var
);
2288 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
2290 pe
= loop_preheader_edge (loop
);
2293 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2294 gcc_assert (!new_bb
);
2301 /* This function generates the following statements:
2303 ni_name = number of iterations loop executes
2304 ratio = ni_name / vf
2305 ratio_mult_vf_name = ratio * vf
2307 and places them at the loop preheader edge. */
2310 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
2312 tree
*ratio_mult_vf_name_ptr
,
2313 tree
*ratio_name_ptr
)
2321 tree ratio_mult_vf_name
;
2322 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2323 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
2324 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2327 pe
= loop_preheader_edge (loop
);
2329 /* Generate temporary variable that contains
2330 number of iterations loop executes. */
2332 ni_name
= vect_build_loop_niters (loop_vinfo
);
2333 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
2335 /* Create: ratio = ni >> log2(vf) */
2337 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
2338 add_referenced_tmp_var (var
);
2339 ratio_name
= make_ssa_name (var
, NULL_TREE
);
2340 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
2341 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
2342 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
2344 pe
= loop_preheader_edge (loop
);
2345 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2346 gcc_assert (!new_bb
);
2348 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2350 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
2351 add_referenced_tmp_var (var
);
2352 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
2353 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
2354 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
2355 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
2357 pe
= loop_preheader_edge (loop
);
2358 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2359 gcc_assert (!new_bb
);
2361 *ni_name_ptr
= ni_name
;
2362 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
2363 *ratio_name_ptr
= ratio_name
;
2369 /* Function update_vuses_to_preheader.
2372 STMT - a statement with potential VUSEs.
2373 LOOP - the loop whose preheader will contain STMT.
2375 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2376 appears to be defined in a V_MAY_DEF in another statement in a loop.
2377 One such case is when the VUSE is at the dereference of a __restricted__
2378 pointer in a load and the V_MAY_DEF is at the dereference of a different
2379 __restricted__ pointer in a store. Vectorization may result in
2380 copy_virtual_uses being called to copy the problematic VUSE to a new
2381 statement that is being inserted in the loop preheader. This procedure
2382 is called to change the SSA_NAME in the new statement's VUSE from the
2383 SSA_NAME updated in the loop to the related SSA_NAME available on the
2384 path entering the loop.
2386 When this function is called, we have the following situation:
2391 # name1 = phi < name0 , name2>
2396 # name2 = vdef <name1>
2401 Stmt S1 was created in the loop preheader block as part of misaligned-load
2402 handling. This function fixes the name of the vuse of S1 from 'name1' to
2406 update_vuses_to_preheader (tree stmt
, struct loop
*loop
)
2408 basic_block header_bb
= loop
->header
;
2409 edge preheader_e
= loop_preheader_edge (loop
);
2411 use_operand_p use_p
;
2413 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_VUSE
)
2415 tree ssa_name
= USE_FROM_PTR (use_p
);
2416 tree def_stmt
= SSA_NAME_DEF_STMT (ssa_name
);
2417 tree name_var
= SSA_NAME_VAR (ssa_name
);
2418 basic_block bb
= bb_for_stmt (def_stmt
);
2420 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2421 if (!IS_EMPTY_STMT (def_stmt
)
2422 && flow_bb_inside_loop_p (loop
, bb
))
2424 /* If the block containing the statement defining the SSA_NAME
2425 is in the loop then it's necessary to find the definition
2426 outside the loop using the PHI nodes of the header. */
2428 bool updated
= false;
2430 for (phi
= phi_nodes (header_bb
); phi
; phi
= TREE_CHAIN (phi
))
2432 if (SSA_NAME_VAR (PHI_RESULT (phi
)) == name_var
)
2434 SET_USE (use_p
, PHI_ARG_DEF (phi
, preheader_e
->dest_idx
));
2439 gcc_assert (updated
);
2445 /* Function vect_update_ivs_after_vectorizer.
2447 "Advance" the induction variables of LOOP to the value they should take
2448 after the execution of LOOP. This is currently necessary because the
2449 vectorizer does not handle induction variables that are used after the
2450 loop. Such a situation occurs when the last iterations of LOOP are
2452 1. We introduced new uses after LOOP for IVs that were not originally used
2453 after LOOP: the IVs of LOOP are now used by an epilog loop.
2454 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2455 times, whereas the loop IVs should be bumped N times.
2458 - LOOP - a loop that is going to be vectorized. The last few iterations
2459 of LOOP were peeled.
2460 - NITERS - the number of iterations that LOOP executes (before it is
2461 vectorized). i.e, the number of times the ivs should be bumped.
2462 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2463 coming out from LOOP on which there are uses of the LOOP ivs
2464 (this is the path from LOOP->exit to epilog_loop->preheader).
2466 The new definitions of the ivs are placed in LOOP->exit.
2467 The phi args associated with the edge UPDATE_E in the bb
2468 UPDATE_E->dest are updated accordingly.
2470 Assumption 1: Like the rest of the vectorizer, this function assumes
2471 a single loop exit that has a single predecessor.
2473 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2474 organized in the same order.
2476 Assumption 3: The access function of the ivs is simple enough (see
2477 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2479 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2480 coming out of LOOP on which the ivs of LOOP are used (this is the path
2481 that leads to the epilog loop; other paths skip the epilog loop). This
2482 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2483 needs to have its phis updated.
2487 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
2490 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2491 basic_block exit_bb
= loop
->single_exit
->dest
;
2493 basic_block update_bb
= update_e
->dest
;
2495 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2497 /* Make sure there exists a single-predecessor exit bb: */
2498 gcc_assert (single_pred_p (exit_bb
));
2500 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
2502 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
2504 tree access_fn
= NULL
;
2505 tree evolution_part
;
2508 tree var
, stmt
, ni
, ni_name
;
2509 block_stmt_iterator last_bsi
;
2511 if (vect_print_dump_info (REPORT_DETAILS
))
2513 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
2514 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
2517 /* Skip virtual phi's. */
2518 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2520 if (vect_print_dump_info (REPORT_DETAILS
))
2521 fprintf (vect_dump
, "virtual phi. skip.");
2525 /* Skip reduction phis. */
2526 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
2528 if (vect_print_dump_info (REPORT_DETAILS
))
2529 fprintf (vect_dump
, "reduc phi. skip.");
2533 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
2534 gcc_assert (access_fn
);
2536 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
2537 gcc_assert (evolution_part
!= NULL_TREE
);
2539 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2540 of degree >= 2 or exponential. */
2541 gcc_assert (!tree_is_chrec (evolution_part
));
2543 step_expr
= evolution_part
;
2544 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
2547 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
2548 build2 (MULT_EXPR
, TREE_TYPE (niters
),
2549 niters
, step_expr
), init_expr
);
2551 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
2552 add_referenced_tmp_var (var
);
2554 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
2556 /* Insert stmt into exit_bb. */
2557 last_bsi
= bsi_last (exit_bb
);
2559 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
2561 /* Fix phi expressions in the successor bb. */
2562 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
2567 /* Function vect_do_peeling_for_loop_bound
2569 Peel the last iterations of the loop represented by LOOP_VINFO.
2570 The peeled iterations form a new epilog loop. Given that the loop now
2571 iterates NITERS times, the new epilog loop iterates
2572 NITERS % VECTORIZATION_FACTOR times.
2574 The original loop will later be made to iterate
2575 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2578 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
2579 struct loops
*loops
)
2581 tree ni_name
, ratio_mult_vf_name
;
2582 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2583 struct loop
*new_loop
;
2585 basic_block preheader
;
2588 if (vect_print_dump_info (REPORT_DETAILS
))
2589 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
2591 initialize_original_copy_tables ();
2593 /* Generate the following variables on the preheader of original loop:
2595 ni_name = number of iteration the original loop executes
2596 ratio = ni_name / vf
2597 ratio_mult_vf_name = ratio * vf */
2598 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
2599 &ratio_mult_vf_name
, ratio
);
2601 loop_num
= loop
->num
;
2602 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->single_exit
,
2603 ratio_mult_vf_name
, ni_name
, false);
2604 gcc_assert (new_loop
);
2605 gcc_assert (loop_num
== loop
->num
);
2606 #ifdef ENABLE_CHECKING
2607 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
2610 /* A guard that controls whether the new_loop is to be executed or skipped
2611 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2612 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2613 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2614 is on the path where the LOOP IVs are used and need to be updated. */
2616 preheader
= loop_preheader_edge (new_loop
)->src
;
2617 if (EDGE_PRED (preheader
, 0)->src
== loop
->single_exit
->dest
)
2618 update_e
= EDGE_PRED (preheader
, 0);
2620 update_e
= EDGE_PRED (preheader
, 1);
2622 /* Update IVs of original loop as if they were advanced
2623 by ratio_mult_vf_name steps. */
2624 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
2626 /* After peeling we have to reset scalar evolution analyzer. */
2629 free_original_copy_tables ();
2633 /* Function vect_gen_niters_for_prolog_loop
2635 Set the number of iterations for the loop represented by LOOP_VINFO
2636 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2637 and the misalignment of DR - the data reference recorded in
2638 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2639 this loop, the data reference DR will refer to an aligned location.
2641 The following computation is generated:
2643 If the misalignment of DR is known at compile time:
2644 addr_mis = int mis = DR_MISALIGNMENT (dr);
2645 Else, compute address misalignment in bytes:
2646 addr_mis = addr & (vectype_size - 1)
2648 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2650 (elem_size = element type size; an element is the scalar element
2651 whose type is the inner type of the vectype) */
2654 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
2656 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
2657 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2658 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2660 tree iters
, iters_name
;
2663 tree dr_stmt
= DR_STMT (dr
);
2664 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
2665 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2666 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
2667 tree niters_type
= TREE_TYPE (loop_niters
);
2669 pe
= loop_preheader_edge (loop
);
2671 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
2673 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
2674 int element_size
= vectype_align
/vf
;
2675 int elem_misalign
= byte_misalign
/ element_size
;
2677 if (vect_print_dump_info (REPORT_DETAILS
))
2678 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
2679 iters
= build_int_cst (niters_type
, (vf
- elem_misalign
)&(vf
-1));
2683 tree new_stmts
= NULL_TREE
;
2685 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
2686 tree ptr_type
= TREE_TYPE (start_addr
);
2687 tree size
= TYPE_SIZE (ptr_type
);
2688 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
2689 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
2690 tree elem_size_log
=
2691 build_int_cst (type
, exact_log2 (vectype_align
/vf
));
2692 tree vf_minus_1
= build_int_cst (type
, vf
- 1);
2693 tree vf_tree
= build_int_cst (type
, vf
);
2697 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
2698 gcc_assert (!new_bb
);
2700 /* Create: byte_misalign = addr & (vectype_size - 1) */
2702 build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
2704 /* Create: elem_misalign = byte_misalign / element_size */
2706 build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
2708 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2709 iters
= build2 (MINUS_EXPR
, type
, vf_tree
, elem_misalign
);
2710 iters
= build2 (BIT_AND_EXPR
, type
, iters
, vf_minus_1
);
2711 iters
= fold_convert (niters_type
, iters
);
2714 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2715 /* If the loop bound is known at compile time we already verified that it is
2716 greater than vf; since the misalignment ('iters') is at most vf, there's
2717 no need to generate the MIN_EXPR in this case. */
2718 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
2719 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
2721 if (vect_print_dump_info (REPORT_DETAILS
))
2723 fprintf (vect_dump
, "niters for prolog loop: ");
2724 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
2727 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
2728 add_referenced_tmp_var (var
);
2729 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
2731 /* Insert stmt on loop preheader edge. */
2734 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
2735 gcc_assert (!new_bb
);
2742 /* Function vect_update_init_of_dr
2744 NITERS iterations were peeled from LOOP. DR represents a data reference
2745 in LOOP. This function updates the information recorded in DR to
2746 account for the fact that the first NITERS iterations had already been
2747 executed. Specifically, it updates the OFFSET field of DR. */
2750 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
2752 tree offset
= DR_OFFSET (dr
);
2754 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
2755 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
2756 DR_OFFSET (dr
) = offset
;
2760 /* Function vect_update_inits_of_drs
2762 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2763 This function updates the information recorded for the data references in
2764 the loop to account for the fact that the first NITERS iterations had
2765 already been executed. Specifically, it updates the initial_condition of the
2766 access_function of all the data_references in the loop. */
2769 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
2772 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2774 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
2775 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
2777 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
2779 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
2780 vect_update_init_of_dr (dr
, niters
);
2785 /* Function vect_do_peeling_for_alignment
2787 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2788 'niters' is set to the misalignment of one of the data references in the
2789 loop, thereby forcing it to refer to an aligned location at the beginning
2790 of the execution of this loop. The data reference for which we are
2791 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2794 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
2796 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2797 tree niters_of_prolog_loop
, ni_name
;
2799 struct loop
*new_loop
;
2801 if (vect_print_dump_info (REPORT_DETAILS
))
2802 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
2804 initialize_original_copy_tables ();
2806 ni_name
= vect_build_loop_niters (loop_vinfo
);
2807 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
2809 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2811 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
2812 niters_of_prolog_loop
, ni_name
, true);
2813 gcc_assert (new_loop
);
2814 #ifdef ENABLE_CHECKING
2815 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
2818 /* Update number of times loop executes. */
2819 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
2820 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
2821 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
2823 /* Update the init conditions of the access functions of all data refs. */
2824 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
2826 /* After peeling we have to reset scalar evolution analyzer. */
2829 free_original_copy_tables ();
2833 /* Function vect_create_cond_for_align_checks.
2835 Create a conditional expression that represents the alignment checks for
2836 all of data references (array element references) whose alignment must be
2840 LOOP_VINFO - two fields of the loop information are used.
2841 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2842 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2845 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2847 The returned value is the conditional expression to be used in the if
2848 statement that controls which version of the loop gets executed at runtime.
2850 The algorithm makes two assumptions:
2851 1) The number of bytes "n" in a vector is a power of 2.
2852 2) An address "a" is aligned if a%n is zero and that this
2853 test can be done as a&(n-1) == 0. For example, for 16
2854 byte vectors the test is a&0xf == 0. */
2857 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
2858 tree
*cond_expr_stmt_list
)
2860 VEC(tree
,heap
) *may_misalign_stmts
2861 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
2863 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
2867 tree int_ptrsize_type
;
2869 tree or_tmp_name
= NULL_TREE
;
2870 tree and_tmp
, and_tmp_name
, and_stmt
;
2873 /* Check that mask is one less than a power of 2, i.e., mask is
2874 all zeros followed by all ones. */
2875 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
2877 /* CHECKME: what is the best integer or unsigned type to use to hold a
2878 cast from a pointer value? */
2879 psize
= TYPE_SIZE (ptr_type_node
);
2881 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
2883 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2884 of the first vector of the i'th data reference. */
2886 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, ref_stmt
); i
++)
2888 tree new_stmt_list
= NULL_TREE
;
2890 tree addr_tmp
, addr_tmp_name
, addr_stmt
;
2891 tree or_tmp
, new_or_tmp_name
, or_stmt
;
2893 /* create: addr_tmp = (int)(address_of_first_vector) */
2894 addr_base
= vect_create_addr_base_for_vector_ref (ref_stmt
,
2898 if (new_stmt_list
!= NULL_TREE
)
2899 append_to_statement_list_force (new_stmt_list
, cond_expr_stmt_list
);
2901 sprintf (tmp_name
, "%s%d", "addr2int", i
);
2902 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
2903 add_referenced_tmp_var (addr_tmp
);
2904 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL_TREE
);
2905 addr_stmt
= fold_convert (int_ptrsize_type
, addr_base
);
2906 addr_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
2907 addr_tmp_name
, addr_stmt
);
2908 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
2909 append_to_statement_list_force (addr_stmt
, cond_expr_stmt_list
);
2911 /* The addresses are OR together. */
2913 if (or_tmp_name
!= NULL_TREE
)
2915 /* create: or_tmp = or_tmp | addr_tmp */
2916 sprintf (tmp_name
, "%s%d", "orptrs", i
);
2917 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
2918 add_referenced_tmp_var (or_tmp
);
2919 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL_TREE
);
2920 or_stmt
= build2 (MODIFY_EXPR
, void_type_node
, new_or_tmp_name
,
2921 build2 (BIT_IOR_EXPR
, int_ptrsize_type
,
2924 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
2925 append_to_statement_list_force (or_stmt
, cond_expr_stmt_list
);
2926 or_tmp_name
= new_or_tmp_name
;
2929 or_tmp_name
= addr_tmp_name
;
2933 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
2935 /* create: and_tmp = or_tmp & mask */
2936 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
2937 add_referenced_tmp_var (and_tmp
);
2938 and_tmp_name
= make_ssa_name (and_tmp
, NULL_TREE
);
2940 and_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
2942 build2 (BIT_AND_EXPR
, int_ptrsize_type
,
2943 or_tmp_name
, mask_cst
));
2944 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
2945 append_to_statement_list_force (and_stmt
, cond_expr_stmt_list
);
2947 /* Make and_tmp the left operand of the conditional test against zero.
2948 if and_tmp has a nonzero bit then some address is unaligned. */
2949 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
2950 return build2 (EQ_EXPR
, boolean_type_node
,
2951 and_tmp_name
, ptrsize_zero
);
2955 /* Function vect_transform_loop.
2957 The analysis phase has determined that the loop is vectorizable.
2958 Vectorize the loop - created vectorized stmts to replace the scalar
2959 stmts in the loop, and update the loop exit condition. */
2962 vect_transform_loop (loop_vec_info loop_vinfo
,
2963 struct loops
*loops ATTRIBUTE_UNUSED
)
2965 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2966 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2967 int nbbs
= loop
->num_nodes
;
2968 block_stmt_iterator si
;
2971 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2975 if (vect_print_dump_info (REPORT_DETAILS
))
2976 fprintf (vect_dump
, "=== vec_transform_loop ===");
2978 /* If the loop has data references that may or may not be aligned then
2979 two versions of the loop need to be generated, one which is vectorized
2980 and one which isn't. A test is then generated to control which of the
2981 loops is executed. The test checks for the alignment of all of the
2982 data references that may or may not be aligned. */
2984 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
2988 tree cond_expr_stmt_list
= NULL_TREE
;
2989 basic_block condition_bb
;
2990 block_stmt_iterator cond_exp_bsi
;
2991 basic_block merge_bb
;
2992 basic_block new_exit_bb
;
2994 tree orig_phi
, new_phi
, arg
;
2996 cond_expr
= vect_create_cond_for_align_checks (loop_vinfo
,
2997 &cond_expr_stmt_list
);
2998 initialize_original_copy_tables ();
2999 nloop
= loop_version (loops
, loop
, cond_expr
, &condition_bb
, true);
3000 free_original_copy_tables();
3002 /** Loop versioning violates an assumption we try to maintain during
3003 vectorization - that the loop exit block has a single predecessor.
3004 After versioning, the exit block of both loop versions is the same
3005 basic block (i.e. it has two predecessors). Just in order to simplify
3006 following transformations in the vectorizer, we fix this situation
3007 here by adding a new (empty) block on the exit-edge of the loop,
3008 with the proper loop-exit phis to maintain loop-closed-form. **/
3010 merge_bb
= loop
->single_exit
->dest
;
3011 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
3012 new_exit_bb
= split_edge (loop
->single_exit
);
3013 add_bb_to_loop (new_exit_bb
, loop
->outer
);
3014 new_exit_e
= loop
->single_exit
;
3015 e
= EDGE_SUCC (new_exit_bb
, 0);
3017 for (orig_phi
= phi_nodes (merge_bb
); orig_phi
;
3018 orig_phi
= PHI_CHAIN (orig_phi
))
3020 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
3022 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
3023 add_phi_arg (new_phi
, arg
, new_exit_e
);
3024 SET_PHI_ARG_DEF (orig_phi
, e
->dest_idx
, PHI_RESULT (new_phi
));
3027 /** end loop-exit-fixes after versioning **/
3029 update_ssa (TODO_update_ssa
);
3030 cond_exp_bsi
= bsi_last (condition_bb
);
3031 bsi_insert_before (&cond_exp_bsi
, cond_expr_stmt_list
, BSI_SAME_STMT
);
3034 /* CHECKME: we wouldn't need this if we calles update_ssa once
3036 bitmap_zero (vect_vnames_to_rename
);
3038 /* Peel the loop if there are data refs with unknown alignment.
3039 Only one data ref with unknown store is allowed. */
3041 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
3042 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
3044 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3045 compile time constant), or it is a constant that doesn't divide by the
3046 vectorization factor, then an epilog loop needs to be created.
3047 We therefore duplicate the loop: the original loop will be vectorized,
3048 and will compute the first (n/VF) iterations. The second copy of the loop
3049 will remain scalar and will compute the remaining (n%VF) iterations.
3050 (VF is the vectorization factor). */
3052 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3053 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
3054 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
3055 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
3057 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
3058 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
3060 /* 1) Make sure the loop header has exactly two entries
3061 2) Make sure we have a preheader basic block. */
3063 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
3065 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
3068 /* FORNOW: the vectorizer supports only loops which body consist
3069 of one basic block (header + empty latch). When the vectorizer will
3070 support more involved loop forms, the order by which the BBs are
3071 traversed need to be reconsidered. */
3073 for (i
= 0; i
< nbbs
; i
++)
3075 basic_block bb
= bbs
[i
];
3077 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
3079 tree stmt
= bsi_stmt (si
);
3080 stmt_vec_info stmt_info
;
3083 if (vect_print_dump_info (REPORT_DETAILS
))
3085 fprintf (vect_dump
, "------>vectorizing statement: ");
3086 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
3088 stmt_info
= vinfo_for_stmt (stmt
);
3089 gcc_assert (stmt_info
);
3090 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
3091 && !STMT_VINFO_LIVE_P (stmt_info
))
3096 /* FORNOW: Verify that all stmts operate on the same number of
3097 units and no inner unrolling is necessary. */
3099 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
3100 == (unsigned HOST_WIDE_INT
) vectorization_factor
);
3102 /* -------- vectorize statement ------------ */
3103 if (vect_print_dump_info (REPORT_DETAILS
))
3104 fprintf (vect_dump
, "transform statement.");
3106 is_store
= vect_transform_stmt (stmt
, &si
);
3109 /* Free the attached stmt_vec_info and remove the stmt. */
3110 stmt_ann_t ann
= stmt_ann (stmt
);
3112 set_stmt_info ((tree_ann_t
)ann
, NULL
);
3113 bsi_remove (&si
, true);
3121 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
3123 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename
, 0, j
, bi
)
3124 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j
)));
3126 /* The memory tags and pointers in vectorized statements need to
3127 have their SSA forms updated. FIXME, why can't this be delayed
3128 until all the loops have been transformed? */
3129 update_ssa (TODO_update_ssa
);
3131 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
3132 fprintf (vect_dump
, "LOOP VECTORIZED.");