/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
40 #include "tree-data-ref.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
45 #include "tree-pass.h"
49 /* Utility functions for the code transformation. */
50 static bool vect_transform_stmt (tree
, block_stmt_iterator
*, bool *);
51 static tree
vect_create_destination_var (tree
, tree
);
52 static tree vect_create_data_ref_ptr
53 (tree
, block_stmt_iterator
*, tree
, tree
*, tree
*, bool, tree
);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_setup_realignment (tree
, block_stmt_iterator
*, tree
*);
56 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
57 static tree
vect_get_vec_def_for_operand (tree
, tree
, tree
*);
58 static tree
vect_init_vector (tree
, tree
, tree
);
59 static void vect_finish_stmt_generation
60 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
61 static bool vect_is_simple_cond (tree
, loop_vec_info
);
62 static void update_vuses_to_preheader (tree
, struct loop
*);
63 static void vect_create_epilog_for_reduction (tree
, tree
, enum tree_code
, tree
);
64 static tree
get_initial_def_for_reduction (tree
, tree
, tree
*);
66 /* Utility function dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info
, tree
*, tree
*, tree
*);
69 static tree
vect_build_loop_niters (loop_vec_info
);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
71 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
72 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
73 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
74 static int vect_min_worthwhile_factor (enum tree_code
);
77 /* Function vect_get_new_vect_var.
79 Returns a name for a new variable. The current naming scheme appends the
80 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
81 the name of vectorizer generated variables, and appends that to NAME if
85 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
98 case vect_pointer_var
:
106 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
108 new_vect_var
= create_tmp_var (type
, prefix
);
110 /* Mark vector typed variable as a gimple register variable. */
111 if (TREE_CODE (type
) == VECTOR_TYPE
)
112 DECL_GIMPLE_REG_P (new_vect_var
) = true;
118 /* Function vect_create_addr_base_for_vector_ref.
120 Create an expression that computes the address of the first memory location
121 that will be accessed for a data reference.
124 STMT: The statement containing the data reference.
125 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
126 OFFSET: Optional. If supplied, it is be added to the initial address.
129 1. Return an SSA_NAME whose value is the address of the memory location of
130 the first vector of the data reference.
131 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
132 these statement(s) which define the returned SSA_NAME.
134 FORNOW: We are only handling array accesses with step 1. */
137 vect_create_addr_base_for_vector_ref (tree stmt
,
141 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
142 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
143 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
144 tree base_name
= build_fold_indirect_ref (data_ref_base
);
146 tree addr_base
, addr_expr
;
148 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
149 tree init
= unshare_expr (DR_INIT (dr
));
150 tree vect_ptr_type
, addr_expr2
;
152 /* Create base_offset */
153 base_offset
= size_binop (PLUS_EXPR
, base_offset
, init
);
154 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
155 add_referenced_var (dest
);
156 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
157 append_to_statement_list_force (new_stmt
, new_stmt_list
);
161 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
164 /* For interleaved access step we divide STEP by the size of the
165 interleaving group. */
166 if (DR_GROUP_SIZE (stmt_info
))
167 step
= fold_build2 (TRUNC_DIV_EXPR
, TREE_TYPE (offset
), DR_STEP (dr
),
168 build_int_cst (TREE_TYPE (offset
),
169 DR_GROUP_SIZE (stmt_info
)));
173 add_referenced_var (tmp
);
174 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
, step
);
175 base_offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
176 base_offset
, offset
);
177 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
178 append_to_statement_list_force (new_stmt
, new_stmt_list
);
181 /* base + base_offset */
182 addr_base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
185 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
187 /* addr_expr = addr_base */
188 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
189 get_name (base_name
));
190 add_referenced_var (addr_expr
);
191 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
192 addr_expr2
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
193 get_name (base_name
));
194 add_referenced_var (addr_expr2
);
195 vec_stmt
= force_gimple_operand (vec_stmt
, &new_stmt
, false, addr_expr2
);
196 append_to_statement_list_force (new_stmt
, new_stmt_list
);
198 if (vect_print_dump_info (REPORT_DETAILS
))
200 fprintf (vect_dump
, "created ");
201 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
207 /* Function vect_create_data_ref_ptr.
209 Create a new pointer to vector type (vp), that points to the first location
210 accessed in the loop by STMT, along with the def-use update chain to
211 appropriately advance the pointer through the loop iterations. Also set
212 aliasing information for the pointer. This vector pointer is used by the
213 callers to this function to create a memory reference expression for vector
217 1. STMT: a stmt that references memory. Expected to be of the form
218 GIMPLE_MODIFY_STMT <name, data-ref> or
219 GIMPLE_MODIFY_STMT <data-ref, name>.
220 2. BSI: block_stmt_iterator where new stmts can be added.
221 3. OFFSET (optional): an offset to be added to the initial address accessed
222 by the data-ref in STMT.
223 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
224 pointing to the initial address.
225 5. TYPE: if not NULL indicates the required type of the data-ref
228 1. Declare a new ptr to vector_type, and have it point to the base of the
229 data reference (initial addressed accessed by the data reference).
230 For example, for vector of type V8HI, the following code is generated:
233 vp = (v8hi *)initial_address;
235 if OFFSET is not supplied:
236 initial_address = &a[init];
237 if OFFSET is supplied:
238 initial_address = &a[init + OFFSET];
240 Return the initial_address in INITIAL_ADDRESS.
242 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
243 update the pointer in each iteration of the loop.
245 Return the increment stmt that updates the pointer in PTR_INCR.
247 3. Return the pointer. */
250 vect_create_data_ref_ptr (tree stmt
,
251 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
252 tree offset
, tree
*initial_address
, tree
*ptr_incr
,
253 bool only_init
, tree type
)
256 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
257 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
258 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
259 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
265 tree new_stmt_list
= NULL_TREE
;
266 edge pe
= loop_preheader_edge (loop
);
269 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
271 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
273 if (vect_print_dump_info (REPORT_DETAILS
))
275 tree data_ref_base
= base_name
;
276 fprintf (vect_dump
, "create vector-pointer variable to type: ");
277 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
278 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
279 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
280 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
281 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
282 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
283 fprintf (vect_dump
, " vectorizing a record based array ref: ");
284 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
285 fprintf (vect_dump
, " vectorizing a pointer ref: ");
286 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
289 /** (1) Create the new vector-pointer variable: **/
291 vect_ptr_type
= build_pointer_type (type
);
293 vect_ptr_type
= build_pointer_type (vectype
);
294 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
295 get_name (base_name
));
296 add_referenced_var (vect_ptr
);
298 /** (2) Add aliasing information to the new vector-pointer:
299 (The points-to info (DR_PTR_INFO) may be defined later.) **/
301 tag
= DR_MEMTAG (dr
);
304 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
305 tag must be created with tag added to its may alias list. */
307 new_type_alias (vect_ptr
, tag
, DR_REF (dr
));
309 set_symbol_mem_tag (vect_ptr
, tag
);
311 var_ann (vect_ptr
)->subvars
= DR_SUBVARS (dr
);
313 /** (3) Calculate the initial address the vector-pointer, and set
314 the vector-pointer to point to it before the loop: **/
316 /* Create: (&(base[init_val+offset]) in the loop preheader. */
317 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
319 pe
= loop_preheader_edge (loop
);
320 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
321 gcc_assert (!new_bb
);
322 *initial_address
= new_temp
;
324 /* Create: p = (vectype *) initial_base */
325 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
326 vec_stmt
= build_gimple_modify_stmt (vect_ptr
, vec_stmt
);
327 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
328 GIMPLE_STMT_OPERAND (vec_stmt
, 0) = vect_ptr_init
;
329 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
330 gcc_assert (!new_bb
);
333 /** (4) Handle the updating of the vector-pointer inside the loop: **/
335 if (only_init
) /* No update in loop is required. */
337 /* Copy the points-to information if it exists. */
338 if (DR_PTR_INFO (dr
))
339 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
340 return vect_ptr_init
;
344 block_stmt_iterator incr_bsi
;
346 tree indx_before_incr
, indx_after_incr
;
349 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
350 create_iv (vect_ptr_init
,
351 fold_convert (vect_ptr_type
, TYPE_SIZE_UNIT (vectype
)),
352 NULL_TREE
, loop
, &incr_bsi
, insert_after
,
353 &indx_before_incr
, &indx_after_incr
);
354 incr
= bsi_stmt (incr_bsi
);
355 set_stmt_info (stmt_ann (incr
),
356 new_stmt_vec_info (incr
, loop_vinfo
));
358 /* Copy the points-to information if it exists. */
359 if (DR_PTR_INFO (dr
))
361 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
362 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
364 merge_alias_info (vect_ptr_init
, indx_before_incr
);
365 merge_alias_info (vect_ptr_init
, indx_after_incr
);
369 return indx_before_incr
;
374 /* Function bump_vector_ptr
376 Increment a pointer (to a vector type) by vector-size. Connect the new
377 increment stmt to the existing def-use update-chain of the pointer.
379 The pointer def-use update-chain before this function:
380 DATAREF_PTR = phi (p_0, p_2)
382 PTR_INCR: p_2 = DATAREF_PTR + step
384 The pointer def-use update-chain after this function:
385 DATAREF_PTR = phi (p_0, p_2)
387 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
389 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
392 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
394 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
395 The increment amount across iterations is also expected to be
397 BSI - location where the new update stmt is to be placed.
398 STMT - the original scalar memory-access stmt that is being vectorized.
400 Output: Return NEW_DATAREF_PTR as illustrated above.
405 bump_vector_ptr (tree dataref_ptr
, tree ptr_incr
, block_stmt_iterator
*bsi
,
408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
409 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
410 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
411 tree vptr_type
= TREE_TYPE (dataref_ptr
);
412 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
413 tree update
= fold_convert (vptr_type
, TYPE_SIZE_UNIT (vectype
));
417 tree new_dataref_ptr
;
419 incr_stmt
= build_gimple_modify_stmt (ptr_var
,
420 build2 (PLUS_EXPR
, vptr_type
,
421 dataref_ptr
, update
));
422 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
423 GIMPLE_STMT_OPERAND (incr_stmt
, 0) = new_dataref_ptr
;
424 vect_finish_stmt_generation (stmt
, incr_stmt
, bsi
);
426 /* Update the vector-pointer's cross-iteration increment. */
427 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
429 tree use
= USE_FROM_PTR (use_p
);
431 if (use
== dataref_ptr
)
432 SET_USE (use_p
, new_dataref_ptr
);
434 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
437 /* Copy the points-to information if it exists. */
438 if (DR_PTR_INFO (dr
))
439 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
440 merge_alias_info (new_dataref_ptr
, dataref_ptr
);
442 return new_dataref_ptr
;
446 /* Function vect_create_destination_var.
448 Create a new temporary of type VECTYPE. */
451 vect_create_destination_var (tree scalar_dest
, tree vectype
)
454 const char *new_name
;
456 enum vect_var_kind kind
;
458 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
459 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
461 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
463 new_name
= get_name (scalar_dest
);
466 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
467 add_referenced_var (vec_dest
);
473 /* Function vect_init_vector.
475 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
476 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
477 used in the vectorization of STMT. */
480 vect_init_vector (tree stmt
, tree vector_var
, tree vector_type
)
482 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
483 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
484 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
492 new_var
= vect_get_new_vect_var (vector_type
, vect_simple_var
, "cst_");
493 add_referenced_var (new_var
);
495 init_stmt
= build_gimple_modify_stmt (new_var
, vector_var
);
496 new_temp
= make_ssa_name (new_var
, init_stmt
);
497 GIMPLE_STMT_OPERAND (init_stmt
, 0) = new_temp
;
499 pe
= loop_preheader_edge (loop
);
500 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
501 gcc_assert (!new_bb
);
503 if (vect_print_dump_info (REPORT_DETAILS
))
505 fprintf (vect_dump
, "created new init_stmt: ");
506 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
509 vec_oprnd
= GIMPLE_STMT_OPERAND (init_stmt
, 0);
514 /* Function get_initial_def_for_induction
517 STMT - a stmt that performs an induction operation in the loop.
518 IV_PHI - the initial value of the induction variable
521 Return a vector variable, initialized with the first VF values of
522 the induction variable. E.g., for an iv with IV_PHI='X' and
523 evolution S, for a vector of 4 units, we want to return:
524 [X, X + S, X + 2*S, X + 3*S]. */
527 get_initial_def_for_induction (tree stmt
, tree iv_phi
)
529 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
530 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
531 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
532 tree scalar_type
= TREE_TYPE (iv_phi
);
533 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
534 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
535 edge pe
= loop_preheader_edge (loop
);
537 block_stmt_iterator bsi
;
538 tree vec
, vec_init
, vec_step
, t
;
543 tree induction_phi
, induc_def
, new_stmt
, vec_def
, vec_dest
;
544 tree init_expr
, step_expr
;
545 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
548 int ncopies
= vf
/ nunits
;
550 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
552 gcc_assert (phi_info
);
554 if (STMT_VINFO_VEC_STMT (phi_info
))
556 induction_phi
= STMT_VINFO_VEC_STMT (phi_info
);
557 gcc_assert (TREE_CODE (induction_phi
) == PHI_NODE
);
559 if (vect_print_dump_info (REPORT_DETAILS
))
561 fprintf (vect_dump
, "induction already vectorized:");
562 print_generic_expr (vect_dump
, iv_phi
, TDF_SLIM
);
563 fprintf (vect_dump
, "\n");
564 print_generic_expr (vect_dump
, induction_phi
, TDF_SLIM
);
567 return PHI_RESULT (induction_phi
);
570 gcc_assert (ncopies
>= 1);
572 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (iv_phi
));
573 gcc_assert (access_fn
);
574 ok
= vect_is_simple_iv_evolution (loop
->num
, access_fn
, &init_expr
, &step_expr
);
577 /* Create the vector that holds the initial_value of the induction. */
578 new_name
= init_expr
;
580 t
= tree_cons (NULL_TREE
, init_expr
, t
);
581 for (i
= 1; i
< nunits
; i
++)
585 /* Create: new_name = new_name + step_expr */
586 new_var
= vect_get_new_vect_var (scalar_type
, vect_scalar_var
, "var_");
587 add_referenced_var (new_var
);
588 tmp
= fold_build2 (PLUS_EXPR
, scalar_type
, new_name
, step_expr
);
589 init_stmt
= build_gimple_modify_stmt (new_var
, tmp
);
590 new_name
= make_ssa_name (new_var
, init_stmt
);
591 GIMPLE_STMT_OPERAND (init_stmt
, 0) = new_name
;
593 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
594 gcc_assert (!new_bb
);
596 if (vect_print_dump_info (REPORT_DETAILS
))
598 fprintf (vect_dump
, "created new init_stmt: ");
599 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
601 t
= tree_cons (NULL_TREE
, new_name
, t
);
603 vec
= build_constructor_from_list (vectype
, nreverse (t
));
604 vec_init
= vect_init_vector (stmt
, vec
, vectype
);
607 /* Create the vector that holds the step of the induction. */
608 expr
= build_int_cst (scalar_type
, vf
);
609 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
611 for (i
= 0; i
< nunits
; i
++)
612 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
613 vec
= build_constructor_from_list (vectype
, t
);
614 vec_step
= vect_init_vector (stmt
, vec
, vectype
);
617 /* Create the following def-use cycle:
619 vec_init = [X, X+S, X+2*S, X+3*S]
620 vec_step = [VF*S, VF*S, VF*S, VF*S]
622 vec_iv = PHI <vec_init, vec_loop>
626 vec_loop = vec_iv + vec_step; */
628 /* Create the induction-phi that defines the induction-operand. */
629 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
630 add_referenced_var (vec_dest
);
631 induction_phi
= create_phi_node (vec_dest
, loop
->header
);
632 set_stmt_info (get_stmt_ann (induction_phi
),
633 new_stmt_vec_info (induction_phi
, loop_vinfo
));
634 induc_def
= PHI_RESULT (induction_phi
);
636 /* Create the iv update inside the loop */
637 new_stmt
= build_gimple_modify_stmt (NULL_TREE
,
638 build2 (PLUS_EXPR
, vectype
,
639 induc_def
, vec_step
));
640 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
641 GIMPLE_STMT_OPERAND (new_stmt
, 0) = vec_def
;
642 bsi
= bsi_for_stmt (stmt
);
643 vect_finish_stmt_generation (stmt
, new_stmt
, &bsi
);
645 /* Set the arguments of the phi node: */
646 add_phi_arg (induction_phi
, vec_init
, loop_preheader_edge (loop
));
647 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (loop
));
650 /* In case the vectorization factor (VF) is bigger than the number
651 of elements that we can fit in a vectype (nunits), we have to generate
652 more than one vector stmt - i.e - we need to "unroll" the
653 vector stmt by a factor VF/nunits. For more details see documentation
654 in vectorizable_operation. */
658 stmt_vec_info prev_stmt_vinfo
;
660 /* Create the vector that holds the step of the induction. */
661 expr
= build_int_cst (scalar_type
, nunits
);
662 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
664 for (i
= 0; i
< nunits
; i
++)
665 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
666 vec
= build_constructor_from_list (vectype
, t
);
667 vec_step
= vect_init_vector (stmt
, vec
, vectype
);
670 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
671 for (i
= 1; i
< ncopies
; i
++)
675 /* vec_i = vec_prev + vec_{step*nunits} */
676 tmp
= build2 (PLUS_EXPR
, vectype
, vec_def
, vec_step
);
677 new_stmt
= build_gimple_modify_stmt (NULL_TREE
, tmp
);
678 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
679 GIMPLE_STMT_OPERAND (new_stmt
, 0) = vec_def
;
680 bsi
= bsi_for_stmt (stmt
);
681 vect_finish_stmt_generation (stmt
, new_stmt
, &bsi
);
683 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
684 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
688 if (vect_print_dump_info (REPORT_DETAILS
))
690 fprintf (vect_dump
, "transform induction: created def-use cycle:");
691 print_generic_expr (vect_dump
, induction_phi
, TDF_SLIM
);
692 fprintf (vect_dump
, "\n");
693 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vec_def
), TDF_SLIM
);
696 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
701 /* Function vect_get_vec_def_for_operand.
703 OP is an operand in STMT. This function returns a (vector) def that will be
704 used in the vectorized stmt for STMT.
706 In the case that OP is an SSA_NAME which is defined in the loop, then
707 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
709 In case OP is an invariant or constant, a new stmt that creates a vector def
710 needs to be introduced. */
713 vect_get_vec_def_for_operand (tree op
, tree stmt
, tree
*scalar_def
)
718 stmt_vec_info def_stmt_info
= NULL
;
719 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
720 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
721 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
722 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
723 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
729 enum vect_def_type dt
;
733 if (vect_print_dump_info (REPORT_DETAILS
))
735 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
736 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
739 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
740 gcc_assert (is_simple_use
);
741 if (vect_print_dump_info (REPORT_DETAILS
))
745 fprintf (vect_dump
, "def = ");
746 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
750 fprintf (vect_dump
, " def_stmt = ");
751 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
757 /* Case 1: operand is a constant. */
758 case vect_constant_def
:
763 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
764 if (vect_print_dump_info (REPORT_DETAILS
))
765 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
767 for (i
= nunits
- 1; i
>= 0; --i
)
769 t
= tree_cons (NULL_TREE
, op
, t
);
771 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
772 vec_cst
= build_vector (vector_type
, t
);
774 return vect_init_vector (stmt
, vec_cst
, vector_type
);
777 /* Case 2: operand is defined outside the loop - loop invariant. */
778 case vect_invariant_def
:
783 /* Create 'vec_inv = {inv,inv,..,inv}' */
784 if (vect_print_dump_info (REPORT_DETAILS
))
785 fprintf (vect_dump
, "Create vector_inv.");
787 for (i
= nunits
- 1; i
>= 0; --i
)
789 t
= tree_cons (NULL_TREE
, def
, t
);
792 /* FIXME: use build_constructor directly. */
793 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
794 vec_inv
= build_constructor_from_list (vector_type
, t
);
795 return vect_init_vector (stmt
, vec_inv
, vector_type
);
798 /* Case 3: operand is defined inside the loop. */
802 *scalar_def
= def_stmt
;
804 /* Get the def from the vectorized stmt. */
805 def_stmt_info
= vinfo_for_stmt (def_stmt
);
806 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
807 gcc_assert (vec_stmt
);
808 vec_oprnd
= GIMPLE_STMT_OPERAND (vec_stmt
, 0);
812 /* Case 4: operand is defined by a loop header phi - reduction */
813 case vect_reduction_def
:
815 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
817 /* Get the def before the loop */
818 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
819 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
822 /* Case 5: operand is defined by loop-header phi - induction. */
823 case vect_induction_def
:
825 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
827 /* Get the def before the loop */
828 return get_initial_def_for_induction (stmt
, def_stmt
);
837 /* Function vect_get_vec_def_for_stmt_copy
839 Return a vector-def for an operand. This function is used when the
840 vectorized stmt to be created (by the caller to this function) is a "copy"
841 created in case the vectorized result cannot fit in one vector, and several
842 copies of the vector-stmt are required. In this case the vector-def is
843 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
844 of the stmt that defines VEC_OPRND.
845 DT is the type of the vector def VEC_OPRND.
848 In case the vectorization factor (VF) is bigger than the number
849 of elements that can fit in a vectype (nunits), we have to generate
850 more than one vector stmt to vectorize the scalar stmt. This situation
851 arises when there are multiple data-types operated upon in the loop; the
852 smallest data-type determines the VF, and as a result, when vectorizing
853 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
854 vector stmt (each computing a vector of 'nunits' results, and together
855 computing 'VF' results in each iteration). This function is called when
856 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
857 which VF=16 and nunits=4, so the number of copies required is 4):
859 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
861 S1: x = load VS1.0: vx.0 = memref0 VS1.1
862 VS1.1: vx.1 = memref1 VS1.2
863 VS1.2: vx.2 = memref2 VS1.3
864 VS1.3: vx.3 = memref3
866 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
867 VSnew.1: vz1 = vx.1 + ... VSnew.2
868 VSnew.2: vz2 = vx.2 + ... VSnew.3
869 VSnew.3: vz3 = vx.3 + ...
871 The vectorization of S1 is explained in vectorizable_load.
872 The vectorization of S2:
873 To create the first vector-stmt out of the 4 copies - VSnew.0 -
874 the function 'vect_get_vec_def_for_operand' is called to
875 get the relevant vector-def for each operand of S2. For operand x it
876 returns the vector-def 'vx.0'.
878 To create the remaining copies of the vector-stmt (VSnew.j), this
879 function is called to get the relevant vector-def for each operand. It is
880 obtained from the respective VS1.j stmt, which is recorded in the
881 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
883 For example, to obtain the vector-def 'vx.1' in order to create the
884 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
885 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
886 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
887 and return its def ('vx.1').
888 Overall, to create the above sequence this function will be called 3 times:
889 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
890 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
891 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
894 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
896 tree vec_stmt_for_operand
;
897 stmt_vec_info def_stmt_info
;
899 /* Do nothing; can reuse same def. */
900 if (dt
== vect_invariant_def
|| dt
== vect_constant_def
)
903 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
904 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
905 gcc_assert (def_stmt_info
);
906 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
907 gcc_assert (vec_stmt_for_operand
);
908 vec_oprnd
= GIMPLE_STMT_OPERAND (vec_stmt_for_operand
, 0);
914 /* Function vect_finish_stmt_generation.
916 Insert a new stmt. */
919 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
,
920 block_stmt_iterator
*bsi
)
922 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
923 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
925 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
926 set_stmt_info (get_stmt_ann (vec_stmt
),
927 new_stmt_vec_info (vec_stmt
, loop_vinfo
));
929 if (vect_print_dump_info (REPORT_DETAILS
))
931 fprintf (vect_dump
, "add new stmt: ");
932 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
935 /* Make sure bsi points to the stmt that is being vectorized. */
936 gcc_assert (stmt
== bsi_stmt (*bsi
));
938 #ifdef USE_MAPPED_LOCATION
939 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCATION (stmt
));
941 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
946 #define ADJUST_IN_EPILOG 1
948 /* Function get_initial_def_for_reduction
951 STMT - a stmt that performs a reduction operation in the loop.
952 INIT_VAL - the initial value of the reduction variable
955 SCALAR_DEF - a tree that holds a value to be added to the final result
956 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
957 Return a vector variable, initialized according to the operation that STMT
958 performs. This vector will be used as the initial value of the
959 vector of partial results.
961 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
964 min/max: [init_val,init_val,..,init_val,init_val]
965 bit and/or: [init_val,init_val,..,init_val,init_val]
966 and when necessary (e.g. add/mult case) let the caller know
967 that it needs to adjust the result by init_val.
969 Option2: Initialize the vector as follows:
970 add: [0,0,...,0,init_val]
971 mult: [1,1,...,1,init_val]
972 min/max: [init_val,init_val,...,init_val]
973 bit and/or: [init_val,init_val,...,init_val]
974 and no adjustments are needed.
976 For example, for the following code:
982 STMT is 's = s + a[i]', and the reduction variable is 's'.
983 For a vector of 4 units, we want to return either [0,0,0,init_val],
984 or [0,0,0,0] and let the caller know that it needs to adjust
985 the result at the end by 'init_val'.
987 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
988 TODO: Use some cost-model to estimate which scheme is more profitable.
992 get_initial_def_for_reduction (tree stmt
, tree init_val
, tree
*scalar_def
)
994 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
995 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
996 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
998 enum tree_code code
= TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 1));
999 tree type
= TREE_TYPE (init_val
);
1001 tree vec
, t
= NULL_TREE
;
1002 bool need_epilog_adjust
;
1006 gcc_assert (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
));
1010 case WIDEN_SUM_EXPR
:
1013 if (INTEGRAL_TYPE_P (type
))
1014 def
= build_int_cst (type
, 0);
1016 def
= build_real (type
, dconst0
);
1018 #ifdef ADJUST_IN_EPILOG
1019 /* All the 'nunits' elements are set to 0. The final result will be
1020 adjusted by 'init_val' at the loop epilog. */
1022 need_epilog_adjust
= true;
1024 /* 'nunits - 1' elements are set to 0; The last element is set to
1025 'init_val'. No further adjustments at the epilog are needed. */
1026 nelements
= nunits
- 1;
1027 need_epilog_adjust
= false;
1035 need_epilog_adjust
= false;
1042 for (i
= nelements
- 1; i
>= 0; --i
)
1043 t
= tree_cons (NULL_TREE
, def
, t
);
1045 if (nelements
== nunits
- 1)
1047 /* Set the last element of the vector. */
1048 t
= tree_cons (NULL_TREE
, init_val
, t
);
1051 gcc_assert (nelements
== nunits
);
1053 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1054 if (TREE_CODE (init_val
) == INTEGER_CST
|| TREE_CODE (init_val
) == REAL_CST
)
1055 vec
= build_vector (vector_type
, t
);
1057 vec
= build_constructor_from_list (vector_type
, t
);
1059 if (!need_epilog_adjust
)
1060 *scalar_def
= NULL_TREE
;
1062 *scalar_def
= init_val
;
1064 return vect_init_vector (stmt
, vec
, vector_type
);
1068 /* Function vect_create_epilog_for_reduction
1070 Create code at the loop-epilog to finalize the result of a reduction
1073 VECT_DEF is a vector of partial results.
1074 REDUC_CODE is the tree-code for the epilog reduction.
1075 STMT is the scalar reduction stmt that is being vectorized.
1076 REDUCTION_PHI is the phi-node that carries the reduction computation.
1079 1. Creates the reduction def-use cycle: sets the arguments for
1081 The loop-entry argument is the vectorized initial-value of the reduction.
1082 The loop-latch argument is VECT_DEF - the vector of partial sums.
1083 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1084 by applying the operation specified by REDUC_CODE if available, or by
1085 other means (whole-vector shifts or a scalar loop).
1086 The function also creates a new phi node at the loop exit to preserve
1087 loop-closed form, as illustrated below.
1089 The flow at the entry to this function:
1092 vec_def = phi <null, null> # REDUCTION_PHI
1093 VECT_DEF = vector_stmt # vectorized form of STMT
1094 s_loop = scalar_stmt # (scalar) STMT
1096 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1100 The above is transformed by this function into:
1103 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1104 VECT_DEF = vector_stmt # vectorized form of STMT
1105 s_loop = scalar_stmt # (scalar) STMT
1107 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1108 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1109 v_out2 = reduce <v_out1>
1110 s_out3 = extract_field <v_out2, 0>
1111 s_out4 = adjust_result <s_out3>
1117 vect_create_epilog_for_reduction (tree vect_def
, tree stmt
,
1118 enum tree_code reduc_code
, tree reduction_phi
)
1120 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1122 enum machine_mode mode
;
1123 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1124 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1125 basic_block exit_bb
;
1129 block_stmt_iterator exit_bsi
;
1134 tree new_scalar_dest
, exit_phi
;
1135 tree bitsize
, bitpos
, bytesize
;
1136 enum tree_code code
= TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 1));
1137 tree scalar_initial_def
;
1138 tree vec_initial_def
;
1140 imm_use_iterator imm_iter
;
1141 use_operand_p use_p
;
1142 bool extract_scalar_result
;
1146 tree operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
1149 op_type
= TREE_OPERAND_LENGTH (operation
);
1150 reduction_op
= TREE_OPERAND (operation
, op_type
-1);
1151 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
1152 mode
= TYPE_MODE (vectype
);
1154 /*** 1. Create the reduction def-use cycle ***/
1156 /* 1.1 set the loop-entry arg of the reduction-phi: */
1157 /* For the case of reduction, vect_get_vec_def_for_operand returns
1158 the scalar def before the loop, that defines the initial value
1159 of the reduction variable. */
1160 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
1161 &scalar_initial_def
);
1162 add_phi_arg (reduction_phi
, vec_initial_def
, loop_preheader_edge (loop
));
1164 /* 1.2 set the loop-latch arg for the reduction-phi: */
1165 add_phi_arg (reduction_phi
, vect_def
, loop_latch_edge (loop
));
1167 if (vect_print_dump_info (REPORT_DETAILS
))
1169 fprintf (vect_dump
, "transform reduction: created def-use cycle:");
1170 print_generic_expr (vect_dump
, reduction_phi
, TDF_SLIM
);
1171 fprintf (vect_dump
, "\n");
1172 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vect_def
), TDF_SLIM
);
1176 /*** 2. Create epilog code
1177 The reduction epilog code operates across the elements of the vector
1178 of partial results computed by the vectorized loop.
1179 The reduction epilog code consists of:
1180 step 1: compute the scalar result in a vector (v_out2)
1181 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1182 step 3: adjust the scalar result (s_out3) if needed.
1184 Step 1 can be accomplished using one the following three schemes:
1185 (scheme 1) using reduc_code, if available.
1186 (scheme 2) using whole-vector shifts, if available.
1187 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1190 The overall epilog code looks like this:
1192 s_out0 = phi <s_loop> # original EXIT_PHI
1193 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1194 v_out2 = reduce <v_out1> # step 1
1195 s_out3 = extract_field <v_out2, 0> # step 2
1196 s_out4 = adjust_result <s_out3> # step 3
1198 (step 3 is optional, and step2 1 and 2 may be combined).
1199 Lastly, the uses of s_out0 are replaced by s_out4.
1203 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1204 v_out1 = phi <v_loop> */
1206 exit_bb
= single_exit (loop
)->dest
;
1207 new_phi
= create_phi_node (SSA_NAME_VAR (vect_def
), exit_bb
);
1208 SET_PHI_ARG_DEF (new_phi
, single_exit (loop
)->dest_idx
, vect_def
);
1209 exit_bsi
= bsi_start (exit_bb
);
1211 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1212 (i.e. when reduc_code is not available) and in the final adjustment code
1213 (if needed). Also get the original scalar reduction variable as
1214 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1215 represents a reduction pattern), the tree-code and scalar-def are
1216 taken from the original stmt that the pattern-stmt (STMT) replaces.
1217 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1218 are taken from STMT. */
1220 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1223 /* Regular reduction */
1228 /* Reduction pattern */
1229 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
1230 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
1231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
1233 code
= TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt
, 1));
1234 scalar_dest
= GIMPLE_STMT_OPERAND (orig_stmt
, 0);
1235 scalar_type
= TREE_TYPE (scalar_dest
);
1236 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
1237 bitsize
= TYPE_SIZE (scalar_type
);
1238 bytesize
= TYPE_SIZE_UNIT (scalar_type
);
1240 /* 2.3 Create the reduction code, using one of the three schemes described
1243 if (reduc_code
< NUM_TREE_CODES
)
1247 /*** Case 1: Create:
1248 v_out2 = reduc_expr <v_out1> */
1250 if (vect_print_dump_info (REPORT_DETAILS
))
1251 fprintf (vect_dump
, "Reduce using direct vector reduction.");
1253 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1254 tmp
= build1 (reduc_code
, vectype
, PHI_RESULT (new_phi
));
1255 epilog_stmt
= build_gimple_modify_stmt (vec_dest
, tmp
);
1256 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
1257 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1258 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1260 extract_scalar_result
= true;
1264 enum tree_code shift_code
= 0;
1265 bool have_whole_vector_shift
= true;
1267 int element_bitsize
= tree_low_cst (bitsize
, 1);
1268 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
1271 if (vec_shr_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
1272 shift_code
= VEC_RSHIFT_EXPR
;
1274 have_whole_vector_shift
= false;
1276 /* Regardless of whether we have a whole vector shift, if we're
1277 emulating the operation via tree-vect-generic, we don't want
1278 to use it. Only the first round of the reduction is likely
1279 to still be profitable via emulation. */
1280 /* ??? It might be better to emit a reduction tree code here, so that
1281 tree-vect-generic can expand the first round via bit tricks. */
1282 if (!VECTOR_MODE_P (mode
))
1283 have_whole_vector_shift
= false;
1286 optab optab
= optab_for_tree_code (code
, vectype
);
1287 if (optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
1288 have_whole_vector_shift
= false;
1291 if (have_whole_vector_shift
)
1293 /*** Case 2: Create:
1294 for (offset = VS/2; offset >= element_size; offset/=2)
1296 Create: va' = vec_shift <va, offset>
1297 Create: va = vop <va, va'>
1300 if (vect_print_dump_info (REPORT_DETAILS
))
1301 fprintf (vect_dump
, "Reduce using vector shifts");
1303 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1304 new_temp
= PHI_RESULT (new_phi
);
1306 for (bit_offset
= vec_size_in_bits
/2;
1307 bit_offset
>= element_bitsize
;
1310 tree bitpos
= size_int (bit_offset
);
1311 tree tmp
= build2 (shift_code
, vectype
, new_temp
, bitpos
);
1312 epilog_stmt
= build_gimple_modify_stmt (vec_dest
, tmp
);
1313 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
1314 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_name
;
1315 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1317 tmp
= build2 (code
, vectype
, new_name
, new_temp
);
1318 epilog_stmt
= build_gimple_modify_stmt (vec_dest
, tmp
);
1319 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
1320 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1321 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1324 extract_scalar_result
= true;
1330 /*** Case 3: Create:
1331 s = extract_field <v_out2, 0>
1332 for (offset = element_size;
1333 offset < vector_size;
1334 offset += element_size;)
1336 Create: s' = extract_field <v_out2, offset>
1337 Create: s = op <s, s'>
1340 if (vect_print_dump_info (REPORT_DETAILS
))
1341 fprintf (vect_dump
, "Reduce using scalar code. ");
1343 vec_temp
= PHI_RESULT (new_phi
);
1344 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
1345 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
1347 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1348 epilog_stmt
= build_gimple_modify_stmt (new_scalar_dest
, rhs
);
1349 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1350 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1351 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1353 for (bit_offset
= element_bitsize
;
1354 bit_offset
< vec_size_in_bits
;
1355 bit_offset
+= element_bitsize
)
1358 tree bitpos
= bitsize_int (bit_offset
);
1359 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
1362 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1363 epilog_stmt
= build_gimple_modify_stmt (new_scalar_dest
, rhs
);
1364 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1365 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_name
;
1366 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1368 tmp
= build2 (code
, scalar_type
, new_name
, new_temp
);
1369 epilog_stmt
= build_gimple_modify_stmt (new_scalar_dest
, tmp
);
1370 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1371 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1372 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1375 extract_scalar_result
= false;
1379 /* 2.4 Extract the final scalar result. Create:
1380 s_out3 = extract_field <v_out2, bitpos> */
1382 if (extract_scalar_result
)
1386 if (vect_print_dump_info (REPORT_DETAILS
))
1387 fprintf (vect_dump
, "extract scalar result");
1389 if (BYTES_BIG_ENDIAN
)
1390 bitpos
= size_binop (MULT_EXPR
,
1391 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
1392 TYPE_SIZE (scalar_type
));
1394 bitpos
= bitsize_zero_node
;
1396 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
1397 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1398 epilog_stmt
= build_gimple_modify_stmt (new_scalar_dest
, rhs
);
1399 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1400 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1401 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1404 /* 2.4 Adjust the final result by the initial value of the reduction
1405 variable. (When such adjustment is not needed, then
1406 'scalar_initial_def' is zero).
1409 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1411 if (scalar_initial_def
)
1413 tree tmp
= build2 (code
, scalar_type
, new_temp
, scalar_initial_def
);
1414 epilog_stmt
= build_gimple_modify_stmt (new_scalar_dest
, tmp
);
1415 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1416 GIMPLE_STMT_OPERAND (epilog_stmt
, 0) = new_temp
;
1417 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1420 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1422 /* Find the loop-closed-use at the loop exit of the original scalar result.
1423 (The reduction result is expected to have two immediate uses - one at the
1424 latch block, and one at the loop exit). */
1426 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
1428 if (!flow_bb_inside_loop_p (loop
, bb_for_stmt (USE_STMT (use_p
))))
1430 exit_phi
= USE_STMT (use_p
);
1434 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1435 gcc_assert (exit_phi
);
1436 /* Replace the uses: */
1437 orig_name
= PHI_RESULT (exit_phi
);
1438 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, orig_name
)
1439 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1440 SET_USE (use_p
, new_temp
);
1444 /* Function vectorizable_reduction.
1446 Check if STMT performs a reduction operation that can be vectorized.
1447 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1448 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1449 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1451 This function also handles reduction idioms (patterns) that have been
1452 recognized in advance during vect_pattern_recog. In this case, STMT may be
1454 X = pattern_expr (arg0, arg1, ..., X)
1455 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1456 sequence that had been detected and replaced by the pattern-stmt (STMT).
1458 In some cases of reduction patterns, the type of the reduction variable X is
1459 different than the type of the other arguments of STMT.
1460 In such cases, the vectype that is used when transforming STMT into a vector
1461 stmt is different than the vectype that is used to determine the
1462 vectorization factor, because it consists of a different number of elements
1463 than the actual number of elements that are being operated upon in parallel.
1465 For example, consider an accumulation of shorts into an int accumulator.
1466 On some targets it's possible to vectorize this pattern operating on 8
1467 shorts at a time (hence, the vectype for purposes of determining the
1468 vectorization factor should be V8HI); on the other hand, the vectype that
1469 is used to create the vector form is actually V4SI (the type of the result).
1471 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1472 indicates what is the actual level of parallelism (V8HI in the example), so
1473 that the right vectorization factor would be derived. This vectype
1474 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1475 be used to create the vectorized stmt. The right vectype for the vectorized
1476 stmt is obtained from the type of the result X:
1477 get_vectype_for_scalar_type (TREE_TYPE (X))
1479 This means that, contrary to "regular" reductions (or "regular" stmts in
1480 general), the following equation:
1481 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1482 does *NOT* necessarily hold for reduction patterns. */
1485 vectorizable_reduction (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1490 tree loop_vec_def0
= NULL_TREE
, loop_vec_def1
= NULL_TREE
;
1491 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1492 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1493 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1494 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1496 enum tree_code code
, orig_code
, epilog_reduc_code
= 0;
1497 enum machine_mode vec_mode
;
1499 optab optab
, reduc_optab
;
1500 tree new_temp
= NULL_TREE
;
1502 enum vect_def_type dt
;
1507 stmt_vec_info orig_stmt_info
;
1508 tree expr
= NULL_TREE
;
1510 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1511 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1512 stmt_vec_info prev_stmt_info
;
1514 tree new_stmt
= NULL_TREE
;
1517 gcc_assert (ncopies
>= 1);
1519 /* 1. Is vectorizable reduction? */
1521 /* Not supportable if the reduction variable is used in the loop. */
1522 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1525 if (!STMT_VINFO_LIVE_P (stmt_info
))
1528 /* Make sure it was already recognized as a reduction computation. */
1529 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
1532 /* 2. Has this been recognized as a reduction pattern?
1534 Check if STMT represents a pattern that has been recognized
1535 in earlier analysis stages. For stmts that represent a pattern,
1536 the STMT_VINFO_RELATED_STMT field records the last stmt in
1537 the original sequence that constitutes the pattern. */
1539 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1542 orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
1543 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) == stmt
);
1544 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
));
1545 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info
));
1548 /* 3. Check the operands of the operation. The first operands are defined
1549 inside the loop body. The last operand is the reduction variable,
1550 which is defined by the loop-header-phi. */
1552 gcc_assert (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
);
1554 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
1555 code
= TREE_CODE (operation
);
1556 op_type
= TREE_OPERAND_LENGTH (operation
);
1557 if (op_type
!= binary_op
&& op_type
!= ternary_op
)
1559 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
1560 scalar_type
= TREE_TYPE (scalar_dest
);
1562 /* All uses but the last are expected to be defined in the loop.
1563 The last use is the reduction variable. */
1564 for (i
= 0; i
< op_type
-1; i
++)
1566 op
= TREE_OPERAND (operation
, i
);
1567 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1568 gcc_assert (is_simple_use
);
1569 if (dt
!= vect_loop_def
1570 && dt
!= vect_invariant_def
1571 && dt
!= vect_constant_def
1572 && dt
!= vect_induction_def
)
1576 op
= TREE_OPERAND (operation
, i
);
1577 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1578 gcc_assert (is_simple_use
);
1579 gcc_assert (dt
== vect_reduction_def
);
1580 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
1582 gcc_assert (orig_stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1584 gcc_assert (stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1586 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
1589 /* 4. Supportable by target? */
1591 /* 4.1. check support for the operation in the loop */
1592 optab
= optab_for_tree_code (code
, vectype
);
1595 if (vect_print_dump_info (REPORT_DETAILS
))
1596 fprintf (vect_dump
, "no optab.");
1599 vec_mode
= TYPE_MODE (vectype
);
1600 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1602 if (vect_print_dump_info (REPORT_DETAILS
))
1603 fprintf (vect_dump
, "op not supported by target.");
1604 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1605 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1606 < vect_min_worthwhile_factor (code
))
1608 if (vect_print_dump_info (REPORT_DETAILS
))
1609 fprintf (vect_dump
, "proceeding using word mode.");
1612 /* Worthwhile without SIMD support? */
1613 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1614 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1615 < vect_min_worthwhile_factor (code
))
1617 if (vect_print_dump_info (REPORT_DETAILS
))
1618 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1622 /* 4.2. Check support for the epilog operation.
1624 If STMT represents a reduction pattern, then the type of the
1625 reduction variable may be different than the type of the rest
1626 of the arguments. For example, consider the case of accumulation
1627 of shorts into an int accumulator; The original code:
1628 S1: int_a = (int) short_a;
1629 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1632 STMT: int_acc = widen_sum <short_a, int_acc>
1635 1. The tree-code that is used to create the vector operation in the
1636 epilog code (that reduces the partial results) is not the
1637 tree-code of STMT, but is rather the tree-code of the original
1638 stmt from the pattern that STMT is replacing. I.e, in the example
1639 above we want to use 'widen_sum' in the loop, but 'plus' in the
1641 2. The type (mode) we use to check available target support
1642 for the vector operation to be created in the *epilog*, is
1643 determined by the type of the reduction variable (in the example
1644 above we'd check this: plus_optab[vect_int_mode]).
1645 However the type (mode) we use to check available target support
1646 for the vector operation to be created *inside the loop*, is
1647 determined by the type of the other arguments to STMT (in the
1648 example we'd check this: widen_sum_optab[vect_short_mode]).
1650 This is contrary to "regular" reductions, in which the types of all
1651 the arguments are the same as the type of the reduction variable.
1652 For "regular" reductions we can therefore use the same vector type
1653 (and also the same tree-code) when generating the epilog code and
1654 when generating the code inside the loop. */
1658 /* This is a reduction pattern: get the vectype from the type of the
1659 reduction variable, and get the tree-code from orig_stmt. */
1660 orig_code
= TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt
, 1));
1661 vectype
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1662 vec_mode
= TYPE_MODE (vectype
);
1666 /* Regular reduction: use the same vectype and tree-code as used for
1667 the vector code inside the loop can be used for the epilog code. */
1671 if (!reduction_code_for_scalar_code (orig_code
, &epilog_reduc_code
))
1673 reduc_optab
= optab_for_tree_code (epilog_reduc_code
, vectype
);
1676 if (vect_print_dump_info (REPORT_DETAILS
))
1677 fprintf (vect_dump
, "no optab for reduction.");
1678 epilog_reduc_code
= NUM_TREE_CODES
;
1680 if (reduc_optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1682 if (vect_print_dump_info (REPORT_DETAILS
))
1683 fprintf (vect_dump
, "reduc op not supported by target.");
1684 epilog_reduc_code
= NUM_TREE_CODES
;
1687 if (!vec_stmt
) /* transformation not required. */
1689 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
1695 if (vect_print_dump_info (REPORT_DETAILS
))
1696 fprintf (vect_dump
, "transform reduction.");
1698 /* Create the destination vector */
1699 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1701 /* Create the reduction-phi that defines the reduction-operand. */
1702 new_phi
= create_phi_node (vec_dest
, loop
->header
);
1704 /* In case the vectorization factor (VF) is bigger than the number
1705 of elements that we can fit in a vectype (nunits), we have to generate
1706 more than one vector stmt - i.e - we need to "unroll" the
1707 vector stmt by a factor VF/nunits. For more details see documentation
1708 in vectorizable_operation. */
1710 prev_stmt_info
= NULL
;
1711 for (j
= 0; j
< ncopies
; j
++)
1716 op
= TREE_OPERAND (operation
, 0);
1717 loop_vec_def0
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1718 if (op_type
== ternary_op
)
1720 op
= TREE_OPERAND (operation
, 1);
1721 loop_vec_def1
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1724 /* Get the vector def for the reduction variable from the phi node */
1725 reduc_def
= PHI_RESULT (new_phi
);
1729 enum vect_def_type dt
= vect_unknown_def_type
; /* Dummy */
1730 loop_vec_def0
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def0
);
1731 if (op_type
== ternary_op
)
1732 loop_vec_def1
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def1
);
1734 /* Get the vector def for the reduction variable from the vectorized
1735 reduction operation generated in the previous iteration (j-1) */
1736 reduc_def
= GIMPLE_STMT_OPERAND (new_stmt
,0);
1739 /* Arguments are ready. create the new vector stmt. */
1741 if (op_type
== binary_op
)
1742 expr
= build2 (code
, vectype
, loop_vec_def0
, reduc_def
);
1744 expr
= build3 (code
, vectype
, loop_vec_def0
, loop_vec_def1
,
1746 new_stmt
= build_gimple_modify_stmt (vec_dest
, expr
);
1747 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1748 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
1749 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1752 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1754 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1755 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1758 /* Finalize the reduction-phi (set it's arguments) and create the
1759 epilog reduction code. */
1760 vect_create_epilog_for_reduction (new_temp
, stmt
, epilog_reduc_code
, new_phi
);
1764 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1765 a function declaration if the target has a vectorized version
1766 of the function, or NULL_TREE if the function cannot be vectorized. */
1769 vectorizable_function (tree call
, tree vectype_out
, tree vectype_in
)
1771 tree fndecl
= get_callee_fndecl (call
);
1772 enum built_in_function code
;
1774 /* We only handle functions that do not read or clobber memory -- i.e.
1775 const or novops ones. */
1776 if (!(call_expr_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1780 || TREE_CODE (fndecl
) != FUNCTION_DECL
1781 || !DECL_BUILT_IN (fndecl
))
1784 code
= DECL_FUNCTION_CODE (fndecl
);
1785 return targetm
.vectorize
.builtin_vectorized_function (code
, vectype_out
,
1789 /* Function vectorizable_call.
1791 Check if STMT performs a function call that can be vectorized.
1792 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1793 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1794 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1797 vectorizable_call (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1803 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1804 tree vectype_out
, vectype_in
;
1805 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1806 tree fndecl
, rhs
, new_temp
, def
, def_stmt
, rhs_type
, lhs_type
;
1807 enum vect_def_type dt
[2];
1808 int ncopies
, j
, nargs
;
1809 call_expr_arg_iterator iter
;
1811 /* Is STMT a vectorizable call? */
1812 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
1815 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
1818 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
1819 if (TREE_CODE (operation
) != CALL_EXPR
)
1822 /* Process function arguments. */
1823 rhs_type
= NULL_TREE
;
1825 FOR_EACH_CALL_EXPR_ARG (op
, iter
, operation
)
1829 /* Bail out if the function has more than two arguments, we
1830 do not have interesting builtin functions to vectorize with
1831 more than two arguments. */
1835 /* We can only handle calls with arguments of the same type. */
1837 && rhs_type
!= TREE_TYPE (op
))
1839 if (vect_print_dump_info (REPORT_DETAILS
))
1840 fprintf (vect_dump
, "argument types differ.");
1843 rhs_type
= TREE_TYPE (op
);
1845 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
[nargs
-1]))
1847 if (vect_print_dump_info (REPORT_DETAILS
))
1848 fprintf (vect_dump
, "use not simple.");
1853 /* No arguments is also not good. */
1857 vectype_in
= get_vectype_for_scalar_type (rhs_type
);
1859 lhs_type
= TREE_TYPE (GIMPLE_STMT_OPERAND (stmt
, 0));
1860 vectype_out
= get_vectype_for_scalar_type (lhs_type
);
1862 /* Only handle the case of vectors with the same number of elements.
1863 FIXME: We need a way to handle for example the SSE2 cvtpd2dq
1864 instruction which converts V2DFmode to V4SImode but only
1865 using the lower half of the V4SImode result. */
1866 if (TYPE_VECTOR_SUBPARTS (vectype_in
) != TYPE_VECTOR_SUBPARTS (vectype_out
))
1869 /* For now, we only vectorize functions if a target specific builtin
1870 is available. TODO -- in some cases, it might be profitable to
1871 insert the calls for pieces of the vector, in order to be able
1872 to vectorize other operations in the loop. */
1873 fndecl
= vectorizable_function (operation
, vectype_out
, vectype_in
);
1874 if (fndecl
== NULL_TREE
)
1876 if (vect_print_dump_info (REPORT_DETAILS
))
1877 fprintf (vect_dump
, "function is not vectorizable.");
1882 gcc_assert (ZERO_SSA_OPERANDS (stmt
, SSA_OP_ALL_VIRTUALS
));
1884 if (!vec_stmt
) /* transformation not required. */
1886 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1892 if (vect_print_dump_info (REPORT_DETAILS
))
1893 fprintf (vect_dump
, "transform operation.");
1895 ncopies
= (LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1896 / TYPE_VECTOR_SUBPARTS (vectype_out
));
1897 gcc_assert (ncopies
>= 1);
1900 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
1901 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1903 prev_stmt_info
= NULL
;
1904 for (j
= 0; j
< ncopies
; ++j
)
1906 tree new_stmt
, vargs
;
1910 /* Build argument list for the vectorized call. */
1911 /* FIXME: Rewrite this so that it doesn't construct a temporary
1915 FOR_EACH_CALL_EXPR_ARG (op
, iter
, operation
)
1920 vec_oprnd
[n
] = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1922 vec_oprnd
[n
] = vect_get_vec_def_for_stmt_copy (dt
[n
], vec_oprnd
[n
]);
1924 vargs
= tree_cons (NULL_TREE
, vec_oprnd
[n
], vargs
);
1926 vargs
= nreverse (vargs
);
1928 rhs
= build_function_call_expr (fndecl
, vargs
);
1929 new_stmt
= build_gimple_modify_stmt (vec_dest
, rhs
);
1930 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1931 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
1933 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1936 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1938 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1939 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1942 /* The call in STMT might prevent it from being removed in dce. We however
1943 cannot remove it here, due to the way the ssa name it defines is mapped
1944 to the new definition. So just replace rhs of the statement with something
1946 type
= TREE_TYPE (scalar_dest
);
1947 GIMPLE_STMT_OPERAND (stmt
, 1) = fold_convert (type
, integer_zero_node
);
1953 /* Function vectorizable_conversion.
1955 Check if STMT performs a conversion operation, that can be vectorized.
1956 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1957 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1958 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this chunk appears to be a damaged extraction — the embedded
   original line numbers skip (e.g. 1987-1988, 2048-2050 missing), so braces,
   early `return false;` exits and some declarations are presumably elided.
   Do not assume the visible text is the complete function; verify against the
   original tree-vect-transform.c before editing.  */
1961 vectorizable_conversion (tree stmt
, block_stmt_iterator
* bsi
,
/* Local state: vec_oprnd0 carries the vectorized def of the (single)
   source operand across the unrolling loop below.  */
1968 tree vec_oprnd0
= NULL_TREE
;
1969 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1970 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1971 enum tree_code code
;
1974 enum vect_def_type dt0
;
1979 tree vectype_out
, vectype_in
;
1980 tree rhs_type
, lhs_type
;
1982 stmt_vec_info prev_stmt_info
;
1984 /* Is STMT a vectorizable conversion? */
/* --- Analysis phase: reject stmts this routine cannot handle.  Each failed
   check presumably returns false on an elided line — TODO confirm.  */
1986 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1989 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1991 if (STMT_VINFO_LIVE_P (stmt_info
))
1993 /* FORNOW: not yet supported. */
1994 if (vect_print_dump_info (REPORT_DETAILS
))
1995 fprintf (vect_dump
, "value used after loop.");
1999 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2002 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
/* Only int<->float conversions (FIX_TRUNC_EXPR / FLOAT_EXPR) are handled.  */
2005 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
2006 code
= TREE_CODE (operation
);
2007 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2010 /* Check types of lhs and rhs */
2011 op0
= TREE_OPERAND (operation
, 0);
2012 rhs_type
= TREE_TYPE (op0
);
2013 vectype_in
= get_vectype_for_scalar_type (rhs_type
);
2014 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2016 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2017 lhs_type
= TREE_TYPE (scalar_dest
);
2018 vectype_out
= get_vectype_for_scalar_type (lhs_type
);
2019 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
) == vectype_out
);
2020 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2022 /* FORNOW: need to extend to support short<->float conversions as well. */
2023 if (nunits_out
!= nunits_in
)
2026 /* Bail out if the types are both integral or non-integral */
2027 if ((INTEGRAL_TYPE_P (rhs_type
) && INTEGRAL_TYPE_P (lhs_type
))
2028 || (!INTEGRAL_TYPE_P (rhs_type
) && !INTEGRAL_TYPE_P (lhs_type
)))
2031 /* Sanity check: make sure that at least one copy of the vectorized stmt
2032 needs to be generated. */
2033 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2034 gcc_assert (ncopies
>= 1);
2036 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2038 if (vect_print_dump_info (REPORT_DETAILS
))
2039 fprintf (vect_dump
, "use not simple.");
2043 /* Supportable by target? */
/* The conversion is vectorized via a target builtin; bail if the target
   hook declines this (code, vectype_in) pair.  */
2044 if (!targetm
.vectorize
.builtin_conversion (code
, vectype_in
))
2046 if (vect_print_dump_info (REPORT_DETAILS
))
2047 fprintf (vect_dump
, "op not supported by target.");
2051 if (!vec_stmt
) /* transformation not required. */
2053 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
/* --- Transformation phase: emit ncopies vectorized calls, chaining the
   copies through STMT_VINFO_RELATED_STMT.  */
2059 if (vect_print_dump_info (REPORT_DETAILS
))
2060 fprintf (vect_dump
, "transform conversion.");
2063 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2065 prev_stmt_info
= NULL
;
2066 for (j
= 0; j
< ncopies
; j
++)
/* First iteration (j == 0) fetches the vector def for the operand;
   later iterations get the def of the previous copy.  */
2072 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2074 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
/* NOTE(review): the result of this builtin_conversion call looks like it
   should be assigned (the builtin decl used by build_call_expr below) —
   an elided `builtin_decl =` prefix is likely; confirm against original.  */
2077 targetm
.vectorize
.builtin_conversion (code
, vectype_in
);
2078 new_stmt
= build_call_expr (builtin_decl
, 1, vec_oprnd0
);
2080 /* Arguments are ready. create the new vector stmt. */
2081 new_stmt
= build_gimple_modify_stmt (vec_dest
, new_stmt
);
2082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2083 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
2084 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* The generated call may reference virtual operands; mark their symbols
   for SSA renaming.  */
2085 FOR_EACH_SSA_TREE_OPERAND (sym
, new_stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
2087 if (TREE_CODE (sym
) == SSA_NAME
)
2088 sym
= SSA_NAME_VAR (sym
);
2089 mark_sym_for_renaming (sym
);
2093 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2095 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2096 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2102 /* Function vectorizable_assignment.
2104 Check if STMT performs an assignment (copy) that can be vectorized.
2105 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2106 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2107 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged region — original line numbers skip, so
   braces and early-return lines are presumably missing from this view.  */
2110 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2116 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2117 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2118 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2121 enum vect_def_type dt
;
2122 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2123 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2125 gcc_assert (ncopies
>= 1);
/* NOTE(review): this unconditional return disables the whole function
   ("FORNOW" — multiple-copy support not implemented); everything below it
   is dead code kept for when the restriction is lifted.  */
2127 return false; /* FORNOW */
2129 /* Is vectorizable assignment? */
2130 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2133 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2135 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2138 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2139 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2142 op
= GIMPLE_STMT_OPERAND (stmt
, 1);
2143 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2145 if (vect_print_dump_info (REPORT_DETAILS
))
2146 fprintf (vect_dump
, "use not simple.");
2150 if (!vec_stmt
) /* transformation not required. */
2152 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
/* --- Transformation: a vector copy is just a vector-to-vector modify stmt.  */
2157 if (vect_print_dump_info (REPORT_DETAILS
))
2158 fprintf (vect_dump
, "transform assignment.");
2161 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2164 op
= GIMPLE_STMT_OPERAND (stmt
, 1);
2165 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2167 /* Arguments are ready. create the new vector stmt. */
2168 *vec_stmt
= build_gimple_modify_stmt (vec_dest
, vec_oprnd
);
2169 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
2170 GIMPLE_STMT_OPERAND (*vec_stmt
, 0) = new_temp
;
2171 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
2177 /* Function vect_min_worthwhile_factor.
2179 For a loop where we could vectorize the operation indicated by CODE,
2180 return the minimum vectorization factor that makes it worthwhile
2181 to use generic vectors. */
/* NOTE(review): the body of this function is entirely elided in this
   extraction (original lines 2184-2202 missing) — presumably a switch on
   CODE returning small thresholds; restore from the original file.  */
2183 vect_min_worthwhile_factor (enum tree_code code
)
2204 /* Function vectorizable_operation.
2206 Check if STMT performs a binary or unary operation that can be vectorized.
2207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2208 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged region — original line numbers skip
   throughout, so braces, `return false;` exits and some declarations are
   presumably elided from this view.  */
2212 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2217 tree op0
, op1
= NULL
;
2218 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2219 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2220 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2221 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2222 enum tree_code code
;
2223 enum machine_mode vec_mode
;
2228 enum machine_mode optab_op2_mode
;
2230 enum vect_def_type dt0
, dt1
;
2232 stmt_vec_info prev_stmt_info
;
2233 int nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
2236 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2239 gcc_assert (ncopies
>= 1);
2241 /* Is STMT a vectorizable binary/unary operation? */
/* --- Analysis phase.  */
2242 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2245 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2247 if (STMT_VINFO_LIVE_P (stmt_info
))
2249 /* FORNOW: not yet supported. */
2250 if (vect_print_dump_info (REPORT_DETAILS
))
2251 fprintf (vect_dump
, "value used after loop.");
2255 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2258 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
/* Operand and result vectors must have the same number of elements.  */
2261 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2262 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
2263 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2264 if (nunits_out
!= nunits_in
)
2267 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
2268 code
= TREE_CODE (operation
);
2269 optab
= optab_for_tree_code (code
, vectype
);
2271 /* Support only unary or binary operations. */
2272 op_type
= TREE_OPERAND_LENGTH (operation
);
2273 if (op_type
!= unary_op
&& op_type
!= binary_op
)
2275 if (vect_print_dump_info (REPORT_DETAILS
))
2276 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
2280 op0
= TREE_OPERAND (operation
, 0);
2281 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2283 if (vect_print_dump_info (REPORT_DETAILS
))
2284 fprintf (vect_dump
, "use not simple.");
2288 if (op_type
== binary_op
)
2290 op1
= TREE_OPERAND (operation
, 1);
2291 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt1
))
2293 if (vect_print_dump_info (REPORT_DETAILS
))
2294 fprintf (vect_dump
, "use not simple.");
2299 /* Supportable by target? */
2302 if (vect_print_dump_info (REPORT_DETAILS
))
2303 fprintf (vect_dump
, "no optab.");
2306 vec_mode
= TYPE_MODE (vectype
);
2307 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
2308 if (icode
== CODE_FOR_nothing
)
2310 if (vect_print_dump_info (REPORT_DETAILS
))
2311 fprintf (vect_dump
, "op not supported by target.");
/* No native insn: fall back to word-mode emulation only when the vector
   fits in one machine word and the VF clears the profitability bar.  */
2312 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
2313 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2314 < vect_min_worthwhile_factor (code
))
2316 if (vect_print_dump_info (REPORT_DETAILS
))
2317 fprintf (vect_dump
, "proceeding using word mode.");
2320 /* Worthwhile without SIMD support? */
2321 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2322 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2323 < vect_min_worthwhile_factor (code
))
2325 if (vect_print_dump_info (REPORT_DETAILS
))
2326 fprintf (vect_dump
, "not worthwhile without SIMD support.");
/* Shifts: the second operand may be required in scalar mode by the target
   pattern, in which case it must be a constant or loop invariant.  */
2330 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
2332 /* FORNOW: not yet supported. */
2333 if (!VECTOR_MODE_P (vec_mode
))
2336 /* Invariant argument is needed for a vector shift
2337 by a scalar shift operand. */
2338 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2339 if (! (VECTOR_MODE_P (optab_op2_mode
)
2340 || dt1
== vect_constant_def
2341 || dt1
== vect_invariant_def
))
2343 if (vect_print_dump_info (REPORT_DETAILS
))
2344 fprintf (vect_dump
, "operand mode requires invariant argument.");
2349 if (!vec_stmt
) /* transformation not required. */
2351 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
/* --- Transformation phase.  */
2357 if (vect_print_dump_info (REPORT_DETAILS
))
2358 fprintf (vect_dump
, "transform binary/unary operation.");
2361 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2363 /* In case the vectorization factor (VF) is bigger than the number
2364 of elements that we can fit in a vectype (nunits), we have to generate
2365 more than one vector stmt - i.e - we need to "unroll" the
2366 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2367 from one copy of the vector stmt to the next, in the field
2368 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2369 stages to find the correct vector defs to be used when vectorizing
2370 stmts that use the defs of the current stmt. The example below illustrates
2371 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2372 4 vectorized stmts):
2374 before vectorization:
2375 RELATED_STMT VEC_STMT
2379 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2381 RELATED_STMT VEC_STMT
2382 VS1_0: vx0 = memref0 VS1_1 -
2383 VS1_1: vx1 = memref1 VS1_2 -
2384 VS1_2: vx2 = memref2 VS1_3 -
2385 VS1_3: vx3 = memref3 - -
2386 S1: x = load - VS1_0
2389 step2: vectorize stmt S2 (done here):
2390 To vectorize stmt S2 we first need to find the relevant vector
2391 def for the first operand 'x'. This is, as usual, obtained from
2392 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2393 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2394 relevant vector def 'vx0'. Having found 'vx0' we can generate
2395 the vector stmt VS2_0, and as usual, record it in the
2396 STMT_VINFO_VEC_STMT of stmt S2.
2397 When creating the second copy (VS2_1), we obtain the relevant vector
2398 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2399 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2400 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2401 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2402 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2403 chain of stmts and pointers:
2404 RELATED_STMT VEC_STMT
2405 VS1_0: vx0 = memref0 VS1_1 -
2406 VS1_1: vx1 = memref1 VS1_2 -
2407 VS1_2: vx2 = memref2 VS1_3 -
2408 VS1_3: vx3 = memref3 - -
2409 S1: x = load - VS1_0
2410 VS2_0: vz0 = vx0 + v1 VS2_1 -
2411 VS2_1: vz1 = vx1 + v1 VS2_2 -
2412 VS2_2: vz2 = vx2 + v1 VS2_3 -
2413 VS2_3: vz3 = vx3 + v1 - -
2414 S2: z = x + 1 - VS2_0 */
2416 prev_stmt_info
= NULL
;
2417 for (j
= 0; j
< ncopies
; j
++)
/* j == 0: fetch vector defs for the scalar operands.  */
2422 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2423 if (op_type
== binary_op
)
2425 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
2427 /* Vector shl and shr insn patterns can be defined with
2428 scalar operand 2 (shift operand). In this case, use
2429 constant or loop invariant op1 directly, without
2430 extending it to vector mode first. */
2431 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2432 if (!VECTOR_MODE_P (optab_op2_mode
))
2434 if (vect_print_dump_info (REPORT_DETAILS
))
2435 fprintf (vect_dump
, "operand 1 using scalar mode.");
2440 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
/* j > 0: reuse the defs produced for the previous copy.  */
2445 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2446 if (op_type
== binary_op
)
2447 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt1
, vec_oprnd1
);
2450 /* Arguments are ready. create the new vector stmt. */
2452 if (op_type
== binary_op
)
2453 new_stmt
= build_gimple_modify_stmt (vec_dest
,
2454 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
2456 new_stmt
= build_gimple_modify_stmt (vec_dest
,
2457 build1 (code
, vectype
, vec_oprnd0
));
2458 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2459 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
2460 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* Record the first copy in VEC_STMT, chain later copies via RELATED_STMT.  */
2463 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2465 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2466 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2473 /* Function vectorizable_type_demotion
2475 Check if STMT performs a binary or unary operation that involves
2476 type demotion, and if it can be vectorized.
2477 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2478 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2479 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged region — elided lines (per the skipping
   original line numbers) presumably hold braces, early returns and some
   declarations.  Verify against the original file before editing.  */
2482 vectorizable_type_demotion (tree stmt
, block_stmt_iterator
*bsi
,
2489 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
2490 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2491 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2492 enum tree_code code
;
2495 enum vect_def_type dt0
;
2497 stmt_vec_info prev_stmt_info
;
2507 enum machine_mode vec_mode
;
2509 /* Is STMT a vectorizable type-demotion operation? */
2511 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2514 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2516 if (STMT_VINFO_LIVE_P (stmt_info
))
2518 /* FORNOW: not yet supported. */
2519 if (vect_print_dump_info (REPORT_DETAILS
))
2520 fprintf (vect_dump
, "value used after loop.");
2524 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2527 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
2530 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
2531 code
= TREE_CODE (operation
);
2532 if (code
!= NOP_EXPR
&& code
!= CONVERT_EXPR
)
/* Demotion halves the element width: the output vector holds twice as
   many elements as the input vector.  */
2535 op0
= TREE_OPERAND (operation
, 0);
2536 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
2537 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2539 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2540 scalar_type
= TREE_TYPE (scalar_dest
);
2541 vectype_out
= get_vectype_for_scalar_type (scalar_type
);
2542 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2543 if (nunits_in
!= nunits_out
/ 2) /* FORNOW */
2546 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2547 gcc_assert (ncopies
>= 1);
2549 if (! INTEGRAL_TYPE_P (scalar_type
)
2550 || !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2553 /* Check the operands of the operation. */
2554 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2556 if (vect_print_dump_info (REPORT_DETAILS
))
2557 fprintf (vect_dump
, "use not simple.");
2561 /* Supportable by target? */
/* Demotion is implemented as VEC_PACK_MOD_EXPR on two input vectors.  */
2562 code
= VEC_PACK_MOD_EXPR
;
2563 optab
= optab_for_tree_code (VEC_PACK_MOD_EXPR
, vectype_in
);
2567 vec_mode
= TYPE_MODE (vectype_in
);
2568 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
2571 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
2573 if (!vec_stmt
) /* transformation not required. */
2575 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
/* --- Transformation phase.  */
2581 if (vect_print_dump_info (REPORT_DETAILS
))
2582 fprintf (vect_dump
, "transform type demotion operation. ncopies = %d.",
2586 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2588 /* In case the vectorization factor (VF) is bigger than the number
2589 of elements that we can fit in a vectype (nunits), we have to generate
2590 more than one vector stmt - i.e - we need to "unroll" the
2591 vector stmt by a factor VF/nunits. */
2592 prev_stmt_info
= NULL
;
2593 for (j
= 0; j
< ncopies
; j
++)
/* Each pack consumes two input vectors (vec_oprnd0, vec_oprnd1).  */
2598 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2599 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2603 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd1
);
2604 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2607 /* Arguments are ready. Create the new vector stmt. */
2608 expr
= build2 (code
, vectype_out
, vec_oprnd0
, vec_oprnd1
);
2609 new_stmt
= build_gimple_modify_stmt (vec_dest
, expr
);
2610 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2611 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
2612 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2615 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2617 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2619 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2622 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2627 /* Function vect_gen_widened_results_half
2629 Create a vector stmt whose code, type, number of arguments, and result
2630 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2631 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2632 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2633 needs to be created (DECL is a function-decl of a target-builtin).
2634 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): extraction-damaged region — braces, the `else` between the
   call and generic paths, and the return of the new stmt are presumably on
   elided lines; verify against the original file.  */
2637 vect_gen_widened_results_half (enum tree_code code
, tree vectype
, tree decl
,
2638 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2639 tree vec_dest
, block_stmt_iterator
*bsi
,
2648 /* Generate half of the widened result: */
2649 if (code
== CALL_EXPR
)
2651 /* Target specific support */
2652 if (op_type
== binary_op
)
2653 expr
= build_call_expr (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2655 expr
= build_call_expr (decl
, 1, vec_oprnd0
);
2659 /* Generic support */
2660 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2661 if (op_type
== binary_op
)
2662 expr
= build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
);
2664 expr
= build1 (code
, vectype
, vec_oprnd0
);
/* Wrap EXPR in a modify stmt defining a fresh SSA name and emit it.  */
2666 new_stmt
= build_gimple_modify_stmt (vec_dest
, expr
);
2667 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2668 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
2669 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* Calls may touch virtual operands; mark their symbols for renaming.  */
2671 if (code
== CALL_EXPR
)
2673 FOR_EACH_SSA_TREE_OPERAND (sym
, new_stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
2675 if (TREE_CODE (sym
) == SSA_NAME
)
2676 sym
= SSA_NAME_VAR (sym
);
2677 mark_sym_for_renaming (sym
);
2685 /* Function vectorizable_type_promotion
2687 Check if STMT performs a binary or unary operation that involves
2688 type promotion, and if it can be vectorized.
2689 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2690 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2691 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged region — elided lines presumably hold
   braces and early-return statements.  Also note code1/code2 are
   enum tree_code variables initialized with CODE_FOR_nothing (an insn-code
   value); looks like a pre-existing type mismatch — confirm against the
   original file before changing.  */
2694 vectorizable_type_promotion (tree stmt
, block_stmt_iterator
*bsi
,
2700 tree op0
, op1
= NULL
;
2701 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
2702 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2703 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2704 enum tree_code code
, code1
= CODE_FOR_nothing
, code2
= CODE_FOR_nothing
;
2705 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2708 enum vect_def_type dt0
, dt1
;
2710 stmt_vec_info prev_stmt_info
;
2718 /* Is STMT a vectorizable type-promotion operation? */
2720 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2723 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2725 if (STMT_VINFO_LIVE_P (stmt_info
))
2727 /* FORNOW: not yet supported. */
2728 if (vect_print_dump_info (REPORT_DETAILS
))
2729 fprintf (vect_dump
, "value used after loop.");
2733 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
2736 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
2739 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
2740 code
= TREE_CODE (operation
);
2741 if (code
!= NOP_EXPR
&& code
!= WIDEN_MULT_EXPR
)
/* Promotion doubles the element width: the output vector holds half as
   many elements as the input vector.  */
2744 op0
= TREE_OPERAND (operation
, 0);
2745 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
2746 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2747 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2748 gcc_assert (ncopies
>= 1);
2750 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2751 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
2752 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2753 if (nunits_out
!= nunits_in
/ 2) /* FORNOW */
2756 if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2757 || !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2760 /* Check the operands of the operation. */
2761 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2763 if (vect_print_dump_info (REPORT_DETAILS
))
2764 fprintf (vect_dump
, "use not simple.");
2768 op_type
= TREE_CODE_LENGTH (code
);
2769 if (op_type
== binary_op
)
2771 op1
= TREE_OPERAND (operation
, 1);
2772 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt1
))
2774 if (vect_print_dump_info (REPORT_DETAILS
))
2775 fprintf (vect_dump
, "use not simple.");
2780 /* Supportable by target? */
/* Fills in either two tree codes (generic widening) or two builtin decls
   (target-specific), one for each half of the widened result.  */
2781 if (!supportable_widening_operation (code
, stmt
, vectype_in
,
2782 &decl1
, &decl2
, &code1
, &code2
))
2785 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
2787 if (!vec_stmt
) /* transformation not required. */
2789 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
/* --- Transformation phase.  */
2795 if (vect_print_dump_info (REPORT_DETAILS
))
2796 fprintf (vect_dump
, "transform type promotion operation. ncopies = %d.",
2800 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2802 /* In case the vectorization factor (VF) is bigger than the number
2803 of elements that we can fit in a vectype (nunits), we have to generate
2804 more than one vector stmt - i.e - we need to "unroll" the
2805 vector stmt by a factor VF/nunits. */
2807 prev_stmt_info
= NULL
;
2808 for (j
= 0; j
< ncopies
; j
++)
2813 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2814 if (op_type
== binary_op
)
2815 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
2819 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2820 if (op_type
== binary_op
)
2821 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt1
, vec_oprnd1
);
2824 /* Arguments are ready. Create the new vector stmt. We are creating
2825 two vector defs because the widened result does not fit in one vector.
2826 The vectorized stmt can be expressed as a call to a taregt builtin,
2827 or a using a tree-code. */
2828 /* Generate first half of the widened result: */
2829 new_stmt
= vect_gen_widened_results_half (code1
, vectype_out
, decl1
,
2830 vec_oprnd0
, vec_oprnd1
, op_type
, vec_dest
, bsi
, stmt
);
2832 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2834 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2835 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2837 /* Generate second half of the widened result: */
2838 new_stmt
= vect_gen_widened_results_half (code2
, vectype_out
, decl2
,
2839 vec_oprnd0
, vec_oprnd1
, op_type
, vec_dest
, bsi
, stmt
);
2840 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2841 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2845 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2850 /* Function vect_strided_store_supported.
2852 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2853 and FALSE otherwise. */
/* NOTE(review): extraction-damaged region — the second argument of each
   optab_for_tree_code call and the comparison against CODE_FOR_nothing for
   the high optab are on elided lines; verify against the original file.  */
2856 vect_strided_store_supported (tree vectype
)
2858 optab interleave_high_optab
, interleave_low_optab
;
2861 mode
= (int) TYPE_MODE (vectype
);
2863 /* Check that the operation is supported. */
2864 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
2866 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
2868 if (!interleave_high_optab
|| !interleave_low_optab
)
2870 if (vect_print_dump_info (REPORT_DETAILS
))
2871 fprintf (vect_dump
, "no optab for interleave.");
/* Both the high and the low interleave insns must exist for this mode.  */
2875 if (interleave_high_optab
->handlers
[(int) mode
].insn_code
2877 || interleave_low_optab
->handlers
[(int) mode
].insn_code
2878 == CODE_FOR_nothing
)
2880 if (vect_print_dump_info (REPORT_DETAILS
))
2881 fprintf (vect_dump
, "interleave op not supported by target.");
2888 /* Function vect_permute_store_chain.
2890 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2891 a power of 2, generate interleave_high/low stmts to reorder the data
2892 correctly for the stores. Return the final references for stores in
2895 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2896 The input is 4 vectors each containing 8 elements. We assign a number to each
2897 element, the input sequence is:
2899 1st vec: 0 1 2 3 4 5 6 7
2900 2nd vec: 8 9 10 11 12 13 14 15
2901 3rd vec: 16 17 18 19 20 21 22 23
2902 4th vec: 24 25 26 27 28 29 30 31
2904 The output sequence should be:
2906 1st vec: 0 8 16 24 1 9 17 25
2907 2nd vec: 2 10 18 26 3 11 19 27
2908 3rd vec: 4 12 20 28 5 13 21 30
2909 4th vec: 6 14 22 30 7 15 23 31
2911 i.e., we interleave the contents of the four vectors in their order.
2913 We use interleave_high/low instructions to create such output. The input of
2914 each interleave_high/low operation is two vectors:
2917 the even elements of the result vector are obtained left-to-right from the
2918 high/low elements of the first vector. The odd elements of the result are
2919 obtained left-to-right from the high/low elements of the second vector.
2920 The output of interleave_high will be: 0 4 1 5
2921 and of interleave_low: 2 6 3 7
2924 The permutation is done in log LENGTH stages. In each stage interleave_high
2925 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2926 where the first argument is taken from the first half of DR_CHAIN and the
2927 second argument from it's second half.
2930 I1: interleave_high (1st vec, 3rd vec)
2931 I2: interleave_low (1st vec, 3rd vec)
2932 I3: interleave_high (2nd vec, 4th vec)
2933 I4: interleave_low (2nd vec, 4th vec)
2935 The output for the first stage is:
2937 I1: 0 16 1 17 2 18 3 19
2938 I2: 4 20 5 21 6 22 7 23
2939 I3: 8 24 9 25 10 26 11 27
2940 I4: 12 28 13 29 14 30 15 31
2942 The output of the second stage, i.e. the final result is:
2944 I1: 0 8 16 24 1 9 17 25
2945 I2: 2 10 18 26 3 11 19 27
2946 I3: 4 12 20 28 5 13 21 30
2947 I4: 6 14 22 30 7 15 23 31. */
/* NOTE(review): extraction-damaged region — the STMT parameter used below,
   loop braces and the final return are presumably on elided lines; the
   FIRST/SECOND vectors allocated here are never used in the visible text
   (possibly used on elided lines).  Verify against the original file.  */
2950 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
2951 unsigned int length
,
2953 block_stmt_iterator
*bsi
,
2954 VEC(tree
,heap
) **result_chain
)
2956 tree perm_dest
, perm_stmt
, vect1
, vect2
, high
, low
;
2957 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2958 tree scalar_dest
, tmp
;
2961 VEC(tree
,heap
) *first
, *second
;
2963 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
2964 first
= VEC_alloc (tree
, heap
, length
/2);
2965 second
= VEC_alloc (tree
, heap
, length
/2);
2967 /* Check that the operation is supported. */
2968 if (!vect_strided_store_supported (vectype
))
2971 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
/* log2(LENGTH) stages; each stage pairs vector j with vector j+length/2.  */
2973 for (i
= 0; i
< exact_log2 (length
); i
++)
2975 for (j
= 0; j
< length
/2; j
++)
2977 vect1
= VEC_index (tree
, dr_chain
, j
);
2978 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
2980 /* Create interleaving stmt:
2981 in the case of big endian:
2982 high = interleave_high (vect1, vect2)
2983 and in the case of little endian:
2984 high = interleave_low (vect1, vect2). */
2985 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
2986 DECL_GIMPLE_REG_P (perm_dest
) = 1;
2987 add_referenced_var (perm_dest
);
2988 if (BYTES_BIG_ENDIAN
)
2989 tmp
= build2 (VEC_INTERLEAVE_HIGH_EXPR
, vectype
, vect1
, vect2
);
2991 tmp
= build2 (VEC_INTERLEAVE_LOW_EXPR
, vectype
, vect1
, vect2
);
2992 perm_stmt
= build_gimple_modify_stmt (perm_dest
, tmp
);
2993 high
= make_ssa_name (perm_dest
, perm_stmt
);
2994 GIMPLE_STMT_OPERAND (perm_stmt
, 0) = high
;
2995 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
2996 VEC_replace (tree
, *result_chain
, 2*j
, high
);
2998 /* Create interleaving stmt:
2999 in the case of big endian:
3000 low = interleave_low (vect1, vect2)
3001 and in the case of little endian:
3002 low = interleave_high (vect1, vect2). */
3003 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
3004 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3005 add_referenced_var (perm_dest
);
3006 if (BYTES_BIG_ENDIAN
)
3007 tmp
= build2 (VEC_INTERLEAVE_LOW_EXPR
, vectype
, vect1
, vect2
);
3009 tmp
= build2 (VEC_INTERLEAVE_HIGH_EXPR
, vectype
, vect1
, vect2
);
3010 perm_stmt
= build_gimple_modify_stmt (perm_dest
, tmp
);
3011 low
= make_ssa_name (perm_dest
, perm_stmt
);
3012 GIMPLE_STMT_OPERAND (perm_stmt
, 0) = low
;
3013 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
3014 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
/* Next stage consumes this stage's output.  */
3016 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3022 /* Function vectorizable_store.
3024 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3026 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3027 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3031 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
3036 tree vec_oprnd
= NULL_TREE
;
3037 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3038 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3039 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3040 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3041 enum machine_mode vec_mode
;
3043 enum dr_alignment_support alignment_support_cheme
;
3045 def_operand_p def_p
;
3047 enum vect_def_type dt
;
3048 stmt_vec_info prev_stmt_info
= NULL
;
3049 tree dataref_ptr
= NULL_TREE
;
3050 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3051 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3053 tree next_stmt
, first_stmt
;
3054 bool strided_store
= false;
3055 unsigned int group_size
, i
;
3056 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3057 gcc_assert (ncopies
>= 1);
3059 /* Is vectorizable store? */
3061 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
3064 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
3065 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3066 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3067 && !DR_GROUP_FIRST_DR (stmt_info
))
3070 op
= GIMPLE_STMT_OPERAND (stmt
, 1);
3071 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3073 if (vect_print_dump_info (REPORT_DETAILS
))
3074 fprintf (vect_dump
, "use not simple.");
3078 vec_mode
= TYPE_MODE (vectype
);
3079 /* FORNOW. In some cases can vectorize even if data-type not supported
3080 (e.g. - array initialization with 0). */
3081 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
3084 if (!STMT_VINFO_DATA_REF (stmt_info
))
3087 if (DR_GROUP_FIRST_DR (stmt_info
))
3089 strided_store
= true;
3090 if (!vect_strided_store_supported (vectype
))
3094 if (!vec_stmt
) /* transformation not required. */
3096 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3102 if (vect_print_dump_info (REPORT_DETAILS
))
3103 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3107 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3108 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3109 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3111 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3113 /* We vectorize all the stmts of the interleaving group when we
3114 reach the last stmt in the group. */
3115 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3116 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
)))
3118 *vec_stmt
= NULL_TREE
;
3129 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3130 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3132 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
3133 gcc_assert (alignment_support_cheme
);
3134 gcc_assert (alignment_support_cheme
== dr_aligned
); /* FORNOW */
3136 /* In case the vectorization factor (VF) is bigger than the number
3137 of elements that we can fit in a vectype (nunits), we have to generate
3138 more than one vector stmt - i.e - we need to "unroll" the
3139 vector stmt by a factor VF/nunits. For more details see documentation in
3140 vect_get_vec_def_for_copy_stmt. */
3142 /* In case of interleaving (non-unit strided access):
3149 We create vectorized stores starting from base address (the access of the
3150 first stmt in the chain (S2 in the above example), when the last store stmt
3151 of the chain (S4) is reached:
3154 VS2: &base + vec_size*1 = vx0
3155 VS3: &base + vec_size*2 = vx1
3156 VS4: &base + vec_size*3 = vx3
3158 Then permutation statements are generated:
3160 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3161 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3164 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3165 (the order of the data-refs in the output of vect_permute_store_chain
3166 corresponds to the order of scalar stmts in the interleaving chain - see
3167 the documentation of vect_permute_store_chain()).
3169 In case of both multiple types and interleaving, above vector stores and
3170 permutation stmts are created for every copy. The result vector stmts are
3171 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3172 STMT_VINFO_RELATED_STMT for the next copies.
3175 prev_stmt_info
= NULL
;
3176 for (j
= 0; j
< ncopies
; j
++)
3183 /* For interleaved stores we collect vectorized defs for all the
3184 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3185 as an input to vect_permute_store_chain(), and OPRNDS as an input
3186 to vect_get_vec_def_for_stmt_copy() for the next copy.
3187 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3188 OPRNDS are of size 1. */
3189 next_stmt
= first_stmt
;
3190 for (i
= 0; i
< group_size
; i
++)
3192 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3193 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3194 can't be NULL_TREE. In case that there is no interleaving,
3195 GROUP_SIZE is 1, and only one iteration of the loop will be
3197 gcc_assert (next_stmt
);
3198 op
= GIMPLE_STMT_OPERAND (next_stmt
, 1);
3199 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
, NULL
);
3200 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
3201 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
3202 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3204 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, bsi
, NULL_TREE
,
3205 &dummy
, &ptr_incr
, false,
3206 TREE_TYPE (vec_oprnd
));
3210 /* For interleaved stores we created vectorized defs for all the
3211 defs stored in OPRNDS in the previous iteration (previous copy).
3212 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3213 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3215 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3216 OPRNDS are of size 1. */
3217 for (i
= 0; i
< group_size
; i
++)
3219 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
,
3220 VEC_index (tree
, oprnds
, i
));
3221 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
3222 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
3224 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3229 result_chain
= VEC_alloc (tree
, heap
, group_size
);
3231 if (!vect_permute_store_chain (dr_chain
, group_size
, stmt
, bsi
,
3236 next_stmt
= first_stmt
;
3237 for (i
= 0; i
< group_size
; i
++)
3239 /* For strided stores vectorized defs are interleaved in
3240 vect_permute_store_chain(). */
3242 vec_oprnd
= VEC_index(tree
, result_chain
, i
);
3244 data_ref
= build_fold_indirect_ref (dataref_ptr
);
3245 /* Arguments are ready. Create the new vector stmt. */
3246 new_stmt
= build_gimple_modify_stmt (data_ref
, vec_oprnd
);
3247 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
3249 /* Set the VDEFs for the vector pointer. If this virtual def
3250 has a use outside the loop and a loop peel is performed
3251 then the def may be renamed by the peel. Mark it for
3252 renaming so the later use will also be renamed. */
3253 copy_virtual_operands (new_stmt
, next_stmt
);
3256 /* The original store is deleted so the same SSA_NAMEs
3258 FOR_EACH_SSA_TREE_OPERAND (def
, next_stmt
, iter
, SSA_OP_VDEF
)
3260 SSA_NAME_DEF_STMT (def
) = new_stmt
;
3261 mark_sym_for_renaming (SSA_NAME_VAR (def
));
3264 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3268 /* Create new names for all the definitions created by COPY and
3269 add replacement mappings for each new name. */
3270 FOR_EACH_SSA_DEF_OPERAND (def_p
, new_stmt
, iter
, SSA_OP_VDEF
)
3272 create_new_def_for (DEF_FROM_PTR (def_p
), new_stmt
, def_p
);
3273 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p
)));
3276 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3279 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3280 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3283 /* Bump the vector pointer. */
3284 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3292 /* Function vect_setup_realignment
3294 This function is called when vectorizing an unaligned load using
3295 the dr_unaligned_software_pipeline scheme.
3296 This function generates the following code at the loop prolog:
3299 msq_init = *(floor(p)); # prolog load
3300 realignment_token = call target_builtin;
3302 msq = phi (msq_init, ---)
3304 The code above sets up a new (vector) pointer, pointing to the first
3305 location accessed by STMT, and a "floor-aligned" load using that pointer.
3306 It also generates code to compute the "realignment-token" (if the relevant
3307 target hook was defined), and creates a phi-node at the loop-header bb
3308 whose arguments are the result of the prolog-load (created by this
3309 function) and the result of a load that takes place in the loop (to be
3310 created by the caller to this function).
3311 The caller to this function uses the phi-result (msq) to create the
3312 realignment code inside the loop, and sets up the missing phi argument,
3316 msq = phi (msq_init, lsq)
3317 lsq = *(floor(p')); # load in loop
3318 result = realign_load (msq, lsq, realignment_token);
3321 STMT - (scalar) load stmt to be vectorized. This load accesses
3322 a memory location that may be unaligned.
3323 BSI - place where new code is to be inserted.
3326 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3327 target hook, if defined.
3328 Return value - the result of the loop-header phi node. */
/* NOTE(review): this listing is a garbled extraction -- each statement is
   split across several lines, the leading numbers ("3331", ...) are the
   original source line numbers (not code), and some original lines
   (braces, returns, local declarations) are missing.  Comments below
   describe only what the visible calls demonstrate.

   vect_setup_realignment: emit loop-prolog code for the
   dr_unaligned_software_pipeline load scheme:
     1. a floor-aligned prolog load (ALIGN_INDIRECT_REF) inserted on the
        preheader edge,
     2. optionally a call to the target's builtin_mask_for_load hook to
        compute the realignment token, and
     3. a phi node in the loop header seeded with the prolog-load result.
   The caller completes the phi with the in-loop load (see the header
   comment above this function in the file).  */
3331 vect_setup_realignment (tree stmt
, block_stmt_iterator
*bsi
,
3332 tree
*realignment_token
)
3334 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3335 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3336 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3337 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3338 edge pe
= loop_preheader_edge (loop
);
3339 tree scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
3352 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
3353 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3354 ptr
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &init_addr
, &inc
, true,
3356 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
3357 new_stmt
= build_gimple_modify_stmt (vec_dest
, data_ref
);
3358 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3359 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
/* Inserting on the preheader edge must not split it -- assert no new bb.  */
3360 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
3361 gcc_assert (!new_bb
);
3362 msq_init
= GIMPLE_STMT_OPERAND (new_stmt
, 0);
3363 copy_virtual_operands (new_stmt
, stmt
);
3364 update_vuses_to_preheader (new_stmt
, loop
);
3366 /* 2. Create permutation mask, if required, in loop preheader. */
3367 if (targetm
.vectorize
.builtin_mask_for_load
)
3371 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
3372 new_stmt
= build_call_expr (builtin_decl
, 1, init_addr
);
3373 vec_dest
= vect_create_destination_var (scalar_dest
,
3374 TREE_TYPE (new_stmt
));
3375 new_stmt
= build_gimple_modify_stmt (vec_dest
, new_stmt
);
3376 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3377 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
3378 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
3379 gcc_assert (!new_bb
);
3380 *realignment_token
= GIMPLE_STMT_OPERAND (new_stmt
, 0);
3382 /* The result of the CALL_EXPR to this builtin is determined from
3383 the value of the parameter and no global variables are touched
3384 which makes the builtin a "const" function. Requiring the
3385 builtin to have the "const" attribute makes it unnecessary
3386 to call mark_call_clobbered. */
3387 gcc_assert (TREE_READONLY (builtin_decl
));
3390 /* 3. Create msq = phi <msq_init, lsq> in loop */
3391 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3392 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
3393 phi_stmt
= create_phi_node (msq
, loop
->header
);
3394 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
/* Only the preheader argument is set here; the caller adds the
   latch-edge argument (lsq) once the in-loop load exists.  */
3395 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
3401 /* Function vect_strided_load_supported.
3403 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3404 and FALSE otherwise. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; some original lines (returns/braces) are missing.

   vect_strided_load_supported: query whether the target provides
   VEC_EXTRACT_EVEN_EXPR and VEC_EXTRACT_ODD_EXPR for VECTYPE's mode,
   i.e. whether interleaved (strided) loads can be permuted.  Each check
   first looks up the optab, then verifies the insn code for this mode
   is not CODE_FOR_nothing.  */
3407 vect_strided_load_supported (tree vectype
)
3409 optab perm_even_optab
, perm_odd_optab
;
3412 mode
= (int) TYPE_MODE (vectype
);
3414 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
);
3415 if (!perm_even_optab
)
3417 if (vect_print_dump_info (REPORT_DETAILS
))
3418 fprintf (vect_dump
, "no optab for perm_even.");
3422 if (perm_even_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
3424 if (vect_print_dump_info (REPORT_DETAILS
))
3425 fprintf (vect_dump
, "perm_even op not supported by target.");
3429 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
);
3430 if (!perm_odd_optab
)
3432 if (vect_print_dump_info (REPORT_DETAILS
))
3433 fprintf (vect_dump
, "no optab for perm_odd.");
3437 if (perm_odd_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
3439 if (vect_print_dump_info (REPORT_DETAILS
))
3440 fprintf (vect_dump
, "perm_odd op not supported by target.");
3447 /* Function vect_permute_load_chain.
3449 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3450 a power of 2, generate extract_even/odd stmts to reorder the input data
3451 correctly. Return the final references for loads in RESULT_CHAIN.
3453 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3454 The input is 4 vectors each containing 8 elements. We assign a number to each
3455 element, the input sequence is:
3457 1st vec: 0 1 2 3 4 5 6 7
3458 2nd vec: 8 9 10 11 12 13 14 15
3459 3rd vec: 16 17 18 19 20 21 22 23
3460 4th vec: 24 25 26 27 28 29 30 31
3462 The output sequence should be:
3464 1st vec: 0 4 8 12 16 20 24 28
3465 2nd vec: 1 5 9 13 17 21 25 29
3466 3rd vec: 2 6 10 14 18 22 26 30
3467 4th vec: 3 7 11 15 19 23 27 31
3469 i.e., the first output vector should contain the first elements of each
3470 interleaving group, etc.
3472 We use extract_even/odd instructions to create such output. The input of each
3473 extract_even/odd operation is two vectors
3477 and the output is the vector of extracted even/odd elements. The output of
3478 extract_even will be: 0 2 4 6
3479 and of extract_odd: 1 3 5 7
3482 The permutation is done in log LENGTH stages. In each stage extract_even and
3483 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3484 order. In our example,
3486 E1: extract_even (1st vec, 2nd vec)
3487 E2: extract_odd (1st vec, 2nd vec)
3488 E3: extract_even (3rd vec, 4th vec)
3489 E4: extract_odd (3rd vec, 4th vec)
3491 The output for the first stage will be:
3493 E1: 0 2 4 6 8 10 12 14
3494 E2: 1 3 5 7 9 11 13 15
3495 E3: 16 18 20 22 24 26 28 30
3496 E4: 17 19 21 23 25 27 29 31
3498 In order to proceed and create the correct sequence for the next stage (or
3499 for the correct output, if the second stage is the last one, as in our
3500 example), we first put the output of extract_even operation and then the
3501 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3502 The input for the second stage is:
3504 1st vec (E1): 0 2 4 6 8 10 12 14
3505 2nd vec (E3): 16 18 20 22 24 26 28 30
3506 3rd vec (E2): 1 3 5 7 9 11 13 15
3507 4th vec (E4): 17 19 21 23 25 27 29 31
3509 The output of the second stage:
3511 E1: 0 4 8 12 16 20 24 28
3512 E2: 2 6 10 14 18 22 26 30
3513 E3: 1 5 9 13 17 21 25 29
3514 E4: 3 7 11 15 19 23 27 31
3516 And RESULT_CHAIN after reordering:
3518 1st vec (E1): 0 4 8 12 16 20 24 28
3519 2nd vec (E3): 1 5 9 13 17 21 25 29
3520 3rd vec (E2): 2 6 10 14 18 22 26 30
3521 4th vec (E4): 3 7 11 15 19 23 27 31. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; some original lines (braces/returns/declarations) are
   missing from this listing.

   vect_permute_load_chain: de-interleave LENGTH vectors (LENGTH must be a
   power of two, per the header comment above) in log2(LENGTH) stages.
   Each stage pairs consecutive vectors of DR_CHAIN and emits one
   VEC_EXTRACT_EVEN_EXPR and one VEC_EXTRACT_ODD_EXPR per pair; evens go
   to slot j/2 of *RESULT_CHAIN, odds to slot j/2 + length/2.  At the end
   of each stage *RESULT_CHAIN is copied back into dr_chain to feed the
   next stage.  */
3524 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
3525 unsigned int length
,
3527 block_stmt_iterator
*bsi
,
3528 VEC(tree
,heap
) **result_chain
)
3530 tree perm_dest
, perm_stmt
, data_ref
, first_vect
, second_vect
;
3531 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3536 /* Check that the operation is supported. */
3537 if (!vect_strided_load_supported (vectype
))
3540 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3541 for (i
= 0; i
< exact_log2 (length
); i
++)
3543 for (j
= 0; j
< length
; j
+=2)
3545 first_vect
= VEC_index (tree
, dr_chain
, j
);
3546 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
3548 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3549 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
3550 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3551 add_referenced_var (perm_dest
);
3553 tmp
= build2 (VEC_EXTRACT_EVEN_EXPR
, vectype
,
3554 first_vect
, second_vect
);
3555 perm_stmt
= build_gimple_modify_stmt (perm_dest
, tmp
);
3557 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3558 GIMPLE_STMT_OPERAND (perm_stmt
, 0) = data_ref
;
3559 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
3560 mark_symbols_for_renaming (perm_stmt
);
/* Even-extracted vector goes into the first half of the result.  */
3562 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
3564 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3565 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
3566 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3567 add_referenced_var (perm_dest
);
3569 tmp
= build2 (VEC_EXTRACT_ODD_EXPR
, vectype
,
3570 first_vect
, second_vect
);
3571 perm_stmt
= build_gimple_modify_stmt (perm_dest
, tmp
);
3572 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3573 GIMPLE_STMT_OPERAND (perm_stmt
, 0) = data_ref
;
3574 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
3575 mark_symbols_for_renaming (perm_stmt
);
/* Odd-extracted vector goes into the second half of the result.  */
3577 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
/* Feed this stage's output into the next stage.  */
3579 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3585 /* Function vect_transform_strided_load.
3587 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3588 to perform their permutation and ascribe the result vectorized statements to
3589 the scalar statements.
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; some original lines (braces/returns) are missing.

   vect_transform_strided_load: permute the interleaved vector loads in
   DR_CHAIN via vect_permute_load_chain, then walk the scalar interleaving
   chain (starting at DR_GROUP_FIRST_DR) ascribing each permuted vector to
   the corresponding scalar stmt's STMT_VINFO_VEC_STMT, appending to the
   RELATED_STMT list for multiple-copy cases and skipping gap loads.  */
3593 vect_transform_strided_load (tree stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
3594 block_stmt_iterator
*bsi
)
3596 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3597 tree first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3598 tree next_stmt
, new_stmt
;
3599 VEC(tree
,heap
) *result_chain
= NULL
;
3600 unsigned int i
, gap_count
;
3603 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3604 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3605 vectors, that are ready for vector computation. */
3606 result_chain
= VEC_alloc (tree
, heap
, size
);
3608 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, bsi
, &result_chain
))
3611 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3612 Since we scan the chain starting from its first node, their order
3613 corresponds to the order of data-refs in RESULT_CHAIN. */
3614 next_stmt
= first_stmt
;
3616 for (i
= 0; VEC_iterate(tree
, result_chain
, i
, tmp_data_ref
); i
++)
3621 /* Skip the gaps. Loads created for the gaps will be removed by dead
3622 code elimination pass later.
3623 DR_GROUP_GAP is the number of steps in elements from the previous
3624 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3625 correspond to the gaps.
3627 if (gap_count
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
3635 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
3636 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3637 copies, and we put the new vector statement in the first available
3639 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
3640 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
/* Otherwise: walk to the end of the RELATED_STMT chain and append.  */
3643 tree prev_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
3644 tree rel_stmt
= STMT_VINFO_RELATED_STMT (
3645 vinfo_for_stmt (prev_stmt
));
3648 prev_stmt
= rel_stmt
;
3649 rel_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
3651 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) = new_stmt
;
3653 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3655 /* If NEXT_STMT accesses the same DR as the previous statement,
3656 put the same TMP_DATA_REF as its vectorized statement; otherwise
3657 get the next data-ref from RESULT_CHAIN. */
3658 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3666 /* vectorizable_load.
3668 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3670 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3671 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3672 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; many original lines (braces, returns, some declarations)
   are missing from this listing, so control-flow nesting must be read
   from the original file.

   vectorizable_load: analysis + transformation for a scalar load STMT.
   Analysis part: checks relevance, SSA destination, data-ref presence,
   mov_optab support for the vector mode, and (for interleaved chains)
   vect_strided_load_supported.  Transformation part: builds NCOPIES
   (= VF / nunits) vector loads per group member, choosing the addressing
   form by alignment scheme (plain indirect ref / MISALIGNED_INDIRECT_REF /
   ALIGN_INDIRECT_REF + REALIGN_LOAD_EXPR via vect_setup_realignment), then
   permutes interleaved results with vect_transform_strided_load.  */
3675 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
3678 tree vec_dest
= NULL
;
3679 tree data_ref
= NULL
;
3681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3682 stmt_vec_info prev_stmt_info
;
3683 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3684 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3685 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
3686 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3689 tree new_stmt
= NULL_TREE
;
3691 enum dr_alignment_support alignment_support_cheme
;
3692 tree dataref_ptr
= NULL_TREE
;
3694 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3695 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3696 int i
, j
, group_size
;
3697 tree msq
= NULL_TREE
, lsq
;
3698 tree offset
= NULL_TREE
;
3699 tree realignment_token
= NULL_TREE
;
3700 tree phi_stmt
= NULL_TREE
;
3701 VEC(tree
,heap
) *dr_chain
= NULL
;
3702 bool strided_load
= false;
3705 /* Is vectorizable load? */
3706 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3709 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
3711 if (STMT_VINFO_LIVE_P (stmt_info
))
3713 /* FORNOW: not yet supported. */
3714 if (vect_print_dump_info (REPORT_DETAILS
))
3715 fprintf (vect_dump
, "value used after loop.");
3719 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
3722 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
3723 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
3726 op
= GIMPLE_STMT_OPERAND (stmt
, 1);
3727 if (TREE_CODE (op
) != ARRAY_REF
3728 && TREE_CODE (op
) != INDIRECT_REF
3729 && !DR_GROUP_FIRST_DR (stmt_info
))
3732 if (!STMT_VINFO_DATA_REF (stmt_info
))
3735 mode
= (int) TYPE_MODE (vectype
);
3737 /* FORNOW. In some cases can vectorize even if data-type not supported
3738 (e.g. - data copies). */
3739 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
3741 if (vect_print_dump_info (REPORT_DETAILS
))
3742 fprintf (vect_dump
, "Aligned load, but unsupported type.");
3746 /* Check if the load is a part of an interleaving chain. */
3747 if (DR_GROUP_FIRST_DR (stmt_info
))
3749 strided_load
= true;
3751 /* Check if interleaving is supported. */
3752 if (!vect_strided_load_supported (vectype
))
3756 if (!vec_stmt
) /* transformation not required. */
3758 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
3764 if (vect_print_dump_info (REPORT_DETAILS
))
3765 fprintf (vect_dump
, "transform load.");
3769 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3770 /* Check if the chain of loads is already vectorized. */
3771 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
3773 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3776 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3777 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3778 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3787 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
3788 gcc_assert (alignment_support_cheme
);
3791 /* In case the vectorization factor (VF) is bigger than the number
3792 of elements that we can fit in a vectype (nunits), we have to generate
3793 more than one vector stmt - i.e - we need to "unroll" the
3794 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3795 from one copy of the vector stmt to the next, in the field
3796 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3797 stages to find the correct vector defs to be used when vectorizing
3798 stmts that use the defs of the current stmt. The example below illustrates
3799 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3800 4 vectorized stmts):
3802 before vectorization:
3803 RELATED_STMT VEC_STMT
3807 step 1: vectorize stmt S1:
3808 We first create the vector stmt VS1_0, and, as usual, record a
3809 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3810 Next, we create the vector stmt VS1_1, and record a pointer to
3811 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3812 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3814 RELATED_STMT VEC_STMT
3815 VS1_0: vx0 = memref0 VS1_1 -
3816 VS1_1: vx1 = memref1 VS1_2 -
3817 VS1_2: vx2 = memref2 VS1_3 -
3818 VS1_3: vx3 = memref3 - -
3819 S1: x = load - VS1_0
3822 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3823 information we recorded in RELATED_STMT field is used to vectorize
3826 /* In case of interleaving (non-unit strided access):
3833 Vectorized loads are created in the order of memory accesses
3834 starting from the access of the first stmt of the chain:
3837 VS2: vx1 = &base + vec_size*1
3838 VS3: vx3 = &base + vec_size*2
3839 VS4: vx4 = &base + vec_size*3
3841 Then permutation statements are generated:
3843 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3844 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3847 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3848 (the order of the data-refs in the output of vect_permute_load_chain
3849 corresponds to the order of scalar stmts in the interleaving chain - see
3850 the documentation of vect_permute_load_chain()).
3851 The generation of permutation stmts and recording them in
3852 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3854 In case of both multiple types and interleaving, the vector loads and
3855 permutation stmts above are created for every copy. The result vector stmts
3856 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3857 STMT_VINFO_RELATED_STMT for the next copies. */
3859 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3860 on a target that supports unaligned accesses (dr_unaligned_supported)
3861 we generate the following code:
3865 p = p + indx * vectype_size;
3870 Otherwise, the data reference is potentially unaligned on a target that
3871 does not support unaligned accesses (dr_unaligned_software_pipeline) -
3872 then generate the following code, in which the data in each iteration is
3873 obtained by two vector loads, one from the previous iteration, and one
3874 from the current iteration:
3876 msq_init = *(floor(p1))
3877 p2 = initial_addr + VS - 1;
3878 realignment_token = call target_builtin;
3881 p2 = p2 + indx * vectype_size
3883 vec_dest = realign_load (msq, lsq, realignment_token)
3888 if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
3890 msq
= vect_setup_realignment (first_stmt
, bsi
, &realignment_token
);
3891 phi_stmt
= SSA_NAME_DEF_STMT (msq
);
3892 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
3895 prev_stmt_info
= NULL
;
3896 for (j
= 0; j
< ncopies
; j
++)
3898 /* 1. Create the vector pointer update chain. */
3900 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, bsi
, offset
, &dummy
,
3901 &ptr_incr
, false, NULL_TREE
);
3903 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3905 for (i
= 0; i
< group_size
; i
++)
3907 /* 2. Create the vector-load in the loop. */
3908 switch (alignment_support_cheme
)
3911 gcc_assert (aligned_access_p (first_dr
));
3912 data_ref
= build_fold_indirect_ref (dataref_ptr
);
3914 case dr_unaligned_supported
:
3916 int mis
= DR_MISALIGNMENT (first_dr
);
3917 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
3919 gcc_assert (!aligned_access_p (first_dr
));
/* Misalignment is tracked in bytes; the tree node wants bits.  */
3920 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
3922 build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
3925 case dr_unaligned_software_pipeline
:
3926 gcc_assert (!aligned_access_p (first_dr
));
3927 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
3932 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3933 new_stmt
= build_gimple_modify_stmt (vec_dest
, data_ref
);
3934 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3935 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
3936 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
3937 copy_virtual_operands (new_stmt
, stmt
);
3938 mark_symbols_for_renaming (new_stmt
);
3940 /* 3. Handle explicit realignment if necessary/supported. */
3941 if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
3944 <vec_dest = realign_load (msq, lsq, realignment_token)> */
3945 lsq
= GIMPLE_STMT_OPERAND (new_stmt
, 0);
3946 if (!realignment_token
)
3947 realignment_token
= dataref_ptr
;
3948 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3950 build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, realignment_token
);
3951 new_stmt
= build_gimple_modify_stmt (vec_dest
, new_stmt
);
3952 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3953 GIMPLE_STMT_OPERAND (new_stmt
, 0) = new_temp
;
3954 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* On the last generated load, complete the realignment phi created
   by vect_setup_realignment with the latch-edge argument.  */
3955 if (i
== group_size
- 1 && j
== ncopies
- 1)
3956 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
3960 VEC_quick_push (tree
, dr_chain
, new_temp
);
3961 if (i
< group_size
- 1)
3962 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3967 if (!vect_transform_strided_load (stmt
, dr_chain
, group_size
, bsi
))
3969 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3970 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3975 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3977 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3978 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3986 /* Function vectorizable_live_operation.
3988 STMT computes a value that is used outside the loop. Check if
3989 it can be supported. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; returns/braces are missing from this listing.

   vectorizable_live_operation: check whether a stmt whose value is used
   after the loop can be supported.  Currently only the case where every
   operand is a simple use of an invariant or constant def is accepted,
   in which case the scalar stmt is left in place (no code generated;
   BSI and VEC_STMT are ATTRIBUTE_UNUSED).  */
3992 vectorizable_live_operation (tree stmt
,
3993 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
3994 tree
*vec_stmt ATTRIBUTE_UNUSED
)
3997 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3998 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4003 enum vect_def_type dt
;
4005 if (!STMT_VINFO_LIVE_P (stmt_info
))
4008 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
4011 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt
, 0)) != SSA_NAME
)
4014 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
4015 op_type
= TREE_OPERAND_LENGTH (operation
);
4017 /* FORNOW: support only if all uses are invariant. This means
4018 that the scalar operations can remain in place, unvectorized.
4019 The original last scalar value that they compute will be used. */
4021 for (i
= 0; i
< op_type
; i
++)
4023 op
= TREE_OPERAND (operation
, i
);
4024 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
4026 if (vect_print_dump_info (REPORT_DETAILS
))
4027 fprintf (vect_dump
, "use not simple.");
4031 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
)
4035 /* No transformation is required for the cases we currently support. */
4040 /* Function vect_is_simple_cond.
4043 LOOP - the loop that is being vectorized.
4044 COND - Condition that is checked for simple use.
4046 Returns whether a COND can be vectorized. Checks whether
4047 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; returns/braces are missing from this listing.

   vect_is_simple_cond: COND must be a comparison whose two operands are
   each either an SSA_NAME that passes vect_is_simple_use, or an
   INTEGER_CST/REAL_CST literal.  */
4050 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
4054 enum vect_def_type dt
;
4056 if (!COMPARISON_CLASS_P (cond
))
4059 lhs
= TREE_OPERAND (cond
, 0);
4060 rhs
= TREE_OPERAND (cond
, 1);
4062 if (TREE_CODE (lhs
) == SSA_NAME
)
4064 tree lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
4065 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
4068 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
)
4071 if (TREE_CODE (rhs
) == SSA_NAME
)
4073 tree rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
4074 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
4077 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
)
4080 /* review: at this point in the listing both operands have been
   validated; the success return is among the missing lines.  */
)
4083 /* vectorizable_condition.
4085 Check if STMT is conditional modify expression that can be vectorized.
4086 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4087 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4090 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): garbled extraction -- leading numbers are original source
   line numbers; returns/braces are missing from this listing.

   vectorizable_condition: analyze and (when VEC_STMT is set) transform a
   COND_EXPR assignment into a VEC_COND_EXPR.  Note the unconditional
   "return false; /* FORNOW *" visible at original line 4113 below: as
   listed, it precedes all the analysis, which would make the rest of the
   function unreachable -- TODO confirm against the full original source
   whether a guard (e.g. ncopies > 1) was dropped by the extraction.  */
4093 vectorizable_condition (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
4095 tree scalar_dest
= NULL_TREE
;
4096 tree vec_dest
= NULL_TREE
;
4097 tree op
= NULL_TREE
;
4098 tree cond_expr
, then_clause
, else_clause
;
4099 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4100 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4101 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
4102 tree vec_compare
, vec_cond_expr
;
4104 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4105 enum machine_mode vec_mode
;
4107 enum vect_def_type dt
;
4108 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4109 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4111 gcc_assert (ncopies
>= 1);
4113 return false; /* FORNOW */
4115 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4118 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
4120 if (STMT_VINFO_LIVE_P (stmt_info
))
4122 /* FORNOW: not yet supported. */
4123 if (vect_print_dump_info (REPORT_DETAILS
))
4124 fprintf (vect_dump
, "value used after loop.");
4128 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
4131 op
= GIMPLE_STMT_OPERAND (stmt
, 1);
4133 if (TREE_CODE (op
) != COND_EXPR
)
4136 cond_expr
= TREE_OPERAND (op
, 0);
4137 then_clause
= TREE_OPERAND (op
, 1);
4138 else_clause
= TREE_OPERAND (op
, 2);
4140 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
4143 /* We do not handle two different vector types for the condition
4145 if (TREE_TYPE (TREE_OPERAND (cond_expr
, 0)) != TREE_TYPE (vectype
))
4148 if (TREE_CODE (then_clause
) == SSA_NAME
)
4150 tree then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
4151 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
4152 &then_def_stmt
, &def
, &dt
))
4155 else if (TREE_CODE (then_clause
) != INTEGER_CST
4156 && TREE_CODE (then_clause
) != REAL_CST
)
4159 if (TREE_CODE (else_clause
) == SSA_NAME
)
4161 tree else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
4162 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
4163 &else_def_stmt
, &def
, &dt
))
4166 else if (TREE_CODE (else_clause
) != INTEGER_CST
4167 && TREE_CODE (else_clause
) != REAL_CST
)
4171 vec_mode
= TYPE_MODE (vectype
);
/* Analysis phase: record the stmt type and report whether the target
   can expand a VEC_COND_EXPR in this mode.  */
4175 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
4176 return expand_vec_cond_expr_p (op
, vec_mode
);
4182 scalar_dest
= GIMPLE_STMT_OPERAND (stmt
, 0);
4183 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4185 /* Handle cond expr. */
4187 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
4189 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
4190 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
4191 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
4193 /* Arguments are ready. Create the new vector stmt. */
4194 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
4195 vec_cond_lhs
, vec_cond_rhs
);
4196 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
4197 vec_compare
, vec_then_clause
, vec_else_clause
);
4199 *vec_stmt
= build_gimple_modify_stmt (vec_dest
, vec_cond_expr
);
4200 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
4201 GIMPLE_STMT_OPERAND (*vec_stmt
, 0) = new_temp
;
4202 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
4207 /* Function vect_transform_stmt.
4209 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4212 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
, bool *strided_store
)
4214 bool is_store
= false;
4215 tree vec_stmt
= NULL_TREE
;
4216 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4217 tree orig_stmt_in_pattern
;
4220 if (STMT_VINFO_RELEVANT_P (stmt_info
))
4222 switch (STMT_VINFO_TYPE (stmt_info
))
4224 case type_demotion_vec_info_type
:
4225 done
= vectorizable_type_demotion (stmt
, bsi
, &vec_stmt
);
4229 case type_promotion_vec_info_type
:
4230 done
= vectorizable_type_promotion (stmt
, bsi
, &vec_stmt
);
4234 case type_conversion_vec_info_type
:
4235 done
= vectorizable_conversion (stmt
, bsi
, &vec_stmt
);
4239 case op_vec_info_type
:
4240 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
4244 case assignment_vec_info_type
:
4245 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
4249 case load_vec_info_type
:
4250 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
4254 case store_vec_info_type
:
4255 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
4257 if (DR_GROUP_FIRST_DR (stmt_info
))
4259 /* In case of interleaving, the whole chain is vectorized when the
4260 last store in the chain is reached. Store stmts before the last
4261 one are skipped, and there vec_stmt_info shouldn't be freed
4263 *strided_store
= true;
4264 if (STMT_VINFO_VEC_STMT (stmt_info
))
4271 case condition_vec_info_type
:
4272 done
= vectorizable_condition (stmt
, bsi
, &vec_stmt
);
4276 case call_vec_info_type
:
4277 done
= vectorizable_call (stmt
, bsi
, &vec_stmt
);
4281 if (vect_print_dump_info (REPORT_DETAILS
))
4282 fprintf (vect_dump
, "stmt not supported.");
4286 gcc_assert (vec_stmt
|| *strided_store
);
4289 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
4290 orig_stmt_in_pattern
= STMT_VINFO_RELATED_STMT (stmt_info
);
4291 if (orig_stmt_in_pattern
)
4293 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt_in_pattern
);
4294 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
4296 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
4298 /* STMT was inserted by the vectorizer to replace a
4299 computation idiom. ORIG_STMT_IN_PATTERN is a stmt in the
4300 original sequence that computed this idiom. We need to
4301 record a pointer to VEC_STMT in the stmt_info of
4302 ORIG_STMT_IN_PATTERN. See more details in the
4303 documentation of vect_pattern_recog. */
4305 STMT_VINFO_VEC_STMT (stmt_vinfo
) = vec_stmt
;
4311 if (STMT_VINFO_LIVE_P (stmt_info
))
4313 switch (STMT_VINFO_TYPE (stmt_info
))
4315 case reduc_vec_info_type
:
4316 done
= vectorizable_reduction (stmt
, bsi
, &vec_stmt
);
4321 done
= vectorizable_live_operation (stmt
, bsi
, &vec_stmt
);
4330 /* This function builds ni_name = number of iterations loop executes
4331 on the loop preheader. */
4334 vect_build_loop_niters (loop_vec_info loop_vinfo
)
4336 tree ni_name
, stmt
, var
;
4338 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4339 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
4341 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
4342 add_referenced_var (var
);
4343 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
4345 pe
= loop_preheader_edge (loop
);
4348 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4349 gcc_assert (!new_bb
);
4356 /* This function generates the following statements:
4358 ni_name = number of iterations loop executes
4359 ratio = ni_name / vf
4360 ratio_mult_vf_name = ratio * vf
4362 and places them at the loop preheader edge. */
4365 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
4367 tree
*ratio_mult_vf_name_ptr
,
4368 tree
*ratio_name_ptr
)
4376 tree ratio_mult_vf_name
;
4377 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4378 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
4379 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4382 pe
= loop_preheader_edge (loop
);
4384 /* Generate temporary variable that contains
4385 number of iterations loop executes. */
4387 ni_name
= vect_build_loop_niters (loop_vinfo
);
4388 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
4390 /* Create: ratio = ni >> log2(vf) */
4392 ratio_name
= fold_build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
);
4393 if (!is_gimple_val (ratio_name
))
4395 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
4396 add_referenced_var (var
);
4398 ratio_name
= force_gimple_operand (ratio_name
, &stmt
, true, var
);
4399 pe
= loop_preheader_edge (loop
);
4400 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4401 gcc_assert (!new_bb
);
4404 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4406 ratio_mult_vf_name
= fold_build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
),
4407 ratio_name
, log_vf
);
4408 if (!is_gimple_val (ratio_mult_vf_name
))
4410 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
4411 add_referenced_var (var
);
4413 ratio_mult_vf_name
= force_gimple_operand (ratio_mult_vf_name
, &stmt
,
4415 pe
= loop_preheader_edge (loop
);
4416 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4417 gcc_assert (!new_bb
);
4420 *ni_name_ptr
= ni_name
;
4421 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
4422 *ratio_name_ptr
= ratio_name
;
4428 /* Function update_vuses_to_preheader.
4431 STMT - a statement with potential VUSEs.
4432 LOOP - the loop whose preheader will contain STMT.
4434 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4435 appears to be defined in a VDEF in another statement in a loop.
4436 One such case is when the VUSE is at the dereference of a __restricted__
4437 pointer in a load and the VDEF is at the dereference of a different
4438 __restricted__ pointer in a store. Vectorization may result in
4439 copy_virtual_uses being called to copy the problematic VUSE to a new
4440 statement that is being inserted in the loop preheader. This procedure
4441 is called to change the SSA_NAME in the new statement's VUSE from the
4442 SSA_NAME updated in the loop to the related SSA_NAME available on the
4443 path entering the loop.
4445 When this function is called, we have the following situation:
4450 # name1 = phi < name0 , name2>
4455 # name2 = vdef <name1>
4460 Stmt S1 was created in the loop preheader block as part of misaligned-load
4461 handling. This function fixes the name of the vuse of S1 from 'name1' to
4465 update_vuses_to_preheader (tree stmt
, struct loop
*loop
)
4467 basic_block header_bb
= loop
->header
;
4468 edge preheader_e
= loop_preheader_edge (loop
);
4470 use_operand_p use_p
;
4472 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_VUSE
)
4474 tree ssa_name
= USE_FROM_PTR (use_p
);
4475 tree def_stmt
= SSA_NAME_DEF_STMT (ssa_name
);
4476 tree name_var
= SSA_NAME_VAR (ssa_name
);
4477 basic_block bb
= bb_for_stmt (def_stmt
);
4479 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4480 if (!IS_EMPTY_STMT (def_stmt
)
4481 && flow_bb_inside_loop_p (loop
, bb
))
4483 /* If the block containing the statement defining the SSA_NAME
4484 is in the loop then it's necessary to find the definition
4485 outside the loop using the PHI nodes of the header. */
4487 bool updated
= false;
4489 for (phi
= phi_nodes (header_bb
); phi
; phi
= TREE_CHAIN (phi
))
4491 if (SSA_NAME_VAR (PHI_RESULT (phi
)) == name_var
)
4493 SET_USE (use_p
, PHI_ARG_DEF (phi
, preheader_e
->dest_idx
));
4498 gcc_assert (updated
);
4504 /* Function vect_update_ivs_after_vectorizer.
4506 "Advance" the induction variables of LOOP to the value they should take
4507 after the execution of LOOP. This is currently necessary because the
4508 vectorizer does not handle induction variables that are used after the
4509 loop. Such a situation occurs when the last iterations of LOOP are
4511 1. We introduced new uses after LOOP for IVs that were not originally used
4512 after LOOP: the IVs of LOOP are now used by an epilog loop.
4513 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4514 times, whereas the loop IVs should be bumped N times.
4517 - LOOP - a loop that is going to be vectorized. The last few iterations
4518 of LOOP were peeled.
4519 - NITERS - the number of iterations that LOOP executes (before it is
4520 vectorized). i.e, the number of times the ivs should be bumped.
4521 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4522 coming out from LOOP on which there are uses of the LOOP ivs
4523 (this is the path from LOOP->exit to epilog_loop->preheader).
4525 The new definitions of the ivs are placed in LOOP->exit.
4526 The phi args associated with the edge UPDATE_E in the bb
4527 UPDATE_E->dest are updated accordingly.
4529 Assumption 1: Like the rest of the vectorizer, this function assumes
4530 a single loop exit that has a single predecessor.
4532 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4533 organized in the same order.
4535 Assumption 3: The access function of the ivs is simple enough (see
4536 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4538 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4539 coming out of LOOP on which the ivs of LOOP are used (this is the path
4540 that leads to the epilog loop; other paths skip the epilog loop). This
4541 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4542 needs to have its phis updated.
4546 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
4549 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4550 basic_block exit_bb
= single_exit (loop
)->dest
;
4552 basic_block update_bb
= update_e
->dest
;
4554 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
4556 /* Make sure there exists a single-predecessor exit bb: */
4557 gcc_assert (single_pred_p (exit_bb
));
4559 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
4561 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
4563 tree access_fn
= NULL
;
4564 tree evolution_part
;
4567 tree var
, stmt
, ni
, ni_name
;
4568 block_stmt_iterator last_bsi
;
4570 if (vect_print_dump_info (REPORT_DETAILS
))
4572 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
4573 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
4576 /* Skip virtual phi's. */
4577 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
4579 if (vect_print_dump_info (REPORT_DETAILS
))
4580 fprintf (vect_dump
, "virtual phi. skip.");
4584 /* Skip reduction phis. */
4585 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
4587 if (vect_print_dump_info (REPORT_DETAILS
))
4588 fprintf (vect_dump
, "reduc phi. skip.");
4592 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
4593 gcc_assert (access_fn
);
4595 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
4596 gcc_assert (evolution_part
!= NULL_TREE
);
4598 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4599 of degree >= 2 or exponential. */
4600 gcc_assert (!tree_is_chrec (evolution_part
));
4602 step_expr
= evolution_part
;
4603 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
4606 ni
= fold_build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
4607 fold_build2 (MULT_EXPR
, TREE_TYPE (init_expr
),
4608 fold_convert (TREE_TYPE (init_expr
),
4613 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
4614 add_referenced_var (var
);
4616 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
4618 /* Insert stmt into exit_bb. */
4619 last_bsi
= bsi_last (exit_bb
);
4621 bsi_insert_before (&last_bsi
, stmt
, BSI_SAME_STMT
);
4623 /* Fix phi expressions in the successor bb. */
4624 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
4629 /* Function vect_do_peeling_for_loop_bound
4631 Peel the last iterations of the loop represented by LOOP_VINFO.
4632 The peeled iterations form a new epilog loop. Given that the loop now
4633 iterates NITERS times, the new epilog loop iterates
4634 NITERS % VECTORIZATION_FACTOR times.
4636 The original loop will later be made to iterate
4637 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
4640 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
)
4642 tree ni_name
, ratio_mult_vf_name
;
4643 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4644 struct loop
*new_loop
;
4646 basic_block preheader
;
4650 if (vect_print_dump_info (REPORT_DETAILS
))
4651 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
4653 initialize_original_copy_tables ();
4655 /* Generate the following variables on the preheader of original loop:
4657 ni_name = number of iteration the original loop executes
4658 ratio = ni_name / vf
4659 ratio_mult_vf_name = ratio * vf */
4660 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
4661 &ratio_mult_vf_name
, ratio
);
4663 loop_num
= loop
->num
;
4664 /* Threshold for vectorized loop. */
4665 th
= (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)) *
4666 LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4667 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, single_exit (loop
),
4668 ratio_mult_vf_name
, ni_name
, false, th
);
4669 gcc_assert (new_loop
);
4670 gcc_assert (loop_num
== loop
->num
);
4671 #ifdef ENABLE_CHECKING
4672 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
4675 /* A guard that controls whether the new_loop is to be executed or skipped
4676 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4677 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4678 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4679 is on the path where the LOOP IVs are used and need to be updated. */
4681 preheader
= loop_preheader_edge (new_loop
)->src
;
4682 if (EDGE_PRED (preheader
, 0)->src
== single_exit (loop
)->dest
)
4683 update_e
= EDGE_PRED (preheader
, 0);
4685 update_e
= EDGE_PRED (preheader
, 1);
4687 /* Update IVs of original loop as if they were advanced
4688 by ratio_mult_vf_name steps. */
4689 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
4691 /* After peeling we have to reset scalar evolution analyzer. */
4694 free_original_copy_tables ();
4698 /* Function vect_gen_niters_for_prolog_loop
4700 Set the number of iterations for the loop represented by LOOP_VINFO
4701 to the minimum between LOOP_NITERS (the original iteration count of the loop)
4702 and the misalignment of DR - the data reference recorded in
4703 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
4704 this loop, the data reference DR will refer to an aligned location.
4706 The following computation is generated:
4708 If the misalignment of DR is known at compile time:
4709 addr_mis = int mis = DR_MISALIGNMENT (dr);
4710 Else, compute address misalignment in bytes:
4711 addr_mis = addr & (vectype_size - 1)
4713 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4715 (elem_size = element type size; an element is the scalar element
4716 whose type is the inner type of the vectype)
4720 prolog_niters = min ( LOOP_NITERS ,
4721 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
4722 where group_size is the size of the interleaved group.
4726 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
4728 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
4729 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4730 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4732 tree iters
, iters_name
;
4735 tree dr_stmt
= DR_STMT (dr
);
4736 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
4737 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4738 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
4739 tree niters_type
= TREE_TYPE (loop_niters
);
4741 int element_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
4743 if (DR_GROUP_FIRST_DR (stmt_info
))
4745 /* For interleaved access element size must be multiplied by the size of
4746 the interleaved group. */
4747 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (
4748 DR_GROUP_FIRST_DR (stmt_info
)));
4749 element_size
*= group_size
;
4752 pe
= loop_preheader_edge (loop
);
4754 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
4756 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
4757 int elem_misalign
= byte_misalign
/ element_size
;
4759 if (vect_print_dump_info (REPORT_DETAILS
))
4760 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
4761 iters
= build_int_cst (niters_type
,
4762 (vf
- elem_misalign
)&(vf
/group_size
-1));
4766 tree new_stmts
= NULL_TREE
;
4768 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
4769 tree ptr_type
= TREE_TYPE (start_addr
);
4770 tree size
= TYPE_SIZE (ptr_type
);
4771 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
4772 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
4773 tree elem_size_log
=
4774 build_int_cst (type
, exact_log2 (vectype_align
/vf
));
4775 tree vf_minus_1
= build_int_cst (type
, vf
- 1);
4776 tree vf_tree
= build_int_cst (type
, vf
);
4780 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
4781 gcc_assert (!new_bb
);
4783 /* Create: byte_misalign = addr & (vectype_size - 1) */
4785 fold_build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
4787 /* Create: elem_misalign = byte_misalign / element_size */
4789 fold_build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
4791 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
4792 iters
= fold_build2 (MINUS_EXPR
, type
, vf_tree
, elem_misalign
);
4793 iters
= fold_build2 (BIT_AND_EXPR
, type
, iters
, vf_minus_1
);
4794 iters
= fold_convert (niters_type
, iters
);
4797 /* Create: prolog_loop_niters = min (iters, loop_niters) */
4798 /* If the loop bound is known at compile time we already verified that it is
4799 greater than vf; since the misalignment ('iters') is at most vf, there's
4800 no need to generate the MIN_EXPR in this case. */
4801 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
4802 iters
= fold_build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
4804 if (vect_print_dump_info (REPORT_DETAILS
))
4806 fprintf (vect_dump
, "niters for prolog loop: ");
4807 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
4810 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
4811 add_referenced_var (var
);
4812 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
4814 /* Insert stmt on loop preheader edge. */
4817 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4818 gcc_assert (!new_bb
);
4825 /* Function vect_update_init_of_dr
4827 NITERS iterations were peeled from LOOP. DR represents a data reference
4828 in LOOP. This function updates the information recorded in DR to
4829 account for the fact that the first NITERS iterations had already been
4830 executed. Specifically, it updates the OFFSET field of DR. */
4833 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
4835 tree offset
= DR_OFFSET (dr
);
4837 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
4838 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
4839 DR_OFFSET (dr
) = offset
;
4843 /* Function vect_update_inits_of_drs
4845 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
4846 This function updates the information recorded for the data references in
4847 the loop to account for the fact that the first NITERS iterations had
4848 already been executed. Specifically, it updates the initial_condition of the
4849 access_function of all the data_references in the loop. */
4852 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
4855 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
4856 struct data_reference
*dr
;
4858 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
4859 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
4861 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
4862 vect_update_init_of_dr (dr
, niters
);
4866 /* Function vect_do_peeling_for_alignment
4868 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
4869 'niters' is set to the misalignment of one of the data references in the
4870 loop, thereby forcing it to refer to an aligned location at the beginning
4871 of the execution of this loop. The data reference for which we are
4872 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
4875 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
)
4877 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4878 tree niters_of_prolog_loop
, ni_name
;
4880 struct loop
*new_loop
;
4882 if (vect_print_dump_info (REPORT_DETAILS
))
4883 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
4885 initialize_original_copy_tables ();
4887 ni_name
= vect_build_loop_niters (loop_vinfo
);
4888 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
4890 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
4892 slpeel_tree_peel_loop_to_edge (loop
, loop_preheader_edge (loop
),
4893 niters_of_prolog_loop
, ni_name
, true, 0);
4894 gcc_assert (new_loop
);
4895 #ifdef ENABLE_CHECKING
4896 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
4899 /* Update number of times loop executes. */
4900 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
4901 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
4902 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
4904 /* Update the init conditions of the access functions of all data refs. */
4905 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
4907 /* After peeling we have to reset scalar evolution analyzer. */
4910 free_original_copy_tables ();
4914 /* Function vect_create_cond_for_align_checks.
4916 Create a conditional expression that represents the alignment checks for
4917 all of data references (array element references) whose alignment must be
4921 LOOP_VINFO - two fields of the loop information are used.
4922 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
4923 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
4926 COND_EXPR_STMT_LIST - statements needed to construct the conditional
4928 The returned value is the conditional expression to be used in the if
4929 statement that controls which version of the loop gets executed at runtime.
4931 The algorithm makes two assumptions:
4932 1) The number of bytes "n" in a vector is a power of 2.
4933 2) An address "a" is aligned if a%n is zero and that this
4934 test can be done as a&(n-1) == 0. For example, for 16
4935 byte vectors the test is a&0xf == 0. */
4938 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
4939 tree
*cond_expr_stmt_list
)
4941 VEC(tree
,heap
) *may_misalign_stmts
4942 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
4944 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
4948 tree int_ptrsize_type
;
4950 tree or_tmp_name
= NULL_TREE
;
4951 tree and_tmp
, and_tmp_name
, and_stmt
;
4954 /* Check that mask is one less than a power of 2, i.e., mask is
4955 all zeros followed by all ones. */
4956 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
4958 /* CHECKME: what is the best integer or unsigned type to use to hold a
4959 cast from a pointer value? */
4960 psize
= TYPE_SIZE (ptr_type_node
);
4962 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
4964 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
4965 of the first vector of the i'th data reference. */
4967 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, ref_stmt
); i
++)
4969 tree new_stmt_list
= NULL_TREE
;
4971 tree addr_tmp
, addr_tmp_name
, addr_stmt
;
4972 tree or_tmp
, new_or_tmp_name
, or_stmt
;
4974 /* create: addr_tmp = (int)(address_of_first_vector) */
4975 addr_base
= vect_create_addr_base_for_vector_ref (ref_stmt
,
4979 if (new_stmt_list
!= NULL_TREE
)
4980 append_to_statement_list_force (new_stmt_list
, cond_expr_stmt_list
);
4982 sprintf (tmp_name
, "%s%d", "addr2int", i
);
4983 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
4984 add_referenced_var (addr_tmp
);
4985 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL_TREE
);
4986 addr_stmt
= fold_convert (int_ptrsize_type
, addr_base
);
4987 addr_stmt
= build_gimple_modify_stmt (addr_tmp_name
, addr_stmt
);
4988 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
4989 append_to_statement_list_force (addr_stmt
, cond_expr_stmt_list
);
4991 /* The addresses are OR together. */
4993 if (or_tmp_name
!= NULL_TREE
)
4995 /* create: or_tmp = or_tmp | addr_tmp */
4996 sprintf (tmp_name
, "%s%d", "orptrs", i
);
4997 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
4998 add_referenced_var (or_tmp
);
4999 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL_TREE
);
5000 tmp
= build2 (BIT_IOR_EXPR
, int_ptrsize_type
,
5001 or_tmp_name
, addr_tmp_name
);
5002 or_stmt
= build_gimple_modify_stmt (new_or_tmp_name
, tmp
);
5003 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
5004 append_to_statement_list_force (or_stmt
, cond_expr_stmt_list
);
5005 or_tmp_name
= new_or_tmp_name
;
5008 or_tmp_name
= addr_tmp_name
;
5012 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
5014 /* create: and_tmp = or_tmp & mask */
5015 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
5016 add_referenced_var (and_tmp
);
5017 and_tmp_name
= make_ssa_name (and_tmp
, NULL_TREE
);
5019 tmp
= build2 (BIT_AND_EXPR
, int_ptrsize_type
, or_tmp_name
, mask_cst
);
5020 and_stmt
= build_gimple_modify_stmt (and_tmp_name
, tmp
);
5021 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
5022 append_to_statement_list_force (and_stmt
, cond_expr_stmt_list
);
5024 /* Make and_tmp the left operand of the conditional test against zero.
5025 if and_tmp has a nonzero bit then some address is unaligned. */
5026 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
5027 return build2 (EQ_EXPR
, boolean_type_node
,
5028 and_tmp_name
, ptrsize_zero
);
5032 /* Function vect_transform_loop.
5034 The analysis phase has determined that the loop is vectorizable.
5035 Vectorize the loop - created vectorized stmts to replace the scalar
5036 stmts in the loop, and update the loop exit condition. */
5039 vect_transform_loop (loop_vec_info loop_vinfo
)
5041 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5042 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5043 int nbbs
= loop
->num_nodes
;
5044 block_stmt_iterator si
, next_si
;
5047 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5050 if (vect_print_dump_info (REPORT_DETAILS
))
5051 fprintf (vect_dump
, "=== vec_transform_loop ===");
5053 /* If the loop has data references that may or may not be aligned then
5054 two versions of the loop need to be generated, one which is vectorized
5055 and one which isn't. A test is then generated to control which of the
5056 loops is executed. The test checks for the alignment of all of the
5057 data references that may or may not be aligned. */
5059 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
5063 tree cond_expr_stmt_list
= NULL_TREE
;
5064 basic_block condition_bb
;
5065 block_stmt_iterator cond_exp_bsi
;
5066 basic_block merge_bb
;
5067 basic_block new_exit_bb
;
5069 tree orig_phi
, new_phi
, arg
;
5070 unsigned prob
= 4 * REG_BR_PROB_BASE
/ 5;
5072 cond_expr
= vect_create_cond_for_align_checks (loop_vinfo
,
5073 &cond_expr_stmt_list
);
5074 initialize_original_copy_tables ();
5075 nloop
= loop_version (loop
, cond_expr
, &condition_bb
,
5076 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
5077 free_original_copy_tables();
5079 /** Loop versioning violates an assumption we try to maintain during
5080 vectorization - that the loop exit block has a single predecessor.
5081 After versioning, the exit block of both loop versions is the same
5082 basic block (i.e. it has two predecessors). Just in order to simplify
5083 following transformations in the vectorizer, we fix this situation
5084 here by adding a new (empty) block on the exit-edge of the loop,
5085 with the proper loop-exit phis to maintain loop-closed-form. **/
5087 merge_bb
= single_exit (loop
)->dest
;
5088 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
5089 new_exit_bb
= split_edge (single_exit (loop
));
5090 new_exit_e
= single_exit (loop
);
5091 e
= EDGE_SUCC (new_exit_bb
, 0);
5093 for (orig_phi
= phi_nodes (merge_bb
); orig_phi
;
5094 orig_phi
= PHI_CHAIN (orig_phi
))
5096 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
5098 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
5099 add_phi_arg (new_phi
, arg
, new_exit_e
);
5100 SET_PHI_ARG_DEF (orig_phi
, e
->dest_idx
, PHI_RESULT (new_phi
));
5103 /** end loop-exit-fixes after versioning **/
5105 update_ssa (TODO_update_ssa
);
5106 cond_exp_bsi
= bsi_last (condition_bb
);
5107 bsi_insert_before (&cond_exp_bsi
, cond_expr_stmt_list
, BSI_SAME_STMT
);
5110 /* CHECKME: we wouldn't need this if we called update_ssa once
5112 bitmap_zero (vect_memsyms_to_rename
);
5114 /* Peel the loop if there are data refs with unknown alignment.
5115 Only one data ref with unknown store is allowed. */
5117 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
5118 vect_do_peeling_for_alignment (loop_vinfo
);
5120 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5121 compile time constant), or it is a constant that doesn't divide by the
5122 vectorization factor, then an epilog loop needs to be created.
5123 We therefore duplicate the loop: the original loop will be vectorized,
5124 and will compute the first (n/VF) iterations. The second copy of the loop
5125 will remain scalar and will compute the remaining (n%VF) iterations.
5126 (VF is the vectorization factor). */
5128 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
5129 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
5130 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
5131 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
);
5133 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
5134 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
5136 /* 1) Make sure the loop header has exactly two entries
5137 2) Make sure we have a preheader basic block. */
5139 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
5141 split_edge (loop_preheader_edge (loop
));
5143 /* FORNOW: the vectorizer supports only loops which body consist
5144 of one basic block (header + empty latch). When the vectorizer will
5145 support more involved loop forms, the order by which the BBs are
5146 traversed need to be reconsidered. */
5148 for (i
= 0; i
< nbbs
; i
++)
5150 basic_block bb
= bbs
[i
];
5152 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
5154 tree stmt
= bsi_stmt (si
);
5155 stmt_vec_info stmt_info
;
5158 if (vect_print_dump_info (REPORT_DETAILS
))
5160 fprintf (vect_dump
, "------>vectorizing statement: ");
5161 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
5163 stmt_info
= vinfo_for_stmt (stmt
);
5164 gcc_assert (stmt_info
);
5165 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5166 && !STMT_VINFO_LIVE_P (stmt_info
))
5172 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
5173 != (unsigned HOST_WIDE_INT
) vectorization_factor
)
5174 && vect_print_dump_info (REPORT_DETAILS
))
5175 fprintf (vect_dump
, "multiple-types.");
5177 /* -------- vectorize statement ------------ */
5178 if (vect_print_dump_info (REPORT_DETAILS
))
5179 fprintf (vect_dump
, "transform statement.");
5181 strided_store
= false;
5182 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
);
5186 if (DR_GROUP_FIRST_DR (stmt_info
))
5188 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5189 interleaving chain was completed - free all the stores in
5191 tree next
= DR_GROUP_FIRST_DR (stmt_info
);
5193 stmt_vec_info next_stmt_info
;
5197 next_si
= bsi_for_stmt (next
);
5198 next_stmt_info
= vinfo_for_stmt (next
);
5199 /* Free the attached stmt_vec_info and remove the stmt. */
5200 ann
= stmt_ann (next
);
5201 tmp
= DR_GROUP_NEXT_DR (next_stmt_info
);
5202 free (next_stmt_info
);
5203 set_stmt_info (ann
, NULL
);
5204 bsi_remove (&next_si
, true);
5207 bsi_remove (&si
, true);
5212 /* Free the attached stmt_vec_info and remove the stmt. */
5213 ann
= stmt_ann (stmt
);
5215 set_stmt_info (ann
, NULL
);
5216 bsi_remove (&si
, true);
5224 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
5226 mark_set_for_renaming (vect_memsyms_to_rename
);
5228 /* The memory tags and pointers in vectorized statements need to
5229 have their SSA forms updated. FIXME, why can't this be delayed
5230 until all the loops have been transformed? */
5231 update_ssa (TODO_update_ssa
);
5233 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
5234 fprintf (vect_dump
, "LOOP VECTORIZED.");