/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop *);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility function dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}
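/* Illustrative example (not part of the original file; names are made up):
   for a V4SI vector type and NAME "x",
       vect_get_new_vect_var (vectype, vect_simple_var, "x")
   creates a temporary whose name begins with "vect_x", while passing
   vect_pointer_var instead would yield a "vect_px"-style name.  */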
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      tree step;

      /* For interleaved access step we divide STEP by the size of the
         interleaving group.  */
      if (DR_GROUP_SIZE (stmt_info))
        step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
                            build_int_cst (TREE_TYPE (offset),
                                           DR_GROUP_SIZE (stmt_info)));
      else
        step = DR_STEP (dr);

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset  */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                           base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}
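/* Illustrative sketch of the output (not from the original file; SSA names
   are made up): for a data-ref a[i] with DR_BASE_ADDRESS = &a, DR_OFFSET = 0
   and DR_INIT = 0, the statements appended to NEW_STMT_LIST look roughly
   like:
       base_off.1 = 0;
       vect_pa.2 = (v4si *) (&a + base_off.1);
   and the pointer-typed SSA_NAME vect_pa.2 is returned.  */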
/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_MODIFY_STMT <name, data-ref> or
         GIMPLE_MODIFY_STMT <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */
static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);

  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
}
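/* Illustrative sketch (not from the original file; SSA names are made up):
   when ONLY_INIT is false, the code above leaves the loop with a pointer
   induction variable of roughly this shape:
       preheader:  vp.0 = (vectype *) initial_address;
       loop:       vp.1 = phi <vp.0, vp.2>
                   ...
                   vp.2 = vp.1 + sizeof (vectype);
   The function returns vp.1, and the stmt defining vp.2 is recorded in
   *PTR_INCR.  */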
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  Connect the new
   increment stmt to the existing def-use update-chain of the pointer.

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + vector_size
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
              The increment amount across iterations is also expected to be
              vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */
static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  incr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, ptr_var,
                      build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  return new_dataref_ptr;
}
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will be
   used in the vectorization of STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);

  init_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
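/* For example (illustrative; names are made up): when vectorizing "x + 3"
   with V4SI, the caller builds the constant { 3, 3, 3, 3 } and this function
   emits in the preheader something like:
       vect_cst_.5 = { 3, 3, 3, 3 };
   returning the SSA_NAME vect_cst_.5 defined by that stmt.  */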
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  if (dt == vect_invariant_def || dt == vect_constant_def)
    {
      /* Do nothing; can reuse same def.  */
      return vec_oprnd;
    }

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);

  return vec_oprnd;
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}
#define ADJUST_IN_EPILOG 1

/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   SCALAR_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for "ADJUST_IN_EPILOG" - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i = 0; i < n; i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
   TODO: Use some cost-model to estimate which scheme is more profitable.  */
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  int nelements;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;
  tree vector_type;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      if (INTEGRAL_TYPE_P (type))
        def = build_int_cst (type, 0);
      else
        def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
      /* All the 'nunits' elements are set to 0.  The final result will be
         adjusted by 'init_val' at the loop epilog.  */
      nelements = nunits;
      need_epilog_adjust = true;
#else
      /* 'nunits - 1' elements are set to 0; The last element is set to
         'init_val'.  No further adjustments at the epilog are needed.  */
      nelements = nunits - 1;
      need_epilog_adjust = false;
#endif
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      def = init_val;
      nelements = nunits;
      need_epilog_adjust = false;
      break;

    default:
      gcc_unreachable ();
    }

  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vector_type, t);
  else
    vec = build_constructor_from_list (vector_type, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  return vect_init_vector (stmt, vec, vector_type);
}
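/* For example (illustrative): for a sum reduction "s = s + a[i]" with V4SI
   under the ADJUST_IN_EPILOG scheme used above, this returns the vector def
   of { 0, 0, 0, 0 } and sets *SCALAR_DEF to init_val, which the epilog code
   later adds back to the reduced result.  */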
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   Input:
   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

     The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

     The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>
*/
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  int op_type;

  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }

  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
     (i.e. when reduc_code is not available) and in the final adjustment code
     (if needed).  Also get the original scalar reduction variable as
     defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
     represents a reduction pattern), the tree-code and scalar-def are
     taken from the original stmt that the pattern-stmt (STMT) replaces.
     Otherwise (it is a regular reduction) - the tree-code and scalar-def
     are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
  scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                            build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (shift_code, vectype,
                                            new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (code, vectype,
                                            new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    new_scalar_dest,
                                    build2 (code, scalar_type, new_name, new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                            new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'scalar_initial_def' is zero).

         Create:
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                            new_scalar_dest,
                            build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.6 Replace uses of s_out0 with uses of s_out3  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
      SET_USE (use_p, new_temp);
}
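/* Illustrative epilog (not from the original file; SSA names are made up)
   for a V4SI sum reduction when scheme 2 (whole-vector shifts) is used:
       v_out1 = phi <VECT_DEF>
       v.1 = vec_shift <v_out1, 64>     # shift by half the vector size
       v.2 = v_out1 + v.1
       v.3 = vec_shift <v.2, 32>
       v.4 = v.2 + v.3
       s.1 = extract_field <v.4, 0>
       s.2 = s.1 + init_val             # the ADJUST_IN_EPILOG adjustment
   */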
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
      get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree operation;
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def, def_stmt;
  enum vect_def_type dt;
  tree new_phi;
  tree scalar_type;
  bool is_simple_use;
  tree orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  stmt_vec_info prev_stmt_info;
  tree reduc_def;
  tree new_stmt = NULL_TREE;
  int j;

  gcc_assert (ncopies >= 1);

  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (!STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != binary_op && op_type != ternary_op)
    return false;
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type-1; i++)
    {
      op = TREE_OPERAND (operation, i);
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
      gcc_assert (is_simple_use);
      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
                  dt == vect_constant_def);
    }

  op = TREE_OPERAND (operation, i);
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;

  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */

  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: use the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Create the reduction-phi that defines the reduction-operand.  */
  new_phi = create_phi_node (vec_dest, loop->header);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          op = TREE_OPERAND (operation, 0);
          loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
          if (op_type == ternary_op)
            {
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }

          /* Get the vector def for the reduction variable from the phi node */
          reduc_def = PHI_RESULT (new_phi);
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1)  */
          reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   true if the target has a vectorized version of the function,
   or false if the function cannot be vectorized.  */

static bool
vectorizable_function (tree call, tree vectype)
{
  tree fndecl = get_callee_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return false;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return false;

  if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
    return true;

  return false;
}

/* Returns an expression that performs a call to vectorized version
   of FNDECL in type VECTYPE, with the arguments given by ARGS.
   If extra statements need to be generated, they are inserted
   before BSI.  */

static tree
build_vectorized_function_call (tree fndecl,
                                tree vectype, tree args)
{
  tree vfndecl;
  enum built_in_function code = DECL_FUNCTION_CODE (fndecl);

  /* The target specific builtin should be available.  */
  vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
  gcc_assert (vfndecl != NULL_TREE);

  return build_function_call_expr (vfndecl, args);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op, args, type;
  tree vec_oprnd, vargs, *pvargs_end;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, rhs, new_temp, def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable call?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  if (TREE_CODE (operation) != CALL_EXPR)
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  if (!vectorizable_function (operation, vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }
  gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));

  for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
    {
      op = TREE_VALUE (args);

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  vargs = NULL_TREE;
  pvargs_end = &vargs;
  for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
    {
      op = TREE_VALUE (args);
      vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);

      *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
      pvargs_end = &TREE_CHAIN (*pvargs_end);
    }

  fndecl = get_callee_fndecl (operation);
  rhs = build_vectorized_function_call (fndecl, vectype, vargs);
  *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;

  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  /* The call in STMT might prevent it from being removed in dce.  We however
     cannot remove it here, due to the way the ssa name it defines is mapped
     to the new definition.  So just replace rhs of the statement with something
     harmless.  */
  type = TREE_TYPE (scalar_dest);
  GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def, def_stmt;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  /* Is vectorizable assignment?  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  op = GIMPLE_STMT_OPERAND (stmt, 1);
  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);

  /* Arguments are ready.  Create the new vector stmt.  */
  *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, vec_oprnd);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */

static int
vect_min_worthwhile_factor (enum tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      return 4;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_NOT_EXPR:
      return 2;

    default:
      return INT_MAX;
    }
}
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt0, dt1;
  tree new_stmt;
  stmt_vec_info prev_stmt_info;
  int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  int nunits_out;
  tree vectype_out;
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  int j;

  gcc_assert (ncopies >= 1);

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_out != nunits_in)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  op0 = TREE_OPERAND (operation, 0);
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
        return false;

      /* Invariant argument is needed for a vector shift
         by a scalar shift operand.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
             || dt1 == vect_constant_def
             || dt1 == vect_invariant_def))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "operand mode requires invariant argument.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e -
     we need to create 4 vectorized stmts):

     before vectorization:
                                    RELATED_STMT    VEC_STMT
        S1:     x = memref          -               -
        S2:     z = x + 1           -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                    RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0       VS1_1           -
        VS1_1:  vx1 = memref1       VS1_2           -
        VS1_2:  vx2 = memref2       VS1_3           -
        VS1_3:  vx3 = memref3       -               -
        S1:     x = load            -               VS1_0
        S2:     z = x + 1           -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                    RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0       VS1_1           -
        VS1_1:  vx1 = memref1       VS1_2           -
        VS1_2:  vx2 = memref2       VS1_3           -
        VS1_3:  vx3 = memref3       -               -
        S1:     x = load            -               VS1_0
        VS2_0:  vz0 = vx0 + v1      VS2_1           -
        VS2_1:  vz1 = vx1 + v1      VS2_2           -
        VS2_2:  vz2 = vx2 + v1      VS2_3           -
        VS2_3:  vz3 = vx3 + v1      -               -
        S2:     z = x + 1           -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          if (op_type == binary_op)
            {
              if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
                {
                  /* Vector shl and shr insn patterns can be defined with
                     scalar operand 2 (shift operand).  In this case, use
                     constant or loop invariant op1 directly, without
                     extending it to vector mode first.  */
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        fprintf (vect_dump, "operand 1 using scalar mode.");
                      vec_oprnd1 = op1;
                    }
                }
              if (!vec_oprnd1)
                vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
            }
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      if (op_type == binary_op)
        new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                           build2 (code, vectype, vec_oprnd0, vec_oprnd1));
      else
        new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                           build1 (code, vectype, vec_oprnd0));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
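

/* Illustrative sketch, not part of the vectorizer: the multiple-copy
   ("unrolling") bookkeeping described in the comment above, reduced to a
   plain linked list.  Each scalar stmt points (via VEC_STMT) at the first
   of its VF/nunits vector copies, and the copies are chained through
   RELATED_STMT; to vectorize copy J of a use we follow J links from the
   def's first vector stmt.  The struct below is hypothetical.  */

struct example_vinfo
{
  struct example_vinfo *related_stmt;   /* next copy in the chain */
};

/* Return the J-th vector copy in the chain starting at FIRST; the
   analogue of J repeated vect_get_vec_def_for_stmt_copy calls.  */
static struct example_vinfo *
example_nth_copy (struct example_vinfo *first, int j)
{
  while (j-- > 0)
    first = first->related_stmt;
  return first;
}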
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
                            tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  tree new_temp;
  tree def, def_stmt;
  enum vect_def_type dt0;
  tree new_stmt;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j;
  tree expr;
  tree vectype_in;
  tree scalar_type;
  optab optab;
  enum machine_mode vec_mode;

  /* Is STMT a vectorizable type-demotion operation?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != CONVERT_EXPR)
    return false;

  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  vectype_out = get_vectype_for_scalar_type (scalar_type);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in != nunits_out / 2) /* FORNOW */
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* Supportable by target?  */
  code = VEC_PACK_MOD_EXPR;
  optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
  if (!optab)
    return false;

  vec_mode = TYPE_MODE (vectype_in);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
      new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
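

/* Illustrative sketch, not part of the vectorizer: the effect of the
   VEC_PACK_MOD_EXPR generated above, on plain arrays.  Two input vectors
   of N ints are narrowed (with modulo truncation) into one output vector
   of 2*N shorts, which is why ncopies is computed from nunits_out and
   each copy consumes two input defs.  */

static void
example_pack_mod (const int *in0, const int *in1, short *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = (short) in0[i];        /* elements of the first input */
  for (i = 0; i < n; i++)
    out[n + i] = (short) in1[i];    /* elements of the second input */
}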
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static tree
vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, block_stmt_iterator *bsi,
                               tree stmt)
{
  tree vec_params;
  tree expr;
  tree new_stmt;
  tree new_temp;
  tree sym;
  ssa_op_iter iter;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
      if (op_type == binary_op)
        vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
      expr = build_function_call_expr (decl, vec_params);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type == binary_op)
        expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
      else
        expr = build1 (code, vectype, vec_oprnd0);
    }
  new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, new_stmt, bsi);

  if (code == CALL_EXPR)
    {
      FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
        {
          if (TREE_CODE (sym) == SSA_NAME)
            sym = SSA_NAME_VAR (sym);
          mark_sym_for_renaming (sym);
        }
    }

  return new_stmt;
}
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
                             tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  int op_type;
  tree def, def_stmt;
  enum vect_def_type dt0, dt1;
  tree new_stmt;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j;
  tree vectype_in;

  /* Is STMT a vectorizable type-promotion operation?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
    return false;

  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  gcc_assert (ncopies >= 1);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_out != nunits_in / 2) /* FORNOW */
    return false;

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  op_type = TREE_CODE_LENGTH (code);
  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!supportable_widening_operation (code, stmt, vectype_in,
                                       &decl1, &decl2, &code1, &code2))
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
        }

      /* Arguments are ready.  Create the new vector stmts.  We are creating
         two vector defs because the widened result does not fit in one vector.
         The vectorized stmt can be expressed as a call to a target builtin,
         or using a tree-code.  */
      /* Generate first half of the widened result:  */
      new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
                        vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);

      /* Generate second half of the widened result:  */
      new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
                        vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
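

/* Illustrative sketch, not part of the vectorizer: the two-half widening
   performed above, on plain arrays.  One input vector of 2*N shorts
   produces two output vectors of N ints (code1/code2 are the "first
   half" and "second half" operations), which is why
   vect_gen_widened_results_half is called twice per copy.  */

static void
example_widen (const short *in, int *out_lo, int *out_hi, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out_lo[i] = (int) in[i];        /* first half of the elements */
  for (i = 0; i < n; i++)
    out_hi[i] = (int) in[n + i];    /* second half of the elements */
}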
/* Function vect_strided_store_supported.

   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
   and FALSE otherwise.  */

static bool
vect_strided_store_supported (tree vectype)
{
  optab interleave_high_optab, interleave_low_optab;
  int mode;

  mode = (int) TYPE_MODE (vectype);

  /* Check that the operation is supported.  */
  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
                                               vectype);
  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
                                              vectype);
  if (!interleave_high_optab || !interleave_low_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for interleave.");
      return false;
    }

  if (interleave_high_optab->handlers[(int) mode].insn_code
      == CODE_FOR_nothing
      || interleave_low_optab->handlers[(int) mode].insn_code
      == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "interleave op not supported by target.");
      return false;
    }

  return true;
}
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2, generate interleave_high/low stmts to reorder the data
   correctly for the stores.  Return the final references for stores in
   RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number
   to each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  8 16 24  1  9 17 25
   2nd vec:   2 10 18 26  3 11 19 27
   3rd vec:   4 12 20 28  5 13 21 29
   4th vec:   6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output.  The input
   of each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector.  The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7

   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
   In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low  (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low  (2nd vec, 4th vec)

   The output for the first stage is:

   I1:   0 16  1 17  2 18  3 19
   I2:   4 20  5 21  6 22  7 23
   I3:   8 24  9 25 10 26 11 27
   I4:  12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:   0  8 16 24  1  9 17 25
   I2:   2 10 18 26  3 11 19 27
   I3:   4 12 20 28  5 13 21 29
   I4:   6 14 22 30  7 15 23 31.  */

static bool
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
                          unsigned int length,
                          tree stmt,
                          block_stmt_iterator *bsi,
                          VEC(tree,heap) **result_chain)
{
  tree perm_dest, perm_stmt, vect1, vect2, high, low;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  tree scalar_dest;
  int i;
  unsigned int j;
  VEC(tree,heap) *first, *second;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  first = VEC_alloc (tree, heap, length/2);
  second = VEC_alloc (tree, heap, length/2);

  /* Check that the operation is supported.  */
  if (!vect_strided_store_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);

  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length/2; j++)
        {
          vect1 = VEC_index (tree, dr_chain, j);
          vect2 = VEC_index (tree, dr_chain, j+length/2);

          /* Create interleaving stmt:
             in the case of big endian:
                                high = interleave_high (vect1, vect2)
             and in the case of little endian:
                                high = interleave_low (vect1, vect2).  */
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          if (BYTES_BIG_ENDIAN)
            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                                build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
                                        vect1, vect2));
          else
            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                                build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
                                        vect1, vect2));
          high = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          VEC_replace (tree, *result_chain, 2*j, high);

          /* Create interleaving stmt:
             in the case of big endian:
                                low = interleave_low (vect1, vect2)
             and in the case of little endian:
                                low = interleave_high (vect1, vect2).  */
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          if (BYTES_BIG_ENDIAN)
            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                                build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
                                        vect1, vect2));
          else
            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                                build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
                                        vect1, vect2));
          low = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          VEC_replace (tree, *result_chain, 2*j+1, low);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
  return true;
}
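

/* Illustrative sketch, not part of the vectorizer: one interleave step
   of the network built above, modelled with plain arrays.
   interleave_high/low take the high/low halves of two vectors and
   alternate their elements; applying both to each (first-half,
   second-half) pair of the chain for log2(length) stages produces the
   store order traced in the comment.  */

static void
example_interleave (const int *a, const int *b, int nunits,
                    int *high, int *low)
{
  int i;
  for (i = 0; i < nunits / 2; i++)
    {
      high[2 * i] = a[i];                   /* high half of a */
      high[2 * i + 1] = b[i];               /* high half of b */
      low[2 * i] = a[nunits / 2 + i];       /* low half of a */
      low[2 * i + 1] = b[nunits / 2 + i];   /* low half of b */
    }
}

/* E.g. a = {0,1,2,3}, b = {4,5,6,7} gives high = {0,4,1,5} and
   low = {2,6,3,7}, matching the example in the comment above.  */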
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  ssa_op_iter iter;
  def_operand_p def_p;
  tree def, def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int j;
  tree next_stmt, first_stmt;
  bool strided_store = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  tree new_stmt;
  tree ptr_incr;

  gcc_assert (ncopies >= 1);

  /* Is vectorizable store? */

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && !DR_GROUP_FIRST_DR (stmt_info))
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      strided_store = true;
      if (!vect_strided_store_supported (vectype))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  if (strided_store)
    {
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));

      DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = NULL_TREE;
          return true;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = 1;
    }

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
  gcc_assert (alignment_support_cheme);
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        {
          /* For interleaved stores we collect vectorized defs for all the
             stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then used
             as an input to vect_permute_store_chain(), and OPRNDS as an input
             to vect_get_vec_def_for_stmt_copy() for the next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          next_stmt = first_stmt;
          for (i = 0; i < group_size; i++)
            {
              /* Since gaps are not supported for interleaved stores,
                 GROUP_SIZE is the exact number of stmts in the chain.
                 Therefore, NEXT_STMT can't be NULL_TREE.  In case that there
                 is no interleaving, GROUP_SIZE is 1, and only one iteration
                 of the loop will be executed.  */
              gcc_assert (next_stmt);
              op = GIMPLE_STMT_OPERAND (next_stmt, 1);
              vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
              VEC_quick_push(tree, dr_chain, vec_oprnd);
              VEC_quick_push(tree, oprnds, vec_oprnd);
              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
            }
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
                                                  &dummy, &ptr_incr, false,
                                                  TREE_TYPE (vec_oprnd));
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
                                                VEC_index (tree, oprnds, i));
              VEC_replace(tree, dr_chain, i, vec_oprnd);
              VEC_replace(tree, oprnds, i, vec_oprnd);
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
        }

      if (strided_store)
        {
          result_chain = VEC_alloc (tree, heap, group_size);
          /* Permute.  */
          if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
                                         &result_chain))
            return false;
        }

      next_stmt = first_stmt;
      for (i = 0; i < group_size; i++)
        {
          /* For strided stores vectorized defs are interleaved in
             vect_permute_store_chain().  */
          if (strided_store)
            vec_oprnd = VEC_index(tree, result_chain, i);

          data_ref = build_fold_indirect_ref (dataref_ptr);
          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, data_ref,
                             vec_oprnd);
          vect_finish_stmt_generation (stmt, new_stmt, bsi);

          /* Set the VDEFs for the vector pointer.  If this virtual def
             has a use outside the loop and a loop peel is performed
             then the def may be renamed by the peel.  Mark it for
             renaming so the later use will also be renamed.  */
          copy_virtual_operands (new_stmt, next_stmt);
          if (j == 0)
            {
              /* The original store is deleted so the same SSA_NAMEs
                 can be used.  */
              FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
                {
                  SSA_NAME_DEF_STMT (def) = new_stmt;
                  mark_sym_for_renaming (SSA_NAME_VAR (def));
                }

              STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
            }
          else
            {
              /* Create new names for all the definitions created by COPY and
                 add replacement mappings for each new name.  */
              FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
                {
                  create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
                  mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
                }

              STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
            }

          prev_stmt_info = vinfo_for_stmt (new_stmt);
          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          if (!next_stmt)
            break;
          /* Bump the vector pointer.  */
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
        }
    }

  return true;
}
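

/* Illustrative sketch, not part of the vectorizer: the gating logic used
   above for interleaved stores.  Each scalar store in the group bumps a
   shared counter; only the visit that brings the count up to the group
   size emits the vector stores, so the whole chain is transformed exactly
   once, at its last member.  The struct below is hypothetical.  */

struct example_group
{
  unsigned int size;          /* plays the role of DR_GROUP_SIZE */
  unsigned int store_count;   /* plays the role of DR_GROUP_STORE_COUNT */
};

/* Return nonzero if this visit should perform the grouped store.  */
static int
example_should_transform (struct example_group *g)
{
  return ++g->store_count == g->size;
}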
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_unaligned_software_pipeline scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
      msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
      msq = phi (msq_init, ---)

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:

    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized.  This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

static tree
vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
                        tree *realignment_token)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  tree vec_dest;
  tree init_addr;
  tree inc;
  tree ptr;
  tree data_ref;
  tree new_stmt;
  tree new_temp;
  basic_block new_bb;
  tree msq_init;
  tree msq;
  tree phi_stmt;

  /* 1. Create msq_init = *(floor(p1)) in the loop preheader.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
                                  NULL_TREE);
  data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
  new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, data_ref);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
  gcc_assert (!new_bb);
  msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
  copy_virtual_operands (new_stmt, stmt);
  update_vuses_to_preheader (new_stmt, loop);

  /* 2. Create permutation mask, if required, in loop preheader.  */
  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;
      tree params = build_tree_list (NULL_TREE, init_addr);

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = build_function_call_expr (builtin_decl, params);
      vec_dest = vect_create_destination_var (scalar_dest,
                                              TREE_TYPE (new_stmt));
      new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                         new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* 3. Create msq = phi <msq_init, lsq> in loop.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL_TREE);
  phi_stmt = create_phi_node (msq, loop->header);
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
  add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));

  return msq;
}
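

/* Illustrative sketch, not part of the vectorizer: the realignment
   scheme set up above, on plain arrays.  Instead of one unaligned load
   per iteration, two aligned loads straddling the desired address are
   combined; MSQ carries the previous aligned load across iterations via
   the loop-header phi, so each iteration issues only one new load.  OFS
   plays the role of the realignment token (the misalignment, in
   elements), and EXAMPLE_VN is the number of elements per vector.  Like
   the real scheme, the sketch reads one vector past the last element,
   which the caller is assumed to have made safe.  */

#define EXAMPLE_VN 4

static void
example_realign_loop (const int *p /* misaligned by OFS elements */,
                      int ofs, int *out, int n)
{
  const int *ap = p - ofs;      /* floor(p): the aligned base */
  int msq[EXAMPLE_VN], lsq[EXAMPLE_VN], i, j;

  for (j = 0; j < EXAMPLE_VN; j++)      /* prolog: msq_init = *(floor(p)) */
    msq[j] = ap[j];

  for (i = 0; i < n; i += EXAMPLE_VN)
    {
      for (j = 0; j < EXAMPLE_VN; j++)  /* in loop: lsq = *(floor(p) + VS) */
        lsq[j] = ap[i + EXAMPLE_VN + j];
      for (j = 0; j < EXAMPLE_VN; j++)  /* realign_load (msq, lsq, token) */
        out[i + j] = (ofs + j < EXAMPLE_VN)
                     ? msq[ofs + j] : lsq[ofs + j - EXAMPLE_VN];
      for (j = 0; j < EXAMPLE_VN; j++)  /* msq = phi (msq_init, lsq) */
        msq[j] = lsq[j];
    }
}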
/* Function vect_strided_load_supported.

   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
   and FALSE otherwise.  */

static bool
vect_strided_load_supported (tree vectype)
{
  optab perm_even_optab, perm_odd_optab;
  int mode;

  mode = (int) TYPE_MODE (vectype);

  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
  if (!perm_even_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for perm_even.");
      return false;
    }

  if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "perm_even op not supported by target.");
      return false;
    }

  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
  if (!perm_odd_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for perm_odd.");
      return false;
    }

  if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "perm_odd op not supported by target.");
      return false;
    }

  return true;
}
/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number
   to each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  4  8 12 16 20 24 28
   2nd vec:   1  5  9 13 17 21 25 29
   3rd vec:   2  6 10 14 18 22 26 30
   4th vec:   3  7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output
   of extract_even will be:   0 2 4 6
   and of extract_odd:        1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd  (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd  (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:   0  2  4  6  8 10 12 14
   E2:   1  3  5  7  9 11 13 15
   E3:  16 18 20 22 24 26 28 30
   E4:  17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):   0  2  4  6  8 10 12 14
   2nd vec (E3):  16 18 20 22 24 26 28 30
   3rd vec (E2):   1  3  5  7  9 11 13 15
   4th vec (E4):  17 19 21 23 25 27 29 31

   The output of the second stage:

   E1:   0  4  8 12 16 20 24 28
   E2:   2  6 10 14 18 22 26 30
   E3:   1  5  9 13 17 21 25 29
   E4:   3  7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):   0  4  8 12 16 20 24 28
   2nd vec (E3):   1  5  9 13 17 21 25 29
   3rd vec (E2):   2  6 10 14 18 22 26 30
   4th vec (E4):   3  7 11 15 19 23 27 31.  */

static bool
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         tree stmt,
                         block_stmt_iterator *bsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  /* Check that the operation is supported.  */
  if (!vect_strided_load_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                              build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
                                      first_vect, second_vect));

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
                              build2 (VEC_EXTRACT_ODD_EXPR, vectype,
                                      first_vect, second_vect));
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
  return true;
}
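

/* Illustrative sketch, not part of the vectorizer: one extract_even /
   extract_odd step used above, on plain arrays.  Each stage pairs up
   consecutive vectors of the chain and separates their even- and
   odd-indexed elements; log2(length) such stages de-interleave the
   loaded data as traced in the comment.  */

static void
example_extract_even_odd (const int *a, const int *b, int nunits,
                          int *even, int *odd)
{
  int i;
  for (i = 0; i < nunits / 2; i++)
    {
      even[i] = a[2 * i];                       /* even elements of a */
      odd[i] = a[2 * i + 1];                    /* odd elements of a */
      even[nunits / 2 + i] = b[2 * i];          /* even elements of b */
      odd[nunits / 2 + i] = b[2 * i + 1];       /* odd elements of b */
    }
}

/* E.g. a = {0,1,2,3}, b = {4,5,6,7} gives even = {0,2,4,6} and
   odd = {1,3,5,7}, matching the example in the comment above.  */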
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

static bool
vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  tree next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain, it contains
     permuted vectors, that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  /* Permute.  */
  if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
    return false;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
         code elimination pass later.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
              tree rel_stmt = STMT_VINFO_RELATED_STMT (
                                                vinfo_for_stmt (prev_stmt));
              while (rel_stmt)
                {
                  prev_stmt = rel_stmt;
                  rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                }
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
            }
          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
  return true;
}
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree new_stmt = NULL_TREE;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  tree dataref_ptr = NULL_TREE;
  tree ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  tree phi_stmt = NULL_TREE;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  tree first_stmt;

  /* Is vectorizable load? */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF
      && TREE_CODE (op) != INDIRECT_REF
      && !DR_GROUP_FIRST_DR (stmt_info))
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      strided_load = true;

      /* Check if interleaving is supported.  */
      if (!vect_strided_load_supported (vectype))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  if (strided_load)
    {
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      dr_chain = VEC_alloc (tree, heap, group_size);
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = 1;
    }

  alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
  gcc_assert (alignment_support_cheme);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e -
     we need to create 4 vectorized stmts):

     before vectorization:
                                    RELATED_STMT    VEC_STMT
        S1:     x = memref          -               -
        S2:     z = x + 1           -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                    RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0       VS1_1           -
        VS1_1:  vx1 = memref1       VS1_2           -
        VS1_2:  vx2 = memref2       VS1_3           -
        VS1_3:  vx3 = memref3       -               -
        S1:     x = load            -               VS1_0
        S2:     z = x + 1           -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit strided access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_unaligned_software_pipeline) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }  */

  if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
      phi_stmt = SSA_NAME_DEF_STMT (msq);
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector pointer update chain.  */
      if (j == 0)
        dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset,
                                                &dummy, &ptr_incr, false,
                                                NULL_TREE);
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);

      for (i = 0; i < group_size; i++)
        {
          /* 2. Create the vector-load in the loop.  */
          switch (alignment_support_cheme)
            {
            case dr_aligned:
              gcc_assert (aligned_access_p (first_dr));
              data_ref = build_fold_indirect_ref (dataref_ptr);
              break;
            case dr_unaligned_supported:
              {
                int mis = DR_MISALIGNMENT (first_dr);
                tree tmis = (mis == -1 ? size_zero_node : size_int (mis));

                gcc_assert (!aligned_access_p (first_dr));
                tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
                data_ref =
                  build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
                break;
              }
            case dr_unaligned_software_pipeline:
              gcc_assert (!aligned_access_p (first_dr));
              data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
              break;
            default:
              gcc_unreachable ();
            }
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                             data_ref);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
          copy_virtual_operands (new_stmt, stmt);
          mark_symbols_for_renaming (new_stmt);

          /* 3. Handle explicit realignment if necessary/supported.  */
          if (alignment_support_cheme == dr_unaligned_software_pipeline)
            {
              /* Create in loop:
                 <vec_dest = realign_load (msq, lsq, realignment_token)>  */
              lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
              if (!realignment_token)
                realignment_token = dataref_ptr;
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt =
                build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
                        realignment_token);
              new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                                 new_stmt);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
              vect_finish_stmt_generation (stmt, new_stmt, bsi);
              if (i == group_size - 1 && j == ncopies - 1)
                add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
              msq = lsq;
            }

          if (strided_load)
            VEC_quick_push (tree, dr_chain, new_temp);
          if (i < group_size - 1)
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
        }

      if (strided_load)
        {
          if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
            return false;
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          dr_chain = VEC_alloc (tree, heap, group_size);
        }
      else
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  return true;
}
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

bool
vectorizable_live_operation (tree stmt,
                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                             tree *vec_stmt ATTRIBUTE_UNUSED)
{
  tree operation;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  int op_type;
  tree op;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (!STMT_VINFO_LIVE_P (stmt_info))
    return false;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);

  op_type = TREE_CODE_LENGTH (code);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */

  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (dt != vect_invariant_def && dt != vect_constant_def)
        return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - the condition that is checked for simple use.

   Returns whether COND can be vectorized.  Checks whether the
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
    return false;

  return true;
}
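
/* For example (illustrative): a condition like 'a_5 < b_7', where both
   operands are SSA names with classifiable definitions, is simple, and so
   is 'a_5 <= 10'.  A condition whose operand is some other constant kind
   (e.g. a COMPLEX_CST), or whose definition vect_is_simple_use cannot
   classify, is rejected.  */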
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at BSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree def;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (op) != COND_EXPR)
    return false;

  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
    return false;

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST)
    return false;

  vec_mode = TYPE_MODE (vectype);

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (op, vec_mode);
    }

  /* Transform.  */

  /* Handle def.  */
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready.  Create the new vector stmt.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
			vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
			  vec_compare, vec_then_clause, vec_else_clause);

  *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
		      vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
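
/* Illustrative example: with V4SI vectors (one copy, ncopies == 1) the
   scalar stmt

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a single vector stmt of the form

       vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   provided the target can expand it (expand_vec_cond_expr_p).  */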
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

static bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree orig_stmt_in_pattern;
  bool done;

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
	{
	case type_demotion_vec_info_type:
	  done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case type_promotion_vec_info_type:
	  done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case op_vec_info_type:
	  done = vectorizable_operation (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case assignment_vec_info_type:
	  done = vectorizable_assignment (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case load_vec_info_type:
	  done = vectorizable_load (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case store_vec_info_type:
	  done = vectorizable_store (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  if (DR_GROUP_FIRST_DR (stmt_info))
	    {
	      /* In case of interleaving, the whole chain is vectorized when
		 the last store in the chain is reached.  Store stmts before
		 the last one are skipped, and their vec_stmt_info shouldn't
		 be freed meanwhile.  */
	      *strided_store = true;
	      if (STMT_VINFO_VEC_STMT (stmt_info))
		is_store = true;
	    }
	  else
	    is_store = true;
	  break;

	case condition_vec_info_type:
	  done = vectorizable_condition (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	case call_vec_info_type:
	  done = vectorizable_call (stmt, bsi, &vec_stmt);
	  break;

	default:
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "stmt not supported.");
	  gcc_unreachable ();
	}

      gcc_assert (vec_stmt || *strided_store);
      if (vec_stmt)
	{
	  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
	  orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
	  if (orig_stmt_in_pattern)
	    {
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
	      if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
		{
		  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);

		  /* STMT was inserted by the vectorizer to replace a
		     computation idiom.  ORIG_STMT_IN_PATTERN is a stmt in the
		     original sequence that computed this idiom.  We need to
		     record a pointer to VEC_STMT in the stmt_info of
		     ORIG_STMT_IN_PATTERN.  See more details in the
		     documentation of vect_pattern_recog.  */

		  STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
		}
	    }
	}
    }

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
	{
	case reduc_vec_info_type:
	  done = vectorizable_reduction (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	  break;

	default:
	  done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
	  gcc_assert (done);
	}
    }

  return is_store;
}
/* This function builds "ni_name = number of iterations the loop executes"
   and places it on the loop preheader.  */

static tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni_name, stmt, var;
  edge pe;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_var (var);
  ni_name = force_gimple_operand (ni, &stmt, false, var);

  pe = loop_preheader_edge (loop);
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return ni_name;
}
/* This function generates the following statements:

     ni_name = number of iterations the loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf

   and places them on the loop preheader edge.  */

static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
				 tree *ni_name_ptr,
				 tree *ratio_mult_vf_name_ptr,
				 tree *ratio_name_ptr)
{
  edge pe;
  basic_block new_bb;
  tree stmt, ni_name;
  tree var;
  tree ratio_name;
  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree log_vf;

  pe = loop_preheader_edge (loop);

  /* Generate a temporary variable that contains the number of
     iterations the loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));

  /* Create: ratio = ni >> log2(vf)  */

  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
  if (!is_gimple_val (ratio_name))
    {
      var = create_tmp_var (TREE_TYPE (ni), "bnd");
      add_referenced_var (var);

      ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
      pe = loop_preheader_edge (loop);
      new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
				    ratio_name, log_vf);
  if (!is_gimple_val (ratio_mult_vf_name))
    {
      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
      add_referenced_var (var);

      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
						 true, var);
      pe = loop_preheader_edge (loop);
      new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;

  return;
}
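
/* Worked example: with VF == 4 and a runtime iteration count ni == 17,
   the statements generated on the preheader compute

       ratio         = 17 >> 2 = 4
       ratio_mult_vf = 4 << 2  = 16

   so the vectorized loop executes 4 iterations, covering the first 16
   scalar iterations, and the epilog loop handles the remaining 1.
   (The shifts are valid because VF is always a power of two.)  */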
/* Function update_vuses_to_preheader.

   Input:
   STMT - a statement with potential VUSEs.
   LOOP - the loop whose preheader will contain STMT.

   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
   appears to be defined in a VDEF in another statement in a loop.
   One such case is when the VUSE is at the dereference of a __restricted__
   pointer in a load and the VDEF is at the dereference of a different
   __restricted__ pointer in a store.  Vectorization may result in
   copy_virtual_operands being called to copy the problematic VUSE to a new
   statement that is being inserted in the loop preheader.  This procedure
   is called to change the SSA_NAME in the new statement's VUSE from the
   SSA_NAME updated in the loop to the related SSA_NAME available on the
   path entering the loop.

   When this function is called, we have the following situation:

	# vuse <name1>
	S1: vector load			<-- in the loop preheader

     loop:
	# name1 = phi <name0, name2>
	...
	# name2 = vdef <name1>
	store
	...
     end loop

   Stmt S1 was created in the loop preheader block as part of misaligned-load
   handling.  This function fixes the name of the VUSE of S1 from 'name1' to
   'name0', the related SSA_NAME available on the path entering the loop
   (the phi argument coming from the preheader edge).  */

static void
update_vuses_to_preheader (tree stmt, struct loop *loop)
{
  basic_block header_bb = loop->header;
  edge preheader_e = loop_preheader_edge (loop);
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
    {
      tree ssa_name = USE_FROM_PTR (use_p);
      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
      tree name_var = SSA_NAME_VAR (ssa_name);
      basic_block bb = bb_for_stmt (def_stmt);

      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
      if (!IS_EMPTY_STMT (def_stmt)
	  && flow_bb_inside_loop_p (loop, bb))
	{
	  /* If the block containing the statement defining the SSA_NAME
	     is in the loop then it's necessary to find the definition
	     outside the loop using the PHI nodes of the header.  */
	  tree phi;
	  bool updated = false;

	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
	    {
	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
		{
		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
		  updated = true;
		  break;
		}
	    }
	  gcc_assert (updated);
	}
    }
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized), i.e. the number of times the IVs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
	  print_generic_expr (vect_dump, phi, TDF_SLIM);
	}

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "reduc phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a
	 polynomial of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));

      ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
			fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
				     fold_convert (TREE_TYPE (init_expr),
						   niters),
				     step_expr),
			init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
	bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
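
/* Worked example: for an IV described by the access function
   {init_expr, +, step_expr}_loop, the value after NITERS iterations is

       ni = init_expr + NITERS * step_expr

   e.g. an index starting at 0 with step 1 ends at NITERS, which is
   exactly the value the epilog loop must resume from.  */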
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */

static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of the original loop:

     ni_name = number of iterations the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  loop_num = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The
     other is a bb after NEW_LOOP, where these IVs are not used.  Find the
     edge that is on the path where the LOOP IVs are used and need to be
     updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of the original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset the scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
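
/* Illustrative result: for VF == 4 and a loop that executes n times, the
   transformation produces, in effect,

       for (i = 0; i < (n & ~3); i += 4)	// vectorized: n/4 iterations
	 ...
       for (; i < n; i++)			// scalar epilog: n%4 iterations
	 ...

   guarded so that the epilog is skipped when n%4 == 0 at runtime.  */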
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the
   loop) and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute the address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min (LOOP_NITERS, (VF - addr_mis/elem_size) & (VF-1))

   (elem_size = element type size; an element is the scalar element
   whose type is the inner type of the vectype)

   For interleaved accesses:

   prolog_niters = min (LOOP_NITERS,
			(VF/group_size - addr_mis/elem_size)
			& (VF/group_size - 1))

   where group_size is the size of the interleaved group.  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int group_size = 1;
  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      /* For interleaved access the element size must be multiplied by the
	 size of the interleaved group.  */
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (
					  DR_GROUP_FIRST_DR (stmt_info)));
      element_size *= group_size;
    }

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "known misalignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type,
			     (vf - elem_misalign) & (vf / group_size - 1));
    }
  else
    {
      tree new_stmts = NULL_TREE;
      tree start_addr =
	vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
	build_int_cst (type, exact_log2 (vectype_align / vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create: byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create: elem_misalign = byte_misalign / element_size  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create: (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create: prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on the loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
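
/* Worked example: for V4SI (VF == 4, elem_size == 4, vectype_size == 16)
   and a runtime address with addr & 15 == 8:

       byte_misalign = 8
       elem_misalign = 8 >> 2 = 2
       prolog_niters = (4 - 2) & 3 = 2

   i.e. peeling 2 scalar iterations advances the access by 8 bytes, after
   which the data reference is 16-byte aligned.  */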
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
  DR_OFFSET (dr) = offset;
}
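
/* For example: after peeling NITERS == 2 iterations of a loop accessing
   'int a[i]' (DR_STEP == 4 bytes), the offset of the data reference grows
   by 2 * 4 == 8 bytes, so the remaining loop starts at &a[2].  */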
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_dump && (dump_flags & TDF_DETAILS))
    fprintf (vect_dump, "=== vect_update_inits_of_drs ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    vect_update_init_of_dr (dr, niters);
}
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
				   niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update the number of times the loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset the scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all the data references (array element references) whose alignment must
   be checked at runtime.

   Input:
   LOOP_VINFO - two fields of the loop information are used.
		LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
		LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero, and this test can be done
	as a&(n-1) == 0.  For example, for 16-byte vectors the test is
	a&0xf == 0.  */

static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
				   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask + 1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the
     address of the first vector of the i'th data reference.  */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* Create: addr_tmp = (int) (address_of_first_vector)  */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
							&new_stmt_list,
							NULL_TREE);

      if (new_stmt_list != NULL_TREE)
	append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
			  addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
	{
	  /* Create: or_tmp = or_tmp | addr_tmp  */
	  sprintf (tmp_name, "%s%d", "orptrs", i);
	  or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
	  add_referenced_var (or_tmp);
	  new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
	  or_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
			    new_or_tmp_name,
			    build2 (BIT_IOR_EXPR, int_ptrsize_type,
				    or_tmp_name, addr_tmp_name));
	  SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
	  append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
	  or_tmp_name = new_or_tmp_name;
	}
      else
	or_tmp_name = addr_tmp_name;
    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* Create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask");
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
		     and_tmp_name,
		     build2 (BIT_AND_EXPR, int_ptrsize_type,
			     or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
		 and_tmp_name, ptrsize_zero);
}
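
/* Illustrative example: for two may-misalign refs based at pointers p and q
   and 16-byte vectors (mask == 0xf), the generated sequence is roughly

       addr2int0 = (intptr) p;
       addr2int1 = (intptr) q;
       orptrs1   = addr2int0 | addr2int1;
       andmask   = orptrs1 & 0xf;

   and the returned condition is 'andmask == 0': it is true only if both
   addresses have their low 4 bits clear, i.e. both are 16-byte aligned.  */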
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned.  */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
						     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loop, cond_expr, &condition_bb, true);
      free_original_copy_tables ();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors).  Just in order to
	 simplify following transformations in the vectorizer, we fix this
	 situation here by adding a new (empty) block on the exit-edge of
	 the loop, with the proper loop-exit phis to maintain
	 loop-closed-form.  **/

      merge_bb = single_exit (loop)->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
	  new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_memsyms_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown alignment is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF)
     iterations.  (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
			   LOOP_VINFO_INT_NITERS (loop_vinfo)
			   / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists of one
     basic block (header + empty latch).  When the vectorizer supports more
     involved loop forms, the order in which the BBs are traversed will
     need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }

	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
	       != (unsigned HOST_WIDE_INT) vectorization_factor)
	      && vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "multiple-types.");

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  strided_store = false;
	  is_store = vect_transform_stmt (stmt, &si, &strided_store);
	  if (is_store)
	    {
	      stmt_ann_t ann;
	      if (DR_GROUP_FIRST_DR (stmt_info))
		{
		  /* Interleaving.  If IS_STORE is TRUE, the vectorization of
		     the interleaving chain was completed - free all the
		     stores in the chain.  */
		  tree next = DR_GROUP_FIRST_DR (stmt_info);
		  tree tmp;
		  stmt_vec_info next_stmt_info;

		  while (next)
		    {
		      next_stmt_info = vinfo_for_stmt (next);
		      /* Free the attached stmt_vec_info and remove the
			 stmt.  */
		      ann = stmt_ann (next);
		      tmp = DR_GROUP_NEXT_DR (next_stmt_info);
		      free (next_stmt_info);
		      set_stmt_info (ann, NULL);
		      next = tmp;
		    }
		  bsi_remove (&si, true);
		  continue;
		}
	      else
		{
		  /* Free the attached stmt_vec_info and remove the stmt.  */
		  ann = stmt_ann (stmt);
		  free (stmt_info);
		  set_stmt_info (ann, NULL);
		  bsi_remove (&si, true);
		  continue;
		}
	    }
	  else if (strided_store)
	    {
	      /* This is a case of skipped interleaved store.  We don't free
		 its stmt_vec_info.  */
	      bsi_remove (&si, true);
	      continue;
	    }
	  bsi_next (&si);
	}			/* stmts in BB */
    }				/* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  mark_set_for_renaming (vect_memsyms_to_rename);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}