1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt
50 (tree
, block_stmt_iterator
*, block_stmt_iterator
*, bool *);
51 static tree
vect_create_destination_var (tree
, tree
);
52 static tree vect_create_data_ref_ptr
53 (tree
, block_stmt_iterator
*, tree
, tree
*, tree
*, bool);
54 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
55 static tree
vect_setup_realignment (tree
, block_stmt_iterator
*, tree
*);
56 static tree
bump_vector_ptr (tree
, tree
, block_stmt_iterator
*, tree
);
57 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
58 static tree
vect_get_vec_def_for_operand (tree
, tree
, tree
*);
59 static tree
vect_get_epilog_def_for_operand (tree
, tree
, block_stmt_iterator
*);
60 static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type
, tree
);
61 static tree
vect_init_vector (tree
, tree
);
62 static void vect_finish_stmt_generation
63 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
64 static void update_vuses_to_preheader (tree
, struct loop
*);
65 static bool vect_is_simple_cond (tree
, loop_vec_info
);
66 static void vect_create_epilog_for_reduction (tree
, tree
, enum tree_code
, tree
);
67 static tree
get_initial_def_for_reduction (tree
, tree
, tree
*);
68 static tree vect_gen_widened_results_half
69 (enum tree_code
, tree
, tree
, tree
, tree
, int, tree
, block_stmt_iterator
*, tree
);
70 static bool vect_permute_store_chain (VEC(tree
,heap
) *, unsigned int, tree
,
71 block_stmt_iterator
*, VEC(tree
,heap
) **);
72 static bool vect_transform_strided_load (tree
, VEC(tree
,heap
) *, int,
73 block_stmt_iterator
*);
74 static bool vect_permute_load_chain (VEC(tree
,heap
) *, unsigned int, tree
,
75 block_stmt_iterator
*,
77 static bool vect_permute_store_chain (VEC(tree
,heap
) *, unsigned int, tree
,
78 block_stmt_iterator
*,
80 static bool vect_strided_load_supported (tree
);
81 static bool vect_strided_store_supported (tree
);
83 /* Utility function dealing with loop peeling (not peeling itself). */
84 static void vect_generate_tmps_on_preheader
85 (loop_vec_info
, tree
*, tree
*, tree
*);
86 static tree
vect_build_loop_niters (loop_vec_info
);
87 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
88 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
89 static tree
advance_iv_by_n (struct loop
*, tree
, tree
, block_stmt_iterator
*);
90 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
91 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
92 static void vect_do_peeling_for_alignment (loop_vec_info
, struct loops
*);
93 static void vect_do_peeling_for_loop_bound
94 (loop_vec_info
, tree
*, struct loops
*);
95 static int vect_min_worthwhile_factor (enum tree_code
);
98 /* Function vect_get_new_vect_var.
100 Returns a name for a new variable. The current naming scheme appends the
101 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
102 the name of vectorizer generated variables, and appends that to NAME if
106 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
113 case vect_simple_var
:
116 case vect_scalar_var
:
119 case vect_pointer_var
:
127 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
129 new_vect_var
= create_tmp_var (type
, prefix
);
135 /* Function vect_create_addr_base_for_vector_ref.
137 Create an expression that computes the address of the first memory location
138 that will be accessed for a data reference.
141 STMT: The statement containing the data reference.
142 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
143 OFFSET: Optional. If supplied, it is be added to the initial address.
146 1. Return an SSA_NAME whose value is the address of the memory location of
147 the first vector of the data reference.
148 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
149 these statement(s) which define the returned SSA_NAME.
151 FORNOW: We are only handling array accesses with step 1. */
154 vect_create_addr_base_for_vector_ref (tree stmt
,
158 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
159 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
160 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
161 tree base_name
= build_fold_indirect_ref (data_ref_base
);
162 tree ref
= DR_REF (dr
);
163 tree scalar_type
= TREE_TYPE (ref
);
164 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
167 tree addr_base
, addr_expr
;
169 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
170 tree init
= unshare_expr (DR_INIT (dr
));
172 /* Create base_offset */
173 base_offset
= size_binop (PLUS_EXPR
, base_offset
, init
);
174 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
175 add_referenced_tmp_var (dest
);
176 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, dest
);
177 append_to_statement_list_force (new_stmt
, new_stmt_list
);
181 tree tmp
= create_tmp_var (TREE_TYPE (base_offset
), "offset");
184 /* For interleaved access step we divide STEP by the size of the
185 interleaving group. */
186 if (DR_GROUP_SIZE (stmt_info
))
187 step
= fold_build2 (TRUNC_DIV_EXPR
, TREE_TYPE (offset
), DR_STEP (dr
),
188 build_int_cst (TREE_TYPE (offset
),
189 DR_GROUP_SIZE (stmt_info
)));
193 add_referenced_tmp_var (tmp
);
194 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
, step
);
195 base_offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
196 base_offset
, offset
);
197 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
198 append_to_statement_list_force (new_stmt
, new_stmt_list
);
201 /* base + base_offset */
202 addr_base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (data_ref_base
), data_ref_base
,
205 /* addr_expr = addr_base */
206 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
207 get_name (base_name
));
208 add_referenced_tmp_var (addr_expr
);
209 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
210 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
211 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
212 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
214 if (vect_print_dump_info (REPORT_DETAILS
))
216 fprintf (vect_dump
, "created ");
217 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
223 /* Function vect_create_data_ref_ptr.
225 Create a memory reference expression for vector access, to be used in a
226 vector load/store stmt. The reference is based on a new pointer to vector
230 1. STMT: a stmt that references memory. Expected to be of the form
231 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
232 2. BSI: block_stmt_iterator where new stmts can be added.
233 3. OFFSET (optional): an offset to be added to the initial address accessed
234 by the data-ref in STMT.
235 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
236 pointing to the initial address.
239 1. Declare a new ptr to vector_type, and have it point to the base of the
240 data reference (initial addressed accessed by the data reference).
241 For example, for vector of type V8HI, the following code is generated:
244 vp = (v8hi *)initial_address;
246 if OFFSET is not supplied:
247 initial_address = &a[init];
248 if OFFSET is supplied:
249 initial_address = &a[init + OFFSET];
251 Return the initial_address in INITIAL_ADDRESS.
253 If the access in STMT is an interleaved access, create the above pointer
254 for all the data-refs in the interleaving group and return them in DRS,
257 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create
258 a data-reference in the loop based on the new vector pointer vp. This
259 new data reference will by some means be updated each iteration of
260 the loop. Return the pointer vp'. */
263 vect_create_data_ref_ptr (tree stmt
, block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
264 tree offset
, tree
*initial_address
, tree
*ptr_incr
,
268 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
269 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
270 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
271 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
277 tree new_stmt_list
= NULL_TREE
;
278 edge pe
= loop_preheader_edge (loop
);
281 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
283 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
285 if (vect_print_dump_info (REPORT_DETAILS
))
287 tree data_ref_base
= base_name
;
288 fprintf (vect_dump
, "create vector-pointer variable to type: ");
289 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
290 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
291 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
292 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
293 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
294 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
295 fprintf (vect_dump
, " vectorizing a record based array ref: ");
296 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
297 fprintf (vect_dump
, " vectorizing a pointer ref: ");
298 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
301 /** (1) Create the new vector-pointer variable: **/
303 vect_ptr_type
= build_pointer_type (vectype
);
304 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
305 get_name (base_name
));
306 add_referenced_tmp_var (vect_ptr
);
309 /** (2) Add aliasing information to the new vector-pointer:
310 (The points-to info (DR_PTR_INFO) may be defined later.) **/
312 tag
= DR_MEMTAG (dr
);
315 /* If tag is a variable (and NOT_A_TAG) than a new type alias
316 tag must be created with tag added to its may alias list. */
318 new_type_alias (vect_ptr
, tag
);
320 var_ann (vect_ptr
)->type_mem_tag
= tag
;
322 var_ann (vect_ptr
)->subvars
= DR_SUBVARS (dr
);
324 /** (3) Calculate the initial address the vector-pointer, and set
325 the vector-pointer to point to it before the loop: **/
327 /* Create: (&(base[init_val+offset]) in the loop preheader. */
328 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
330 pe
= loop_preheader_edge (loop
);
331 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
332 gcc_assert (!new_bb
);
333 *initial_address
= new_temp
;
335 /* Create: p = (vectype *) initial_base */
336 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
337 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
338 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
339 TREE_OPERAND (vec_stmt
, 0) = vect_ptr_init
;
340 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
341 gcc_assert (!new_bb
);
343 /** (4) Handle the updating of the vector-pointer inside the loop: **/
345 if (only_init
) /* No update in loop is required. */
347 /* Copy the points-to information if it exists. */
348 if (DR_PTR_INFO (dr
))
349 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
350 return vect_ptr_init
;
354 block_stmt_iterator incr_bsi
;
356 tree indx_before_incr
, indx_after_incr
;
359 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
361 create_iv (vect_ptr_init
,
362 fold_convert (vect_ptr_type
, TYPE_SIZE_UNIT (vectype
)),
363 NULL_TREE
, loop
, &incr_bsi
, insert_after
,
364 &indx_before_incr
, &indx_after_incr
);
365 incr
= bsi_stmt (incr_bsi
);
366 set_stmt_info ((tree_ann_t
) stmt_ann (incr
),
367 new_stmt_vec_info (incr
, loop_vinfo
));
369 /* Copy the points-to information if it exists. */
370 if (DR_PTR_INFO (dr
))
372 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
373 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
375 merge_alias_info (vect_ptr_init
, indx_before_incr
);
376 merge_alias_info (vect_ptr_init
, indx_after_incr
);
379 return indx_before_incr
;
384 /* Function bump_vector_ptr
386 Increment a pointer (to a vector type) by vector-size. Connect the new
387 increment stmt to the exising def-use update-chain of the pointer.
389 The pointer def-use update-chain before this function:
390 DATAREF_PTR = phi (p_0, p_2)
392 PTR_INCR: p_2 = DATAREF_PTR + step
394 The pointer def-use update-chain after this function:
395 DATAREF_PTR = phi (p_0, p_2)
397 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
399 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
402 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
404 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
405 The increment amount across iteration is also expected to be
407 BSI - location where the new update stmt is to be placed.
408 STMT - the original scalar memory-access stmt that is being vectorized.
410 Output: Return NEW_DATAREF_PTR as illustrated above.
415 bump_vector_ptr (tree dataref_ptr
, tree ptr_incr
, block_stmt_iterator
*bsi
,
418 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
419 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
420 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
421 tree vptr_type
= TREE_TYPE (dataref_ptr
);
422 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
423 tree update
= fold_convert (vptr_type
, TYPE_SIZE_UNIT (vectype
));
427 tree new_dataref_ptr
;
429 incr_stmt
= build2 (MODIFY_EXPR
, vptr_type
, ptr_var
,
430 build2 (PLUS_EXPR
, vptr_type
, dataref_ptr
, update
));
431 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
432 TREE_OPERAND (incr_stmt
, 0) = new_dataref_ptr
;
433 vect_finish_stmt_generation (stmt
, incr_stmt
, bsi
);
435 /* Update the vector-pointer's cross-iteration increment. */
436 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
438 tree use
= USE_FROM_PTR (use_p
);
440 if (use
== dataref_ptr
)
441 SET_USE (use_p
, new_dataref_ptr
);
443 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
446 /* Copy the points-to information if it exists. */
447 if (DR_PTR_INFO (dr
))
448 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
449 merge_alias_info (new_dataref_ptr
, dataref_ptr
); /* CHECKME */
451 return new_dataref_ptr
;
455 /* Function vect_setup_realignment
457 This function is called when vectorizing an unaligned load using
458 the dr_unaligned_software_pipeline scheme.
459 This function generates the following code at the loop prolog:
462 msq_init = *(floor(p)); # prolog load
463 realignment_token = call target_builtin;
465 msq = phi (msq_init, ---)
467 The code above sets up a new (vector) pointer, pointing to the first
468 location accessed by STMT, and a "floor-aligned" load using that pointer.
469 It also generates code to compute the "realignment-token" (if the relevant
470 target hook was defined), and creates a phi-node at the loop-header bb
471 whose arguments are the result of the prolog-load (created by this
472 function) and the result of a load that takes place in the loop (to be
473 created by the caller to this function).
474 The caller to this function uses the phi-result (msq) to create the
475 realignment code inside the loop, and sets up the missing phi argument,
479 msq = phi (msq_init, lsq)
480 lsq = *(floor(p')); # load in loop
481 result = realign_load (msq, lsq, realignment_token);
484 STMT - (scalar) load stmt to be vectorized. This load accesses
485 a memory location that may be unaligned.
486 BSI - place where new code is to be inserted.
489 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
490 target hook, if defined.
491 Return value - the result of the loop-header phi node.
495 vect_setup_realignment (tree stmt
, block_stmt_iterator
*bsi
,
496 tree
*realignment_token
)
498 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
499 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
500 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
501 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
502 edge pe
= loop_preheader_edge (loop
);
503 tree scalar_dest
= TREE_OPERAND (stmt
, 0);
515 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
516 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
517 ptr
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &init_addr
, NULL
, true);
518 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
519 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
520 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
521 TREE_OPERAND (new_stmt
, 0) = new_temp
;
522 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
523 gcc_assert (!new_bb
);
524 msq_init
= TREE_OPERAND (new_stmt
, 0);
525 copy_virtual_operands (new_stmt
, stmt
);
526 update_vuses_to_preheader (new_stmt
, loop
);
529 if (targetm
.vectorize
.builtin_mask_for_load
)
531 /* Create permutation mask, if required, in loop preheader. */
533 tree params
= build_tree_list (NULL_TREE
, init_addr
);
535 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
536 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
537 new_stmt
= build_function_call_expr (builtin_decl
, params
);
538 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
539 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
540 TREE_OPERAND (new_stmt
, 0) = new_temp
;
541 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
542 gcc_assert (!new_bb
);
543 *realignment_token
= TREE_OPERAND (new_stmt
, 0);
545 /* The result of the CALL_EXPR to this builtin is determined from
546 the value of the parameter and no global variables are touched
547 which makes the builtin a "const" function. Requiring the
548 builtin to have the "const" attribute makes it unnecessary
549 to call mark_call_clobbered. */
550 gcc_assert (TREE_READONLY (builtin_decl
));
553 /* <3> Create msq = phi <msq_init, lsq> in loop */
554 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
555 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
556 phi_stmt
= create_phi_node (msq
, loop
->header
);
557 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
558 add_phi_arg (phi_stmt
, msq_init
, loop_preheader_edge (loop
));
564 /* Function vect_create_destination_var.
566 Create a new temporary of type VECTYPE. */
569 vect_create_destination_var (tree scalar_dest
, tree vectype
)
572 const char *new_name
;
574 enum vect_var_kind kind
;
576 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
577 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
579 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
581 new_name
= get_name (scalar_dest
);
584 vec_dest
= vect_get_new_vect_var (type
, vect_simple_var
, new_name
);
585 add_referenced_tmp_var (vec_dest
);
591 /* Function vect_init_vector.
593 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
594 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
595 used in the vectorization of STMT. */
598 vect_init_vector (tree stmt
, tree vector_var
)
600 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
601 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
602 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
605 tree vectype
= TREE_TYPE (vector_var
);
611 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
612 add_referenced_tmp_var (new_var
);
614 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
615 new_temp
= make_ssa_name (new_var
, init_stmt
);
616 TREE_OPERAND (init_stmt
, 0) = new_temp
;
618 pe
= loop_preheader_edge (loop
);
619 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
620 gcc_assert (!new_bb
);
622 if (vect_print_dump_info (REPORT_DETAILS
))
624 fprintf (vect_dump
, "created new init_stmt: ");
625 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
628 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
633 /* Function vect_get_vec_def_for_operand.
635 OP is an operand in STMT. This function returns a (vector) def that will be
636 used in the vectorized stmt for STMT.
638 In the case that OP is an SSA_NAME which is defined in the loop, then
639 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
641 In case OP is an invariant or constant, a new stmt that creates a vector def
642 needs to be introduced. */
645 vect_get_vec_def_for_operand (tree op
, tree stmt
, tree
*scalar_def
)
650 stmt_vec_info def_stmt_info
= NULL
;
651 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
652 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
653 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
654 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
655 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
661 enum vect_def_type dt
;
664 if (vect_print_dump_info (REPORT_DETAILS
))
666 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
667 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
670 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
671 gcc_assert (is_simple_use
);
672 if (vect_print_dump_info (REPORT_DETAILS
))
676 fprintf (vect_dump
, "def = ");
677 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
681 fprintf (vect_dump
, " def_stmt = ");
682 print_generic_expr (vect_dump
, def_stmt
, TDF_SLIM
);
687 /* Case 1: operand is a constant. */
688 case vect_constant_def
:
693 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
694 if (vect_print_dump_info (REPORT_DETAILS
))
695 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
697 for (i
= nunits
- 1; i
>= 0; --i
)
699 t
= tree_cons (NULL_TREE
, op
, t
);
701 vec_cst
= build_vector (vectype
, t
);
702 return vect_init_vector (stmt
, vec_cst
);
705 /* Case 2: operand is defined outside the loop - loop invariant. */
706 case vect_invariant_def
:
711 /* Create 'vec_inv = {inv,inv,..,inv}' */
712 if (vect_print_dump_info (REPORT_DETAILS
))
713 fprintf (vect_dump
, "Create vector_inv.");
715 for (i
= nunits
- 1; i
>= 0; --i
)
717 t
= tree_cons (NULL_TREE
, def
, t
);
720 /* FIXME: use build_constructor directly. */
721 vec_inv
= build_constructor_from_list (vectype
, t
);
722 return vect_init_vector (stmt
, vec_inv
);
725 /* Case 3: operand is defined inside the loop. */
729 *scalar_def
= def_stmt
;
731 /* Get the def from the vectorized stmt. */
732 def_stmt_info
= vinfo_for_stmt (def_stmt
);
733 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
734 gcc_assert (vec_stmt
);
735 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
739 /* Case 4: operand is defined by a loop header phi - reduction */
740 case vect_reduction_def
:
742 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
744 /* Get the def before the loop */
745 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
746 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
749 /* Case 5: operand is defined by loop-header phi - induction. */
750 case vect_induction_def
:
752 if (vect_print_dump_info (REPORT_DETAILS
))
753 fprintf (vect_dump
, "induction - unsupported.");
754 internal_error ("no support for induction"); /* FORNOW */
763 /* Function vect_get_vec_def_for_stmt_copy
765 Return a vector-def for an operand. This function is used when the
766 vectorized stmt to be created (by the caller to this function) is a "copy"
767 created in case the vectorized result cannot fit in one vector, and several
768 copies of the vector-stmt are required. In this case the vector-def is
769 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
770 of the stmt that defines VEC_OPRND.
771 DT is the type of the vector def VEC_OPRND.
774 In case the vectorization factor (VF) is bigger than the number
775 of elements that can fit in a vectype (nunits), we have to generate
776 more than one vector stmt to vectorize the scalar stmt. This situation
777 arises when there are multiple data-types operated upon in the loop; the
778 smallest data-type determines the VF, and as a result, when vectorizing
779 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
780 vector stmt (each computing a vector of 'nunits' results, and together
781 computing 'VF' results in each iteration). This function is called when
782 vectorizing such a stmt (e.g. vectorizing S2 in the illusration below):
784 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
786 S1: x = load VS1.0: vx.0 = memref0 VS1.1
787 VS1.1: vx.1 = memref1 VS1.2
788 VS1.2: vx.2 = memref2 VS1.3
789 VS1.3: vx.3 = memref3
791 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
792 VSnew.1: vz1 = vx.1 + ... VSnew.2
793 VSnew.2: vz2 = vx.2 + ... VSnew.3
794 VSnew.3: vz3 = vx.3 + ...
796 To create the first vector-stmt (out of the 'VF/nunits' copies) -
797 denoted VSnew.0 - the function 'vect_get_vec_def_for_operand' is called to
798 get the relevant vector-def for each operand (e.g. 'vx.0').
799 To create the remaining copies of the vector-stmt (VSnew.j), this
800 function is called to get the relevant vector-def for each operand. It is
801 obtained from the respective VS1.j stmt, which is recorded in the
802 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
803 For example, to obtain the vector-def 'vx.1' in order to create the
804 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
805 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
806 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
807 and return its def ('vx.1').
808 Overall to create the above sequence this function will be called 3 times:
809 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
810 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
811 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
814 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
816 tree vec_stmt_for_operand
;
817 stmt_vec_info def_stmt_info
;
819 if (dt
== vect_invariant_def
|| dt
== vect_constant_def
)
821 /* Do nothing; can reuse same def. */ ;
825 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
826 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
827 gcc_assert (def_stmt_info
);
828 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
829 gcc_assert (vec_stmt_for_operand
);
830 vec_oprnd
= TREE_OPERAND (vec_stmt_for_operand
, 0);
836 /* Function vect_finish_stmt_generation.
838 Insert a new stmt. */
841 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
,
842 block_stmt_iterator
*bsi
)
844 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
845 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
847 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
848 set_stmt_info (get_tree_ann (vec_stmt
),
849 new_stmt_vec_info (vec_stmt
, loop_vinfo
));
851 if (vect_print_dump_info (REPORT_DETAILS
))
853 fprintf (vect_dump
, "add new stmt: ");
854 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
857 /* Make sure bsi points to the stmt that is being vectorized. */
858 gcc_assert (stmt
== bsi_stmt (*bsi
));
860 #ifdef USE_MAPPED_LOCATION
861 SET_EXPR_LOCATION (vec_stmt
, EXPR_LOCATION (stmt
));
863 SET_EXPR_LOCUS (vec_stmt
, EXPR_LOCUS (stmt
));
868 #define ADJUST_IN_EPILOG 1
870 /* Function get_initial_def_for_reduction
873 STMT - a stmt that performs a reduction operation in the loop.
874 INIT_VAL - the initial value of the reduction variable
877 SCALAR_DEF - a tree that holds a value to be added to the final result
878 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
879 Return a vector variable, initialized according to the operation that STMT
880 performs. This vector will be used as the initial value of the
881 vector of partial results.
883 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
886 min/max: [init_val,init_val,..,init_val,init_val]
887 bit and/or: [init_val,init_val,..,init_val,init_val]
888 and when necessary (e.g. add/mult case) let the caller know
889 that it needs to adjust the result by init_val.
891 Option2: Initialize the vector as follows:
892 add: [0,0,...,0,init_val]
893 mult: [1,1,...,1,init_val]
894 min/max: [init_val,init_val,...,init_val]
895 bit and/or: [init_val,init_val,...,init_val]
896 and no adjustments are needed.
898 For example, for the following code:
904 STMT is 's = s + a[i]', and the reduction variable is 's'.
905 For a vector of 4 units, we want to return either [0,0,0,init_val],
906 or [0,0,0,0] and let the caller know that it needs to adjust
907 the result at the end by 'init_val'.
909 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
910 TODO: Use some cost-model to estimate which scheme is more profitable.
914 get_initial_def_for_reduction (tree stmt
, tree init_val
, tree
*scalar_def
)
917 get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (stmt
,0)));
918 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
920 enum tree_code code
= TREE_CODE (TREE_OPERAND (stmt
, 1));
921 tree type
= TREE_TYPE (init_val
);
923 tree vec
, t
= NULL_TREE
;
924 bool need_epilog_adjust
;
927 gcc_assert (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
));
935 if (INTEGRAL_TYPE_P (type
))
936 def
= build_int_cst (type
, 0);
938 def
= build_real (type
, dconst0
);
940 #ifdef ADJUST_IN_EPILOG
941 /* All the 'nunits' elements are set to 0. The final result will be
942 adjusted by 'init_val' at the loop epilog. */
944 need_epilog_adjust
= true;
946 /* 'nunits - 1' elements are set to 0; The last element is set to
947 'init_val'. No further adjustments at the epilog are needed. */
948 nelements
= nunits
- 1;
949 need_epilog_adjust
= false;
957 need_epilog_adjust
= false;
964 for (i
= nelements
- 1; i
>= 0; --i
)
965 t
= tree_cons (NULL_TREE
, def
, t
);
967 if (nelements
== nunits
- 1)
969 /* Set the last element of the vector. */
970 t
= tree_cons (NULL_TREE
, init_val
, t
);
973 gcc_assert (nelements
== nunits
);
975 if (TREE_CODE (init_val
) == INTEGER_CST
|| TREE_CODE (init_val
) == REAL_CST
)
976 vec
= build_vector (vectype
, t
);
978 vec
= build_constructor_from_list (vectype
, t
);
980 if (!need_epilog_adjust
)
981 *scalar_def
= NULL_TREE
;
983 *scalar_def
= init_val
;
985 return vect_init_vector (stmt
, vec
);
989 /* Function vect_create_epilog_for_reduction
991 Create code at the loop-epilog to finalize the result of a reduction
994 VECT_DEF is a vector of partial results.
995 REDUC_CODE is the tree-code for the epilog reduction.
996 STMT is the scalar reduction stmt that is being vectorized.
997 REDUCTION_PHI is the phi-node that carries the reduction computation.
1000 1. Creates the reduction def-use cycle: sets the the arguments for
1002 The loop-entry argument is the vectorized initial-value of the reduction.
1003 The loop-latch argument is VECT_DEF - the vector of partial sums.
1004 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1005 by applying the operation specified by REDUC_CODE if available, or by
1006 other means (whole-vector shifts or a scalar loop).
1007 The function also creates a new phi node at the loop exit to preserve
1008 loop-closed form, as illustrated below.
1010 The flow at the entry to this function:
1013 vec_def = phi <null, null> # REDUCTION_PHI
1014 VECT_DEF = vector_stmt # vectorized form of STMT
1015 s_loop = scalar_stmt # (scalar) STMT
1017 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1021 The above is transformed by this function into:
1024 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1025 VECT_DEF = vector_stmt # vectorized form of STMT
1026 s_loop = scalar_stmt # (scalar) STMT
1028 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1029 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1030 v_out2 = reduce <v_out1>
1031 s_out3 = extract_field <v_out2, 0>
1032 s_out4 = adjust_result <s_out3>
1038 vect_create_epilog_for_reduction (tree vect_def
, tree stmt
,
1039 enum tree_code reduc_code
, tree reduction_phi
)
1041 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1043 enum machine_mode mode
;
1044 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1045 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1046 basic_block exit_bb
;
1050 block_stmt_iterator exit_bsi
;
1055 tree new_scalar_dest
, exit_phi
;
1056 tree bitsize
, bitpos
, bytesize
;
1057 enum tree_code code
;
1058 tree scalar_initial_def
;
1059 tree vec_initial_def
;
1061 imm_use_iterator imm_iter
;
1062 use_operand_p use_p
;
1063 bool extract_scalar_result
;
1066 tree operation
= TREE_OPERAND (stmt
, 1);
1069 op_type
= TREE_CODE_LENGTH (TREE_CODE (operation
));
1070 reduction_op
= TREE_OPERAND (operation
, op_type
-1);
1071 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
1072 mode
= TYPE_MODE (vectype
);
1074 /*** 1. Create the reduction def-use cycle. ***/
1076 /* 1.1 set the loop-entry arg of the reduction-phi: */
1077 /* For the case of reduction, vect_get_vec_def_for_operand returns
1078 the scalar def before the loop, that defines the initial value
1079 of the reduction variable. */
1080 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
1081 &scalar_initial_def
);
1082 add_phi_arg (reduction_phi
, vec_initial_def
, loop_preheader_edge (loop
));
1084 /* 1.2 set the loop-latch arg for the reduction-phi: */
1085 add_phi_arg (reduction_phi
, vect_def
, loop_latch_edge (loop
));
1087 if (vect_print_dump_info (REPORT_DETAILS
))
1089 fprintf (vect_dump
, "transform reduction: created def-use cycle:");
1090 print_generic_expr (vect_dump
, reduction_phi
, TDF_SLIM
);
1091 fprintf (vect_dump
, "\n");
1092 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vect_def
), TDF_SLIM
);
1096 /*** 2. Create the reduction epilog code.
1097 The reduction epilog code operates across the elements of the vector
1098 of partial results computed by the vectorized loop.
1099 The reduction epilog code consists of:
1100 step 1: compute the scalar result in a vector (v_out2)
1101 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1102 step 3: adjust the scalar result (s_out3) if needed.
1104 Step 1 can be accomplished using one the following three schemes:
1105 (scheme 1) using reduc_code, if available.
1106 (scheme 2) using whole-vector shifts, if available.
1107 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1110 The overall epilog code looks like this:
1112 s_out0 = phi <s_loop> # original EXIT_PHI
1113 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1114 v_out2 = reduce <v_out1> # step 1
1115 s_out3 = extract_field <v_out2, 0> # step 2
1116 s_out4 = adjust_result <s_out3> # step 3
1118 (step 3 is optional, and step2 1 and 2 may be combined).
1119 Lastly, the uses of s_out0 are replaced by s_out4.
1123 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1124 v_out1 = phi <v_loop> */
1126 exit_bb
= loop
->single_exit
->dest
;
1127 new_phi
= create_phi_node (SSA_NAME_VAR (vect_def
), exit_bb
);
1128 SET_PHI_ARG_DEF (new_phi
, loop
->single_exit
->dest_idx
, vect_def
);
1129 exit_bsi
= bsi_start (exit_bb
);
1131 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1132 (i.e. when reduc_code is not available) and in the final adjusment code
1133 (if needed). Also get the original scalar reduction variable as
1134 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1135 represents a reduction pattern), the tree-code and scalar-def are
1136 taken from the original stmt that the pattern-stmt (STMT) replaces.
1137 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1138 are taken from STMT. */
1140 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1143 /* Regular reduction */
1148 /* Reduction pattern */
1149 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
1150 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
1151 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
1153 code
= TREE_CODE (TREE_OPERAND (orig_stmt
, 1));
1154 scalar_dest
= TREE_OPERAND (orig_stmt
, 0);
1155 scalar_type
= TREE_TYPE (scalar_dest
);
1156 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
1157 bitsize
= TYPE_SIZE (scalar_type
);
1158 bytesize
= TYPE_SIZE_UNIT (scalar_type
);
1160 /* 2.3 Create the reduction code, using one of the three schemes described
1163 if (reduc_code
< NUM_TREE_CODES
)
1165 /*** Case 1: Create:
1166 v_out2 = reduc_expr <v_out1> */
1168 if (vect_print_dump_info (REPORT_DETAILS
))
1169 fprintf (vect_dump
, "Reduce using direct vector reduction.");
1171 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1172 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1173 build1 (reduc_code
, vectype
, PHI_RESULT (new_phi
)));
1174 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
1175 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1176 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1178 extract_scalar_result
= true;
1182 enum tree_code shift_code
= VEC_RSHIFT_EXPR
;
1183 bool have_whole_vector_shift
= true;
1185 int element_bitsize
= tree_low_cst (bitsize
, 1);
1186 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
1189 if (vec_shr_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
1190 shift_code
= VEC_RSHIFT_EXPR
;
1192 have_whole_vector_shift
= false;
1194 /* Regardless of whether we have a whole vector shift, if we're
1195 emulating the operation via tree-vect-generic, we don't want
1196 to use it. Only the first round of the reduction is likely
1197 to still be profitable via emulation. */
1198 /* ??? It might be better to emit a reduction tree code here, so that
1199 tree-vect-generic can expand the first round via bit tricks. */
1200 if (!VECTOR_MODE_P (mode
))
1201 have_whole_vector_shift
= false;
1204 optab optab
= optab_for_tree_code (code
, vectype
);
1205 if (optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
1206 have_whole_vector_shift
= false;
1209 if (have_whole_vector_shift
)
1211 /*** Case 2: Create:
1212 for (offset = VS/2; offset >= element_size; offset/=2)
1214 Create: va' = vec_shift <va, offset>
1215 Create: va = vop <va, va'>
1218 if (vect_print_dump_info (REPORT_DETAILS
))
1219 fprintf (vect_dump
, "Reduce using vector shifts");
1221 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1222 new_temp
= PHI_RESULT (new_phi
);
1224 for (bit_offset
= vec_size_in_bits
/2;
1225 bit_offset
>= element_bitsize
;
1228 tree bitpos
= size_int (bit_offset
);
1230 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1231 build2 (shift_code
, vectype
, new_temp
, bitpos
));
1232 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
1233 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
1234 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1236 epilog_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1237 build2 (code
, vectype
, new_name
, new_temp
));
1238 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
1239 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1240 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1243 extract_scalar_result
= true;
1249 /*** Case 3: Create:
1250 s = extract_field <v_out2, 0>
1251 for (offset = element_size;
1252 offset < vector_size;
1253 offset += element_size;)
1255 Create: s' = extract_field <v_out2, offset>
1256 Create: s = op <s, s'>
1259 if (vect_print_dump_info (REPORT_DETAILS
))
1260 fprintf (vect_dump
, "Reduce using scalar code. ");
1262 vec_temp
= PHI_RESULT (new_phi
);
1263 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
1264 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
1266 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1267 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
, rhs
);
1268 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1269 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1270 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1272 for (bit_offset
= element_bitsize
;
1273 bit_offset
< vec_size_in_bits
;
1274 bit_offset
+= element_bitsize
)
1276 tree bitpos
= bitsize_int (bit_offset
);
1277 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
1280 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1281 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1283 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1284 TREE_OPERAND (epilog_stmt
, 0) = new_name
;
1285 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1287 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1288 build2 (code
, scalar_type
, new_name
, new_temp
));
1289 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1290 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1291 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1294 extract_scalar_result
= false;
1298 /* 2.4 Extract the final scalar result. Create:
1299 s_out3 = extract_field <v_out2, bitpos> */
1301 if (extract_scalar_result
)
1305 if (vect_print_dump_info (REPORT_DETAILS
))
1306 fprintf (vect_dump
, "extract scalar result");
1308 if (BYTES_BIG_ENDIAN
)
1309 bitpos
= size_binop (MULT_EXPR
,
1310 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
1311 TYPE_SIZE (scalar_type
));
1313 bitpos
= bitsize_zero_node
;
1315 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
1316 BIT_FIELD_REF_UNSIGNED (rhs
) = TYPE_UNSIGNED (scalar_type
);
1317 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
, rhs
);
1318 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1319 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1320 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1323 /* 2.5 Adjust the final result by the initial value of the reduction
1324 variable. (When such adjustment is not needed, then
1325 'scalar_initial_def' is zero).
1328 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1330 if (scalar_initial_def
)
1332 epilog_stmt
= build2 (MODIFY_EXPR
, scalar_type
, new_scalar_dest
,
1333 build2 (code
, scalar_type
, new_temp
, scalar_initial_def
));
1334 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
1335 TREE_OPERAND (epilog_stmt
, 0) = new_temp
;
1336 bsi_insert_after (&exit_bsi
, epilog_stmt
, BSI_NEW_STMT
);
1339 /* 2.6 Replace uses of s_out0 with uses of s_out4 (or s_out3) */
1341 /* Find the loop-closed-use at the loop exit of the original scalar result.
1342 (The reduction result is expected to have two immediate uses - one at the
1343 latch block, and one at the loop exit). */
1345 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
1347 if (!flow_bb_inside_loop_p (loop
, bb_for_stmt (USE_STMT (use_p
))))
1349 exit_phi
= USE_STMT (use_p
);
1353 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1354 gcc_assert (exit_phi
);
1355 /* Replace the uses: */
1356 orig_name
= PHI_RESULT (exit_phi
);
1357 FOR_EACH_IMM_USE_SAFE (use_p
, imm_iter
, orig_name
)
1358 SET_USE (use_p
, new_temp
);
1362 /* Function vectorizable_reduction
1364 Check if STMT performs a reduction that can be vectorized.
1365 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1366 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1367 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1369 This function also handles reduction idioms (patterns) that have been
1370 recognized in advance during vect_pattern_recog. In this case, STMT may be
1372 X = pattern_expr (arg0, arg1, ..., X)
1373 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1374 sequence that had been detected and replaced by the pattern-stmt (STMT).
1376 In some cases of reduction patterns, the type of the reduction variable X is
1377 different than the type of the other arguments of STMT.
1378 In such cases, the vectype that is used when transforming STMT into a vector
1379 stmt is different than the vectype that is used to determine the
1380 vectorization factor, because it consists of a different number of elements
1381 than the actual number of elements that are being operated upon in parallel.
1383 For example, consider an accumulation of shorts into an int accumulator.
1384 On some targets it's possible to vectorize this pattern operating on 8
1385 shorts at a time (hence, the vectype for purposes of determining the
1386 vectorization factor should be V8HI); on the other hand, the vectype that
1387 is used to create the vector form is actually V4SI (the type of the result).
1389 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1390 indicates what is the actual level of parallelism (V8HI in the example), so
1391 that the right vectorization factor would be derived. This vectype
1392 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1393 be used to create the vectorized stmt. The right vectype for the vectorized
1394 stmt is obtained from the type of the result X:
1395 get_vectype_for_scalar_type (TREE_TYPE (X))
1397 This means that, contrary to "regular" reductions (or "regular" stmts in
1398 general), the following equation:
1399 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1400 does *NOT* necessarily hold for reduction patterns.
1404 vectorizable_reduction (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1409 tree loop_vec_def0
= NULL_TREE
, loop_vec_def1
= NULL_TREE
;
1410 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1411 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1412 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1413 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1415 enum tree_code code
, orig_code
, epilog_reduc_code
= 0;
1416 enum machine_mode vec_mode
;
1418 optab optab
, reduc_optab
;
1419 tree new_temp
= NULL_TREE
;
1421 enum vect_def_type dt
;
1426 stmt_vec_info orig_stmt_info
;
1427 tree expr
= NULL_TREE
;
1429 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1430 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1431 stmt_vec_info prev_stmt_info
;
1433 tree new_stmt
= NULL_TREE
;
1436 gcc_assert (ncopies
>= 1);
1438 /* 1. Is vectorizable reduction? */
1440 /* Not supportable if the reduction variable is used in the loop. */
1441 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1444 if (!STMT_VINFO_LIVE_P (stmt_info
))
1447 /* Make sure it was already recognized as a reduction computation. */
1448 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
1451 /* 2. Has this been recognized as a reduction pattern?
1453 Check if STMT represents a pattern that has been recognized
1454 in earlier analysis stages. For stmts that represent a pattern,
1455 the STMT_VINFO_RELATED_STMT field records the last stmt in
1456 the original sequence that constitutes the pattern. */
1458 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1461 orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
1462 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) == stmt
);
1463 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
));
1464 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info
));
1467 /* 3. Check the operands of the operation. The first operands are defined
1468 inside the loop body. The last operand is the reduction variable,
1469 which is defined by the loop-header-phi. */
1471 gcc_assert (TREE_CODE (stmt
) == MODIFY_EXPR
);
1473 operation
= TREE_OPERAND (stmt
, 1);
1474 code
= TREE_CODE (operation
);
1475 op_type
= TREE_CODE_LENGTH (code
);
1476 if (op_type
!= binary_op
&& op_type
!= ternary_op
)
1478 scalar_dest
= TREE_OPERAND (stmt
, 0);
1479 scalar_type
= TREE_TYPE (scalar_dest
);
1481 /* All uses but the last are expected to be defined in the loop.
1482 The last use is the reduction variable. */
1483 for (i
= 0; i
< op_type
-1; i
++)
1485 op
= TREE_OPERAND (operation
, i
);
1486 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1487 gcc_assert (is_simple_use
);
1488 gcc_assert (dt
== vect_loop_def
|| dt
== vect_invariant_def
||
1489 dt
== vect_constant_def
);
1492 op
= TREE_OPERAND (operation
, i
);
1493 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1494 gcc_assert (is_simple_use
);
1495 gcc_assert (dt
== vect_reduction_def
);
1496 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
1498 gcc_assert (orig_stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1500 gcc_assert (stmt
== vect_is_simple_reduction (loop
, def_stmt
));
1502 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
1505 /* 4. Supportable by target? */
1507 /* 4.1. Check support for the operation in the loop */
1508 optab
= optab_for_tree_code (code
, vectype
);
1511 if (vect_print_dump_info (REPORT_DETAILS
))
1512 fprintf (vect_dump
, "no optab.");
1515 vec_mode
= TYPE_MODE (vectype
);
1516 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1518 if (vect_print_dump_info (REPORT_DETAILS
))
1519 fprintf (vect_dump
, "op not supported by target.");
1520 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1521 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1522 < vect_min_worthwhile_factor (code
))
1524 if (vect_print_dump_info (REPORT_DETAILS
))
1525 fprintf (vect_dump
, "proceeding using word mode.");
1528 /* Worthwhile without SIMD support? */
1529 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1530 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1531 < vect_min_worthwhile_factor (code
))
1533 if (vect_print_dump_info (REPORT_DETAILS
))
1534 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1538 /* 4.2. Check support for the epilog operation.
1540 If STMT represents a reduction pattern, then the type of the
1541 reduction variable may be different than the type of the rest
1542 of the arguments. For example, consider the case of accumulation
1543 of shorts into an int accumulator; The original code:
1544 S1: int_a = (int) short_a;
1545 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1548 STMT: int_acc = widen_sum <short_a, int_acc>
1551 1. The tree-code that is used to create the vector operation in the
1552 epilog code (that reduces the partial results) is not the
1553 tree-code of STMT, but is rather the tree-code of the original
1554 stmt from the pattern that STMT is replacing. I.e, in the example
1555 above we want to use 'widen_sum' in the loop, but 'plus' in the
1557 2. The type (mode) we use to check available target support
1558 for the vector operation to be created in the *epilog*, is
1559 determined by the type of the reduction variable (in the example
1560 above we'd check this: plus_optab[vect_int_mode]).
1561 However the type (mode) we use to check available target support
1562 for the vector operation to be created *inside the loop*, is
1563 determined by the type of the other arguments to STMT (in the
1564 example we'd check this: widen_sum_optab[vect_short_mode]).
1566 This is contrary to "regular" reductions, in which the types of all
1567 the arguments are the same as the type of the reduction variable.
1568 For "regular" reductions we can therefore use the same vector type
1569 (and also the same tree-code) when generating the epilog code and
1570 when generating the code inside the loop. */
1574 /* This is a reduction pattern: get the vectype from the type of the
1575 reduction variable, and get the tree-code from orig_stmt. */
1576 orig_code
= TREE_CODE (TREE_OPERAND (orig_stmt
, 1));
1577 vectype
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1578 vec_mode
= TYPE_MODE (vectype
);
1582 /* Regular reduction: use the same vectype and tree-code as used for
1583 the vector code inside the loop can be used for the epilog code. */
1587 if (!reduction_code_for_scalar_code (orig_code
, &epilog_reduc_code
))
1589 reduc_optab
= optab_for_tree_code (epilog_reduc_code
, vectype
);
1592 if (vect_print_dump_info (REPORT_DETAILS
))
1593 fprintf (vect_dump
, "no optab for reduction.");
1594 epilog_reduc_code
= NUM_TREE_CODES
;
1596 if (reduc_optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1598 if (vect_print_dump_info (REPORT_DETAILS
))
1599 fprintf (vect_dump
, "reduc op not supported by target.");
1600 epilog_reduc_code
= NUM_TREE_CODES
;
1603 if (!vec_stmt
) /* transformation not required. */
1605 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
1611 if (vect_print_dump_info (REPORT_DETAILS
))
1612 fprintf (vect_dump
, "transform reduction.");
1614 /* Create the destination vector */
1615 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1617 /* Create the reduction-phi that defines the reduction-operand. */
1618 new_phi
= create_phi_node (vec_dest
, loop
->header
);
1620 /* In case the vectorization factor (VF) is bigger than the number
1621 of elements that we can fit in a vectype (nunits), we have to generate
1622 more than one vector stmt - i.e - we need to "unroll" the
1623 vector stmt by a factor VF/nunits. For more details see documentation
1624 in vectorizable_operation. */
1626 prev_stmt_info
= NULL
;
1627 for (j
= 0; j
< ncopies
; j
++)
1632 op
= TREE_OPERAND (operation
, 0);
1633 loop_vec_def0
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1634 if (op_type
== ternary_op
)
1636 op
= TREE_OPERAND (operation
, 1);
1637 loop_vec_def1
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1640 /* Get the vector def for the reduction variable from the phi node */
1641 reduc_def
= PHI_RESULT (new_phi
);
1645 enum vect_def_type dt
= vect_unknown_def_type
; /* Dummy */
1646 loop_vec_def0
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def0
);
1647 if (op_type
== ternary_op
)
1648 loop_vec_def1
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def1
);
1650 /* Get the vector def for the reduction variable from the vectorized
1651 reduction operation generated in the previous iteration (j-1) */
1652 reduc_def
= TREE_OPERAND (new_stmt
,0);
1655 /* Arguments are ready. create the new vector stmt. */
1657 if (op_type
== binary_op
)
1658 expr
= build2 (code
, vectype
, loop_vec_def0
, reduc_def
);
1660 expr
= build3 (code
, vectype
, loop_vec_def0
, loop_vec_def1
, reduc_def
);
1661 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, expr
);
1662 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1663 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1664 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1667 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
1669 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = *vec_stmt
= new_stmt
;
1670 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1673 /* Finalize the reduction-phi (set it's arguments) and create the
1674 epilog reduction code. */
1675 vect_create_epilog_for_reduction (new_temp
, stmt
, epilog_reduc_code
, new_phi
);
1681 /* Function vectorizable_assignment.
1683 Check if STMT performs an assignment (copy) that can be vectorized.
1684 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1685 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1686 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1689 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1695 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1696 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1697 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1700 enum vect_def_type dt
;
1701 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1702 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1704 gcc_assert (ncopies
>= 1);
1706 return false; /* FORNOW */
1708 /* Is vectorizable assignment? */
1709 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1712 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1714 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1717 scalar_dest
= TREE_OPERAND (stmt
, 0);
1718 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1721 op
= TREE_OPERAND (stmt
, 1);
1722 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
1724 if (vect_print_dump_info (REPORT_DETAILS
))
1725 fprintf (vect_dump
, "use not simple.");
1729 if (!vec_stmt
) /* transformation not required. */
1731 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
1736 if (vect_print_dump_info (REPORT_DETAILS
))
1737 fprintf (vect_dump
, "transform assignment.");
1740 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1743 op
= TREE_OPERAND (stmt
, 1);
1744 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1746 /* Arguments are ready. create the new vector stmt. */
1747 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
1748 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1749 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1750 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1756 /* Function vect_min_worthwhile_factor.
1758 For a loop where we could vectorize the operation indicated by CODE,
1759 return the minimum vectorization factor that makes it worthwhile
1760 to use generic vectors. */
1762 vect_min_worthwhile_factor (enum tree_code code
)
1783 /* Function vectorizable_operation.
1785 Check if STMT performs a binary or unary operation that can be vectorized.
1786 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1787 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1788 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1791 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1796 tree op0
, op1
= NULL
;
1797 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
1798 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1799 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1800 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1801 enum tree_code code
;
1802 enum machine_mode vec_mode
;
1807 enum machine_mode optab_op2_mode
;
1809 enum vect_def_type dt0
, dt1
;
1811 stmt_vec_info prev_stmt_info
;
1812 int nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
1815 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1818 gcc_assert (ncopies
>= 1);
1820 /* Is STMT a vectorizable binary/unary operation? */
1821 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1824 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
1826 if (STMT_VINFO_LIVE_P (stmt_info
))
1828 /* FORNOW: not yet supported. */
1829 if (vect_print_dump_info (REPORT_DETAILS
))
1830 fprintf (vect_dump
, "value used after loop.");
1834 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1837 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1840 scalar_dest
= TREE_OPERAND (stmt
, 0);
1841 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
1842 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1843 if (nunits_out
!= nunits_in
)
1846 operation
= TREE_OPERAND (stmt
, 1);
1847 code
= TREE_CODE (operation
);
1848 optab
= optab_for_tree_code (code
, vectype
);
1850 /* Support only unary or binary operations. */
1851 op_type
= TREE_CODE_LENGTH (code
);
1852 if (op_type
!= unary_op
&& op_type
!= binary_op
)
1854 if (vect_print_dump_info (REPORT_DETAILS
))
1855 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
1859 op0
= TREE_OPERAND (operation
, 0);
1860 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
1862 if (vect_print_dump_info (REPORT_DETAILS
))
1863 fprintf (vect_dump
, "use not simple.");
1867 if (op_type
== binary_op
)
1869 op1
= TREE_OPERAND (operation
, 1);
1870 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt1
))
1872 if (vect_print_dump_info (REPORT_DETAILS
))
1873 fprintf (vect_dump
, "use not simple.");
1878 /* Supportable by target? */
1881 if (vect_print_dump_info (REPORT_DETAILS
))
1882 fprintf (vect_dump
, "no optab.");
1885 vec_mode
= TYPE_MODE (vectype
);
1886 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
1887 if (icode
== CODE_FOR_nothing
)
1889 if (vect_print_dump_info (REPORT_DETAILS
))
1890 fprintf (vect_dump
, "op not supported by target.");
1891 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
1892 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1893 < vect_min_worthwhile_factor (code
))
1895 if (vect_print_dump_info (REPORT_DETAILS
))
1896 fprintf (vect_dump
, "proceeding using word mode.");
1899 /* Worthwhile without SIMD support? */
1900 if (!VECTOR_MODE_P (vec_mode
)
1901 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
1902 < vect_min_worthwhile_factor (code
))
1904 if (vect_print_dump_info (REPORT_DETAILS
))
1905 fprintf (vect_dump
, "not worthwhile without SIMD support.");
1909 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
1911 /* FORNOW: not yet supported. */
1912 if (!VECTOR_MODE_P (vec_mode
))
1915 /* Invariant argument is needed for a vector shift
1916 by a scalar shift operand. */
1917 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1918 if (! (VECTOR_MODE_P (optab_op2_mode
)
1919 || dt1
== vect_constant_def
1920 || dt1
== vect_invariant_def
))
1922 if (vect_print_dump_info (REPORT_DETAILS
))
1923 fprintf (vect_dump
, "operand mode requires invariant argument.");
1928 if (!vec_stmt
) /* transformation not required. */
1930 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
1936 if (vect_print_dump_info (REPORT_DETAILS
))
1937 fprintf (vect_dump
, "transform binary/unary operation. ncopies = %d.",
1941 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1943 /* In case the vectorization factor (VF) is bigger than the number
1944 of elements that we can fit in a vectype (nunits), we have to generate
1945 more than one vector stmt - i.e - we need to "unroll" the
1946 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1947 from one copy of the vector stmt to the next, in the field
1948 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1949 stages to find the correct vector defs to be used when vectorizing
1950 stmts that use the defs of the current stmt. The example below illustrates
1951 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
1952 4 vectorized stmts):
1954 before vectorization:
1955 RELATED_STMT VEC_STMT
1959 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
1961 RELATED_STMT VEC_STMT
1962 VS1_0: vx0 = memref0 VS1_1 -
1963 VS1_1: vx1 = memref1 VS1_2 -
1964 VS1_2: vx2 = memref2 VS1_3 -
1965 VS1_3: vx3 = memref3 - -
1966 S1: x = load - VS1_0
1969 step2: vectorize stmt S2 (done here):
1970 To vectorize stmt S2 we first need to find the relevant vector
1971 def for the first operand 'x'. This is, as usual, obtained from
1972 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
1973 that defines 'x' (S1). This way we find the stmt VS1_0, and the
1974 relevant vector def 'vx0'. Having found 'vx0' we can generate
1975 the vector stmt VS2_0, and as usual, record it in the
1976 STMT_VINFO_VEC_STMT of stmt S2.
1977 When creating the second copy (VS2_1), we obtain the relevant vector
1978 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
1979 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
1980 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
1981 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
1982 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
1983 chain of stmts and pointers:
1984 RELATED_STMT VEC_STMT
1985 VS1_0: vx0 = memref0 VS1_1 -
1986 VS1_1: vx1 = memref1 VS1_2 -
1987 VS1_2: vx2 = memref2 VS1_3 -
1988 VS1_3: vx3 = memref3 - -
1989 S1: x = load - VS1_0
1990 VS2_0: vz0 = vx0 + v1 VS2_1 -
1991 VS2_1: vz1 = vx1 + v1 VS2_2 -
1992 VS2_2: vz2 = vx2 + v1 VS2_3 -
1993 VS2_3: vz3 = vx3 + v1 - -
1994 S2: z = x + 1 - VS2_0 */
1996 prev_stmt_info
= NULL
;
1997 for (j
= 0; j
< ncopies
; j
++)
2002 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2003 if (op_type
== binary_op
)
2005 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
)
2007 /* Vector shl and shr insn patterns can be defined with
2008 scalar operand 2 (shift operand). In this case, use
2009 constant or loop invariant op1 directly, without
2010 extending it to vector mode first. */
2011 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2012 if (!VECTOR_MODE_P (optab_op2_mode
))
2014 if (vect_print_dump_info (REPORT_DETAILS
))
2015 fprintf (vect_dump
, "operand 1 using scalar mode.");
2020 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
2025 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2026 if (op_type
== binary_op
)
2027 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt1
, vec_oprnd1
);
2030 /* Arguments are ready. create the new vector stmt. */
2032 if (op_type
== binary_op
)
2033 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
2034 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
2036 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
2037 build1 (code
, vectype
, vec_oprnd0
));
2038 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2039 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2040 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2043 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2046 gcc_assert (prev_stmt_info
);
2047 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2049 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2052 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* NOTE(review): this region shows signs of extraction damage -- gaps in the
   embedded original line numbering indicate that several source lines
   (braces, 'else' keywords, 'return' statements) are missing.  The comments
   added below describe only what the visible code establishes.  */
2057 /* Function vectorizable_type_demotion
2059 Check if STMT performs a binary or unary operation that involves
2060 type demotion, and if it can be vectorized.
2061 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2062 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2063 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2066 vectorizable_type_demotion (tree stmt
, block_stmt_iterator
*bsi
,
2073 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
2074 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2075 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2076 enum tree_code code
;
2079 enum vect_def_type dt0
;
2081 stmt_vec_info prev_stmt_info
;
2091 enum machine_mode vec_mode
;
2093 /* Is STMT a vectorizable type-demotion operation? */
2095 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2098 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2100 if (STMT_VINFO_LIVE_P (stmt_info
))
2102 /* FORNOW: not yet supported. */
2103 if (vect_print_dump_info (REPORT_DETAILS
))
2104 fprintf (vect_dump
, "value used after loop.");
/* Only plain MODIFY_EXPRs whose LHS is an SSA name and whose RHS is a
   NOP_EXPR/CONVERT_EXPR are handled.  */
2108 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2111 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
2114 operation
= TREE_OPERAND (stmt
, 1);
2115 code
= TREE_CODE (operation
);
2116 if (code
!= NOP_EXPR
&& code
!= CONVERT_EXPR
)
2119 op0
= TREE_OPERAND (operation
, 0);
2120 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
2121 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2123 scalar_dest
= TREE_OPERAND (stmt
, 0);
2124 scalar_type
= TREE_TYPE (scalar_dest
);
2125 vectype_out
= get_vectype_for_scalar_type (scalar_type
);
2126 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
/* FORNOW restriction: the output vector must hold exactly twice as many
   elements as the input vector (demotion by a factor of 2 only).  */
2127 if (nunits_in
!= nunits_out
/2) /* FORNOW */
2130 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2131 gcc_assert (ncopies
>= 1);
2133 /* Check the operands of the operation. */
2134 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2136 if (vect_print_dump_info (REPORT_DETAILS
))
2137 fprintf (vect_dump
, "use not simple.");
2141 /* Supportable by target? */
2142 code
= VEC_PACK_MOD_EXPR
;
2143 optab
= optab_for_tree_code (VEC_PACK_MOD_EXPR
, vectype_in
);
2147 vec_mode
= TYPE_MODE (vectype_in
);
2148 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
2151 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
2153 if (!vec_stmt
) /* transformation not required. */
2155 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2161 if (vect_print_dump_info (REPORT_DETAILS
))
2162 fprintf (vect_dump
, "transform type demotion operation. ncopies = %d.",
2166 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2168 /* In case the vectorization factor (VF) is bigger than the number
2169 of elements that we can fit in a vectype (nunits), we have to generate
2170 more than one vector stmt - i.e - we need to "unroll" the
2171 vector stmt by a factor VF/nunits. */
2172 prev_stmt_info
= NULL
;
2173 for (j
= 0; j
< ncopies
; j
++)
2178 enum vect_def_type dt
= vect_unknown_def_type
; /* Dummy */
2179 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2180 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd0
);
2184 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd1
);
2185 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2188 /* Arguments are ready. Create the new vector stmt. */
2189 expr
= build2 (code
, vectype_out
, vec_oprnd0
, vec_oprnd1
);
2190 new_stmt
= build2 (MODIFY_EXPR
, vectype_out
, vec_dest
, expr
);
2191 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2192 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2193 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* Multiple-copy chaining: the first copy is recorded in
   STMT_VINFO_VEC_STMT; each later copy is linked from the previous one
   via STMT_VINFO_RELATED_STMT.  */
2196 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2199 gcc_assert (prev_stmt_info
);
2200 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2203 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2206 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* NOTE(review): extraction damage -- gaps in the embedded numbering show
   missing lines (e.g. the function's return and some braces).  Comments
   added below only describe the visible code.  */
2211 /* Function vect_gen_widened_results_half
2213 Create a vector stmt whose code, type, number of arguments, and result
2214 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2215 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2216 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2217 needs to be created (DECL is a function-decl of a target-builtin).
2218 STMT is the original scalar stmt that we are vectorizing. */
2221 vect_gen_widened_results_half (enum tree_code code
, tree vectype
, tree decl
,
2222 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2223 tree vec_dest
, block_stmt_iterator
*bsi
,
2233 /* Generate half of the widened result: */
2234 if (code
== CALL_EXPR
)
2236 /* Target specific support: build the argument list (operand 1 is
   consed in front of operand 0 for binary ops) and emit a call to the
   target builtin DECL.  */
2237 vec_params
= build_tree_list (NULL_TREE
, vec_oprnd0
);
2238 if (op_type
== binary_op
)
2239 vec_params
= tree_cons (NULL_TREE
, vec_oprnd1
, vec_params
);
2240 expr
= build_function_call_expr (decl
, vec_params
);
2244 /* Generic support */
2245 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2246 if (op_type
== binary_op
)
2247 expr
= build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
);
2249 expr
= build1 (code
, vectype
, vec_oprnd0
);
2251 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, expr
);
2252 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2253 TREE_OPERAND (new_stmt
, 0) = new_temp
;
2254 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
/* A builtin call may clobber memory: mark all virtual operands of the new
   call stmt for SSA renaming.  */
2256 if (code
== CALL_EXPR
)
2258 FOR_EACH_SSA_TREE_OPERAND (sym
, new_stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
2260 if (TREE_CODE (sym
) == SSA_NAME
)
2261 sym
= SSA_NAME_VAR (sym
);
2262 mark_sym_for_renaming (sym
);
/* NOTE(review): extraction damage -- gaps in the embedded numbering show
   missing lines (returns, braces, 'else' keywords).  Comments added below
   only describe the visible code.  */
2270 /* Function vectorizable_type_promotion
2272 Check if STMT performs a binary or unary operation that involves
2273 type promotion, and if it can be vectorized.
2274 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2275 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2276 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2279 vectorizable_type_promotion (tree stmt
, block_stmt_iterator
*bsi
,
2285 tree op0
, op1
= NULL
;
2286 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
2287 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2288 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2289 enum tree_code code
, code1
= CODE_FOR_nothing
, code2
= CODE_FOR_nothing
;
2290 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2293 enum vect_def_type dt0
, dt1
;
2295 stmt_vec_info prev_stmt_info
;
2303 /* Is STMT a vectorizable type-promotion operation? */
2305 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2308 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
2310 if (STMT_VINFO_LIVE_P (stmt_info
))
2312 /* FORNOW: not yet supported. */
2313 if (vect_print_dump_info (REPORT_DETAILS
))
2314 fprintf (vect_dump
, "value used after loop.");
/* Only MODIFY_EXPRs with an SSA-name LHS whose RHS is a NOP_EXPR or a
   WIDEN_MULT_EXPR are handled.  */
2318 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2321 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
2324 operation
= TREE_OPERAND (stmt
, 1);
2325 code
= TREE_CODE (operation
);
2326 if (code
!= NOP_EXPR
&& code
!= WIDEN_MULT_EXPR
)
2329 op0
= TREE_OPERAND (operation
, 0);
2330 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
2331 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2332 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2333 gcc_assert (ncopies
>= 1);
2335 scalar_dest
= TREE_OPERAND (stmt
, 0);
2336 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
2337 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
/* FORNOW restriction: promotion by a factor of 2 only.  */
2338 if (nunits_out
!= nunits_in
/2) /* FORNOW */
2341 /* Check the operands of the operation. */
2342 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt0
))
2344 if (vect_print_dump_info (REPORT_DETAILS
))
2345 fprintf (vect_dump
, "use not simple.");
2349 op_type
= TREE_CODE_LENGTH (code
);
2350 if (op_type
== binary_op
)
2352 op1
= TREE_OPERAND (operation
, 1);
2353 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt1
))
2355 if (vect_print_dump_info (REPORT_DETAILS
))
2356 fprintf (vect_dump
, "use not simple.");
2361 /* Supportable by target? */
2362 if (!supportable_widening_operation (code
, stmt
, vectype_in
,
2363 &decl1
, &decl2
, &code1
, &code2
))
2366 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
2368 if (!vec_stmt
) /* transformation not required. */
2370 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2376 if (vect_print_dump_info (REPORT_DETAILS
))
2377 fprintf (vect_dump
, "transform type promotion operation. ncopies = %d.",
2381 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2383 /* In case the vectorization factor (VF) is bigger than the number
2384 of elements that we can fit in a vectype (nunits), we have to generate
2385 more than one vector stmt - i.e - we need to "unroll" the
2386 vector stmt by a factor VF/nunits. */
2387 prev_stmt_info
= NULL
;
2388 for (j
= 0; j
< ncopies
; j
++)
2393 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2394 if (op_type
== binary_op
)
2395 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
2399 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt0
, vec_oprnd0
);
2400 if (op_type
== binary_op
)
2401 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt1
, vec_oprnd1
);
2404 /* Arguments are ready. Create the new vector stmt. We are creating
2405 two vector defs because the widened result does not fit in one vector.
2406 The vectorized stmt can be expressed as a call to a target builtin,
2407 or by using a tree-code.
2410 /* Generate first half of the widened result: */
2411 new_stmt
= vect_gen_widened_results_half (code1
, vectype_out
, decl1
,
2412 vec_oprnd0
, vec_oprnd1
, op_type
, vec_dest
, bsi
, stmt
);
2414 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2416 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2417 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2419 /* Generate second half of the widened result: */
2420 new_stmt
= vect_gen_widened_results_half (code2
, vectype_out
, decl2
,
2421 vec_oprnd0
, vec_oprnd1
, op_type
, vec_dest
, bsi
, stmt
);
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2423 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2426 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* NOTE(review): extraction damage -- the returns and some argument lines
   are missing (see gaps in embedded numbering).  */
2431 /* Function vect_strided_store_supported.
2433 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2434 and FALSE otherwise.
2439 vect_strided_store_supported (tree vectype
)
2441 optab interleave_high_optab
, interleave_low_optab
;
2444 mode
= (int) TYPE_MODE (vectype
);
2446 /* Check that the operation is supported. */
2447 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
2449 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
2451 if (!interleave_high_optab
|| !interleave_low_optab
)
2453 if (vect_print_dump_info (REPORT_DETAILS
))
2454 fprintf (vect_dump
, "no optab for interleave.");
/* Both optabs must have an actual insn for this vector mode.  */
2457 if (interleave_high_optab
->handlers
[(int) mode
].insn_code
2459 || interleave_low_optab
->handlers
[(int) mode
].insn_code
2460 == CODE_FOR_nothing
)
2462 if (vect_print_dump_info (REPORT_DETAILS
))
2463 fprintf (vect_dump
, "interleave op not supported by target.");
/* NOTE(review): extraction damage -- several lines are missing (second
   operands of the build2 calls, some braces, the return).  Comments added
   below only describe the visible code.  */
2470 /* Function vect_permute_store_chain.
2472 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2473 a power of 2, generate interleave_high/low stmts to reorder the data
2474 correctly for the stores. Return the final references for stores in
2477 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2478 The input is 4 vectors each containing 8 elements. We assign a number to each
2479 element, the input sequence is:
2481 1st vec: 0 1 2 3 4 5 6 7
2482 2nd vec: 8 9 10 11 12 13 14 15
2483 3rd vec: 16 17 18 19 20 21 22 23
2484 4th vec: 24 25 26 27 28 29 30 31
2486 The output sequence should be:
2488 1st vec: 0 8 16 24 1 9 17 25
2489 2nd vec: 2 10 18 26 3 11 19 27
2490 3rd vec: 4 12 20 28 5 13 21 30
2491 4th vec: 6 14 22 30 7 15 23 31
2493 i.e., we interleave the contents of the four vectors in their order.
2495 We use interleave_high/low instructions to create such output. The input of
2496 each interleave_high/low operation is two vectors:
2499 the even elements of the result vector are obtained left-to-right from the
2500 high/low elements of the first vector. The odd elements of the result are
2501 obtained left-to-right from the high/low elements of the second vector.
2502 The output of interleave_high will be: 0 4 1 5
2503 and of interleave_low: 2 6 3 7
2506 The permutation is done in log LENGTH stages. In each stage interleave_high
2507 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2508 where the first argument is taken from the first half of DR_CHAIN and the
2509 second argument from its second half.
2512 I1: interleave_high (1st vec, 3rd vec)
2513 I2: interleave_low (1st vec, 3rd vec)
2514 I3: interleave_high (2nd vec, 4th vec)
2515 I4: interleave_low (2nd vec, 4th vec)
2517 The output for the first stage is:
2519 I1: 0 16 1 17 2 18 3 19
2520 I2: 4 20 5 21 6 22 7 23
2521 I3: 8 24 9 25 10 26 11 27
2522 I4: 12 28 13 29 14 30 15 31
2524 The output of the second stage, i.e. the final result is:
2526 I1: 0 8 16 24 1 9 17 25
2527 I2: 2 10 18 26 3 11 19 27
2528 I3: 4 12 20 28 5 13 21 30
2529 I4: 6 14 22 30 7 15 23 31
2534 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
2535 unsigned int length
,
2537 block_stmt_iterator
*bsi
,
2538 VEC(tree
,heap
) **result_chain
)
2540 tree perm_dest
, perm_stmt
, vect1
, vect2
, high
, low
;
2541 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2545 VEC(tree
,heap
) *first
, *second
;
2547 scalar_dest
= TREE_OPERAND (stmt
, 0);
2548 first
= VEC_alloc (tree
, heap
, length
/2);
2549 second
= VEC_alloc (tree
, heap
, length
/2);
2551 /* Check that the operation is supported. */
2552 if (!vect_strided_store_supported (vectype
))
2555 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
/* log2(LENGTH) permutation stages; each stage pairs element j with element
   j + length/2 of the current chain.  */
2557 for (i
= 0; i
< exact_log2(length
); i
++)
2559 for (j
= 0; j
< length
/2; j
++)
2561 vect1
= VEC_index(tree
, dr_chain
, j
);
2562 vect2
= VEC_index(tree
, dr_chain
, j
+length
/2);
2564 /* high = interleave_high (vect1, vect2); */
2565 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
2566 add_referenced_tmp_var (perm_dest
);
/* NOTE(review): high/low are swapped on big-endian targets -- presumably
   because element ordering within a vector register is reversed; confirm
   against the target documentation.  */
2567 if (BYTES_BIG_ENDIAN
)
2568 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
2569 build2 (VEC_INTERLEAVE_HIGH_EXPR
, vectype
, vect1
,
2572 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
2573 build2 (VEC_INTERLEAVE_LOW_EXPR
, vectype
, vect1
,
2575 high
= make_ssa_name (perm_dest
, perm_stmt
);
2576 TREE_OPERAND (perm_stmt
, 0) = high
;
2577 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
2578 mark_new_vars_to_rename (perm_stmt
);
2579 VEC_replace (tree
, *result_chain
, 2*j
, high
);
2581 /* low = interleave_low (vect1, vect2); */
2582 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
2583 add_referenced_tmp_var (perm_dest
);
2584 if (BYTES_BIG_ENDIAN
)
2585 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
2586 build2 (VEC_INTERLEAVE_LOW_EXPR
, vectype
, vect1
,
2589 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
2590 build2 (VEC_INTERLEAVE_HIGH_EXPR
, vectype
, vect1
,
2592 low
= make_ssa_name (perm_dest
, perm_stmt
);
2593 TREE_OPERAND (perm_stmt
, 0) = low
;
2594 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
2595 mark_new_vars_to_rename (perm_stmt
);
2596 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
/* The stage's output becomes the next stage's input.  */
2598 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
/* NOTE(review): extraction damage -- many lines are missing (returns,
   braces, 'else' keywords, parts of the interleaving example in the long
   comment).  Comments added below only describe the visible code.  */
2604 /* Function vectorizable_store.
2606 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2608 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2609 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2610 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2614 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
2619 tree vec_oprnd
= NULL_TREE
;
2620 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2621 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
2622 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2623 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2624 enum machine_mode vec_mode
;
2626 enum dr_alignment_support alignment_support_cheme
;
2628 def_operand_p def_p
;
2630 enum vect_def_type dt
;
2631 stmt_vec_info prev_stmt_info
= NULL
;
2632 tree dataref_ptr
= NULL_TREE
;
2633 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2634 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2637 tree ptr_incr
, next_stmt
, first_stmt
;
2638 bool strided_store
= false;
2639 unsigned int group_size
, i
;
2640 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
2642 gcc_assert (ncopies
>= 1);
2644 /* Is vectorizable store? */
2646 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
2649 scalar_dest
= TREE_OPERAND (stmt
, 0);
2650 if (TREE_CODE (scalar_dest
) != ARRAY_REF
2651 && TREE_CODE (scalar_dest
) != INDIRECT_REF
2652 && !DR_GROUP_FIRST_DR (stmt_info
))
2655 op
= TREE_OPERAND (stmt
, 1);
2656 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
2658 if (vect_print_dump_info (REPORT_DETAILS
))
2659 fprintf (vect_dump
, "use not simple.");
2663 vec_mode
= TYPE_MODE (vectype
);
2664 /* FORNOW. In some cases can vectorize even if data-type not supported
2665 (e.g. - array initialization with 0). */
2666 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
2669 if (!STMT_VINFO_DATA_REF (stmt_info
))
/* A DR_GROUP_FIRST_DR marks this store as part of an interleaving group.  */
2672 if (DR_GROUP_FIRST_DR (stmt_info
))
2674 strided_store
= true;
2675 if (!vect_strided_store_supported (vectype
))
2678 if (!vec_stmt
) /* transformation not required. */
2680 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
2686 if (vect_print_dump_info (REPORT_DETAILS
))
2687 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
2689 *vec_stmt
= NULL_TREE
;
2692 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
2693 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
2694 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
2696 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
2698 /* We vectorize all the stmts of the interleaving group when we
2699 reach the last stmt in the group. */
2700 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
2701 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
)))
2711 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
2712 oprnds
= VEC_alloc (tree
, heap
, group_size
);
2714 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
2715 gcc_assert (alignment_support_cheme
);
2716 gcc_assert (alignment_support_cheme
== dr_aligned
); /* FORNOW */
2718 /* In case the vectorization factor (VF) is bigger than the number
2719 of elements that we can fit in a vectype (nunits), we have to generate
2720 more than one vector stmt - i.e - we need to "unroll" the
2721 vector stmt by a factor VF/nunits. For more details see documentation in
2722 vectorizable_operation. */
2724 /* In case of interleaving (non-unit strided access):
2731 We create vectorized stores starting from base address (the access of the
2732 first stmt in the chain (S2 in the above example), when the last store stmt
2733 of the chain (S4) is reached:
2736 VS2: &base + vec_size*1 = vx0
2737 VS3: &base + vec_size*2 = vx1
2738 VS4: &base + vec_size*3 = vx3
2740 Then permutation statements are generated:
2742 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2743 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2746 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2747 (the order of the data-refs in the output of vect_permute_store_chain
2748 corresponds to the order of scalar stmts in the interleaving chain - see
2749 the documentation of vect_permute_store_chain()).
2751 In case of both multiple types and interleaving, above vector stores and
2752 permutation stmts are created for every copy. The result vector stmts are
2753 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
2754 STMT_VINFO_RELATED_STMT for the next copies.
2757 for (j
= 0; j
< ncopies
; j
++)
2761 /* Handle use - get the vectorized defs from the defining stmts. */
2762 /* For interleaved stores we collect vectorized defs for all the
2763 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
2764 as an input to vect_permute_store_chain(), and OPRNDS as an input
2765 to vect_get_vec_def_for_stmt_copy() for the next copy.
2766 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2767 OPRNDS are of size 1.
2769 next_stmt
= first_stmt
;
2770 for (i
= 0; i
< group_size
; i
++)
2772 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
2773 is the exact number of stmts in the chain. Therefore, NEXT_STMT
2774 can't be NULL_TREE. In case that there is no interleaving,
2775 GROUP_SIZE is 1, and only one iteration of the loop will be
2778 gcc_assert (next_stmt
);
2779 op
= TREE_OPERAND (next_stmt
, 1);
2780 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
, NULL
);
2781 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
2782 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
2783 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
2785 /* Handle def - create new pointer. */
2786 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, bsi
, NULL_TREE
,
2787 &dummy
, &ptr_incr
, false);
2791 /* Handle use - get the vectorized defs from the defining stmts. */
2792 /* For interleaved stores we created vectorized defs for all the
2793 defs stored in OPRNDS in the previous iteration (previous copy).
2794 DR_CHAIN is then used as an input to vect_permute_store_chain(),
2795 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
2797 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2798 OPRNDS are of size 1.
2800 for (i
= 0; i
< group_size
; i
++)
2802 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
,
2803 VEC_index (tree
, oprnds
, i
));
2804 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
2805 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
2808 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
2813 result_chain
= VEC_alloc (tree
, heap
, group_size
);
2815 if (!vect_permute_store_chain (dr_chain
, group_size
, stmt
, bsi
,
2820 next_stmt
= first_stmt
;
2821 for (i
= 0; i
< group_size
; i
++)
2823 /* For strided stores vectorized defs are interleaved in
2824 vect_permute_store_chain(). */
2826 vec_oprnd
= VEC_index(tree
, result_chain
, i
);
2828 data_ref
= build_fold_indirect_ref (dataref_ptr
);
2830 /* Arguments are ready. create the new vector stmt. */
2831 new_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd
);
2832 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
2834 /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
2835 use outside the loop and a loop peel is performed then the def may be
2836 renamed by the peel. Mark it for renaming so the later use will also
2838 copy_virtual_operands (new_stmt
, next_stmt
);
2841 /* The original store is deleted so the same SSA_NAMEs can be used.
2843 FOR_EACH_SSA_TREE_OPERAND (def
, next_stmt
, iter
, SSA_OP_VMAYDEF
)
2845 SSA_NAME_DEF_STMT (def
) = new_stmt
;
2846 mark_sym_for_renaming (SSA_NAME_VAR (def
));
2848 gcc_assert (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)));
2849 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
2853 /* Create new names for all the definitions created by COPY and
2854 add replacement mappings for each new name. */
2855 FOR_EACH_SSA_DEF_OPERAND (def_p
, new_stmt
, iter
, SSA_OP_VMAYDEF
)
2857 create_new_def_for (DEF_FROM_PTR (def_p
), new_stmt
, def_p
);
2858 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p
)));
2860 gcc_assert (prev_stmt_info
);
2861 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2864 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2865 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
2868 /* Bump the vector pointer. */
2869 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
/* NOTE(review): extraction damage -- returns and some argument lines are
   missing (see gaps in embedded numbering).  */
2876 /* Function vect_strided_load_supported.
2878 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2879 and FALSE otherwise.
2884 vect_strided_load_supported (tree vectype
)
2886 optab perm_even_optab
, perm_odd_optab
;
2889 mode
= (int) TYPE_MODE (vectype
);
/* A target-provided extract_even builtin takes precedence over the generic
   optab; the optab path is only checked when the builtin hook is absent or
   declines this vectype.  */
2891 if (!targetm
.vectorize
.builtin_extract_even
2892 || !targetm
.vectorize
.builtin_extract_even (vectype
, NULL_TREE
,
2893 NULL_TREE
, NULL_TREE
))
2895 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
,
2897 if (!perm_even_optab
)
2899 if (vect_print_dump_info (REPORT_DETAILS
))
2900 fprintf (vect_dump
, "no optab for perm_even.");
2903 if (perm_even_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
2905 if (vect_print_dump_info (REPORT_DETAILS
))
2906 fprintf (vect_dump
, "perm_even op not supported by target.");
/* Same check for the odd-element extraction.  */
2910 if (!targetm
.vectorize
.builtin_extract_odd
2911 || !targetm
.vectorize
.builtin_extract_odd (vectype
, NULL_TREE
,
2912 NULL_TREE
, NULL_TREE
))
2914 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
);
2915 if (!perm_odd_optab
)
2917 if (vect_print_dump_info (REPORT_DETAILS
))
2918 fprintf (vect_dump
, "no optab for perm_odd.");
2921 if (perm_odd_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
2923 if (vect_print_dump_info (REPORT_DETAILS
))
2924 fprintf (vect_dump
, "perm_odd op not supported by target.");
/* NOTE(review): extraction damage -- several lines are missing (operands of
   the second build2 call, some braces, the return).  Comments added below
   only describe the visible code.  */
2932 /* Function vect_permute_load_chain.
2934 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2935 a power of 2, generate extract_even/odd stmts to reorder the input data
2936 correctly. Return the final references for loads in RESULT_CHAIN.
2938 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2939 The input is 4 vectors each containing 8 elements. We assign a number to each
2940 element, the input sequence is:
2942 1st vec: 0 1 2 3 4 5 6 7
2943 2nd vec: 8 9 10 11 12 13 14 15
2944 3rd vec: 16 17 18 19 20 21 22 23
2945 4th vec: 24 25 26 27 28 29 30 31
2947 The output sequence should be:
2949 1st vec: 0 4 8 12 16 20 24 28
2950 2nd vec: 1 5 9 13 17 21 25 29
2951 3rd vec: 2 6 10 14 18 22 26 30
2952 4th vec: 3 7 11 15 19 23 27 31
2954 i.e., the first output vector should contain the first elements of each
2955 interleaving group, etc.
2957 We use extract_even/odd instructions to create such output. The input of each
2958 extract_even/odd operation is two vectors
2962 and the output is the vector of extracted even/odd elements. The output of
2963 extract_even will be: 0 2 4 6
2964 and of extract_odd: 1 3 5 7
2967 The permutation is done in log LENGTH stages. In each stage extract_even and
2968 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
2969 order. In our example,
2971 E1: extract_even (1st vec, 2nd vec)
2972 E2: extract_odd (1st vec, 2nd vec)
2973 E3: extract_even (3rd vec, 4th vec)
2974 E4: extract_odd (3rd vec, 4th vec)
2976 The output for the first stage will be:
2978 E1: 0 2 4 6 8 10 12 14
2979 E2: 1 3 5 7 9 11 13 15
2980 E3: 16 18 20 22 24 26 28 30
2981 E4: 17 19 21 23 25 27 29 31
2983 In order to proceed and create the correct sequence for the next stage (or
2984 for the correct output, if the second stage is the last one, as in our
2985 example), we first put the output of extract_even operation and then the
2986 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
2987 The input for the second stage is:
2989 1st vec (E1): 0 2 4 6 8 10 12 14
2990 2nd vec (E3): 16 18 20 22 24 26 28 30
2991 3rd vec (E2): 1 3 5 7 9 11 13 15
2992 4th vec (E4): 17 19 21 23 25 27 29 31
2994 The output of the second stage:
2996 E1: 0 4 8 12 16 20 24 28
2997 E2: 2 6 10 14 18 22 26 30
2998 E3: 1 5 9 13 17 21 25 29
2999 E4: 3 7 11 15 19 23 27 31
3001 And RESULT_CHAIN after reordering:
3003 1st vec (E1): 0 4 8 12 16 20 24 28
3004 2nd vec (E3): 1 5 9 13 17 21 25 29
3005 3rd vec (E2): 2 6 10 14 18 22 26 30
3006 4th vec (E4): 3 7 11 15 19 23 27 31
3010 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
3011 unsigned int length
,
3013 block_stmt_iterator
*bsi
,
3014 VEC(tree
,heap
) **result_chain
)
3016 tree perm_dest
, perm_stmt
, data_ref
, first_vect
, second_vect
;
3017 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3021 use_operand_p use_p
;
3023 /* Check that the operation is supported. */
3024 if (!vect_strided_load_supported (vectype
))
3027 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
/* log2(LENGTH) stages; each stage pairs adjacent vectors (j, j+1).  */
3028 for (i
= 0; i
< exact_log2(length
); i
++)
3030 for (j
= 0; j
< length
; j
+=2)
3032 first_vect
= VEC_index(tree
, dr_chain
, j
);
3033 second_vect
= VEC_index(tree
, dr_chain
, j
+1);
3035 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3036 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
3037 add_referenced_tmp_var (perm_dest
);
/* Prefer the target's extract_even builtin; fall back to the generic
   VEC_EXTRACT_EVEN_EXPR tree code.  */
3039 if (targetm
.vectorize
.builtin_extract_even
3040 && targetm
.vectorize
.builtin_extract_even (vectype
,
3041 NULL_TREE
, NULL_TREE
, NULL_TREE
))
3042 perm_stmt
= targetm
.vectorize
.builtin_extract_even (perm_dest
,
3043 first_vect
, second_vect
, stmt
);
3045 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
3046 build2 (VEC_EXTRACT_EVEN_EXPR
, vectype
,
3047 first_vect
, second_vect
));
3049 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3050 TREE_OPERAND (perm_stmt
, 0) = data_ref
;
3051 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
/* Mark virtual uses/kills of the new stmt for SSA renaming.  */
3054 FOR_EACH_SSA_USE_OPERAND (use_p
, perm_stmt
, iter
,
3055 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
3057 tree op
= USE_FROM_PTR (use_p
);
3058 if (TREE_CODE (op
) == SSA_NAME
)
3059 op
= SSA_NAME_VAR (op
);
3060 mark_sym_for_renaming (op
);
3062 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
3064 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3065 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
3066 add_referenced_tmp_var (perm_dest
);
3068 if (targetm
.vectorize
.builtin_extract_odd
3069 && targetm
.vectorize
.builtin_extract_odd (vectype
,
3070 NULL_TREE
, NULL_TREE
, NULL_TREE
))
3071 perm_stmt
= targetm
.vectorize
.builtin_extract_odd (perm_dest
,
3072 first_vect
, second_vect
, stmt
);
3074 perm_stmt
= build2 (MODIFY_EXPR
, vectype
, perm_dest
,
3075 build2 (VEC_EXTRACT_ODD_EXPR
, vectype
,
3078 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3079 TREE_OPERAND (perm_stmt
, 0) = data_ref
;
3080 vect_finish_stmt_generation (stmt
, perm_stmt
, bsi
);
3083 FOR_EACH_SSA_USE_OPERAND (use_p
, perm_stmt
, iter
,
3084 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
3086 tree op
= USE_FROM_PTR (use_p
);
3087 if (TREE_CODE (op
) == SSA_NAME
)
3088 op
= SSA_NAME_VAR (op
);
3089 mark_sym_for_renaming (op
);
/* Even results go in the first half of RESULT_CHAIN, odd results in the
   second half (see the function comment).  */
3092 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
/* The stage's output becomes the next stage's input.  */
3094 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3100 /* Function vect_transform_strided_load.
3102 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3103 to perform their permutation and ascribe the result vectorized statements to
3104 the scalar statements.
3108 vect_transform_strided_load (tree stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
3109 block_stmt_iterator
*bsi
)
3111 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3112 tree first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3113 tree next_stmt
, new_stmt
;
3114 VEC(tree
,heap
) *result_chain
= NULL
;
3115 unsigned int num
, i
;
3118 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3119 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3120 vectors, that are ready for vector computation. */
3121 result_chain
= VEC_alloc (tree
, heap
, size
);
3123 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, bsi
, &result_chain
))
3126 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3127 Since we scan the chain starting from it's first node, their order
3128 corresponds the order of data-refs in RESULT_CHAIN. */
3129 next_stmt
= first_stmt
;
3131 for (i
= 0; VEC_iterate(tree
, result_chain
, i
, tmp_data_ref
); i
++)
3136 /* Skip the gaps. Loads created for the gaps will be removed by dead
3137 code elimination pass later.
3138 DR_GROUP_GAP is the number of steps in elements from the previous
3139 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3140 correspond to the gaps.
3142 if (num
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
3150 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
3151 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3152 copies, and we put the new vector statement in the first available
3154 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
3155 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
3158 tree prev_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
3159 tree rel_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
3162 prev_stmt
= rel_stmt
;
3163 rel_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
3165 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) = new_stmt
;
3168 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3169 /* If NEXT_STMT accesses the same DR as the previous statement,
3170 put the same TMP_DATA_REF as its vectorized statement; otherwise
3171 get the next data-ref from RESULT_CHAIN. */
3172 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3180 /* vectorizable_load.
3182 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3184 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3185 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3186 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3189 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
3192 tree vec_dest
= NULL
;
3193 tree data_ref
= NULL
;
3195 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3196 stmt_vec_info prev_stmt_info
;
3197 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
3198 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3201 tree new_stmt
= NULL_TREE
, first_stmt
;
3203 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3204 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3205 enum dr_alignment_support alignment_support_cheme
;
3206 tree dataref_ptr
= NULL_TREE
;
3208 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3209 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3210 int i
, j
, group_size
;
3211 tree msq
= NULL_TREE
, lsq
;
3212 tree offset
= NULL_TREE
;
3213 tree realignment_token
= NULL_TREE
;
3214 tree phi_stmt
= NULL_TREE
;
3215 VEC(tree
,heap
) *dr_chain
= NULL
;
3216 bool strided_load
= false;
3218 gcc_assert (ncopies
>= 1);
3220 /* Is vectorizable load? */
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3224 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
3226 if (STMT_VINFO_LIVE_P (stmt_info
))
3228 /* FORNOW: not yet supported. */
3229 if (vect_print_dump_info (REPORT_DETAILS
))
3230 fprintf (vect_dump
, "value used after loop.");
3234 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
3237 scalar_dest
= TREE_OPERAND (stmt
, 0);
3238 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
3241 op
= TREE_OPERAND (stmt
, 1);
3242 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
3243 && !DR_GROUP_FIRST_DR (stmt_info
))
3246 if (!STMT_VINFO_DATA_REF (stmt_info
))
3249 mode
= (int) TYPE_MODE (vectype
);
3251 /* FORNOW. In some cases can vectorize even if data-type not supported
3252 (e.g. - data copies). */
3253 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
3255 if (vect_print_dump_info (REPORT_DETAILS
))
3256 fprintf (vect_dump
, "Aligned load, but unsupported type.");
3260 /* Check if the load is a part of an interleaving chain. */
3261 if (DR_GROUP_FIRST_DR (stmt_info
))
3263 strided_load
= true;
3265 /* Check if interleaving is supported. */
3266 if (!vect_strided_load_supported (vectype
))
3270 if (!vec_stmt
) /* transformation not required. */
3272 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
3278 if (vect_print_dump_info (REPORT_DETAILS
))
3279 fprintf (vect_dump
, "transform load.");
3283 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3284 /* Check if the chain of loads is already vectorized. */
3285 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
3287 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3288 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3289 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3298 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
3299 gcc_assert (alignment_support_cheme
);
3301 *vec_stmt
= NULL_TREE
;
3303 /* In case the vectorization factor (VF) is bigger than the number
3304 of elements that we can fit in a vectype (nunits), we have to generate
3305 more than one vector stmt - i.e - we need to "unroll" the
3306 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3307 from one copy of the vector stmt to the next, in the field
3308 STMT_VINFO_RELATED_STMT. This is necessary in oder to allow following
3309 stages to find the correct vector defs to be used when vectorizing
3310 stmts that use the defs of the current stmt. The example below illustrates
3311 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3312 4 vectorized stmts):
3314 before vectorization:
3315 RELATED_STMT VEC_STMT
3319 step 1: vectorize stmt S1:
3320 We first create the vector stmt VS1_0, and, as usual, record a
3321 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3322 Next, we create the vector stmt VS1_1, and record a pointer to
3323 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3324 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3326 RELATED_STMT VEC_STMT
3327 VS1_0: vx0 = memref0 VS1_1 -
3328 VS1_1: vx1 = memref1 VS1_2 -
3329 VS1_2: vx2 = memref2 VS1_3 -
3330 VS1_3: vx3 = memref3 - -
3331 S1: x = load - VS1_0
3334 See in documentation in vectorizable_operation for how the information
3335 we recorded in RELATED_STMT field is used to vectorize stmt S2. */
3337 /* In case of interleaving (non-unit strided access):
3344 We create vectorized loads starting from base address (the access of the
3345 first stmt in the chain (S2 in the above example):
3348 VS2: vx1 = &base + vec_size*1
3349 VS3: vx3 = &base + vec_size*2
3350 VS4: vx4 = &base + vec_size*3
3352 Then permutation statements are generated:
3354 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3355 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3358 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3359 (the order of the data-refs in the output of vect_permute_load_chain
3360 corresponds to the order of scalar stmts in the interleaving chain - see
3361 the documentaion of vect_permute_load_chain()).
3362 The generation of permutation stmts and recording them in
3363 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3365 In case of both multiple types and interleaving, above vector loads and
3366 permutation stmts are created for every copy. The result vector stmts are
3367 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3368 STMT_VINFO_RELATED_STMT for the next copies.
3372 if (alignment_support_cheme == dr_aligned
3373 || alignment_support_cheme == dr_unaligned_supported)
3379 p = p + indx * vectype_size;
3383 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
3387 >> msq_init = *(floor(p1))
3388 p2 = initial_addr + VS - 1;
3389 >> realignment_token = call target_builtin;
3392 p2 = p2 + indx * vectype_size
3394 >> vec_dest = realign_load (msq, lsq, realignment_token)
3401 if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
3403 msq
= vect_setup_realignment (first_stmt
, bsi
, &realignment_token
);
3404 phi_stmt
= SSA_NAME_DEF_STMT (msq
);
3405 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
3408 prev_stmt_info
= NULL
;
3409 for (j
= 0; j
< ncopies
; j
++)
3411 /* 1. Create the vector pointer update chain. */
3413 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, bsi
, offset
,
3414 &dummy
, &ptr_incr
, false);
3416 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3418 for (i
= 0; i
< group_size
; i
++)
3420 /* 2. Create the vector-load in the loop. */
3421 switch (alignment_support_cheme
)
3424 gcc_assert (aligned_access_p (first_dr
));
3425 data_ref
= build_fold_indirect_ref (dataref_ptr
);
3427 case dr_unaligned_supported
:
3429 int mis
= DR_MISALIGNMENT (first_dr
);
3430 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
3432 gcc_assert (!aligned_access_p (first_dr
));
3433 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
3435 build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
3438 case dr_unaligned_software_pipeline
:
3439 gcc_assert (!aligned_access_p (first_dr
));
3440 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
3445 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3446 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
3447 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3448 TREE_OPERAND (new_stmt
, 0) = new_temp
;
3449 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
3450 copy_virtual_operands (new_stmt
, stmt
);
3451 mark_new_vars_to_rename (new_stmt
);
3453 /* 3. Handle explicit realignment if necessary/supported. */
3454 if (alignment_support_cheme
== dr_unaligned_software_pipeline
)
3457 <vec_dest = realign_load (msq, lsq, realignment_token)> */
3458 lsq
= TREE_OPERAND (new_stmt
, 0);
3459 if (!realignment_token
)
3460 realignment_token
= dataref_ptr
;
3461 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3463 build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, realignment_token
);
3464 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
3465 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3466 TREE_OPERAND (new_stmt
, 0) = new_temp
;
3467 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
3468 if (i
== group_size
- 1)
3469 add_phi_arg (phi_stmt
, lsq
, loop_latch_edge (loop
));
3473 VEC_quick_push (tree
, dr_chain
, new_temp
);
3474 if (i
< group_size
- 1)
3475 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, bsi
, stmt
);
3479 if (!vect_transform_strided_load (stmt
, dr_chain
, group_size
, bsi
))
3481 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3486 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3488 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3489 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3496 /* Function vect_get_epilog_def_for_operand
3498 STMT defines a value that is used after the loop. OP is an operand
3499 in STMT. This function returns a def that will be used in the
3500 "epilog-stmt" for STMT (see vectorizable_live_operation).
3502 In the case that OP is an SSA_NAME which is defined in the loop, then
3503 STMT_VINFO_EPILOG_STMT of the defining stmt holds the relevant def.
3505 In case OP is an invariant or constant, the same OP can be used.
3507 In case STMT is a phi-node that represents a simple induction-variable,
3508 a code that computes the value of the induction variable after the
3513 vect_get_epilog_def_for_operand (tree op
, tree stmt
,
3514 block_stmt_iterator
* bsi
)
3516 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3517 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3518 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3519 enum vect_def_type dt
;
3526 vect_is_simple_live_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
3528 gcc_assert (is_simple_use
);
3532 /* Case 1: operand is constant or invariant. */
3533 case vect_constant_def
:
3534 case vect_invariant_def
:
3537 /* Case 2: operand is defined inside the loop. */
3540 /* Get the def from the respective epilog stmt. */
3541 stmt_vec_info def_stmt_info
= vinfo_for_stmt (def_stmt
);
3542 epilog_stmt
= STMT_VINFO_EPILOG_STMT (def_stmt_info
);
3544 gcc_assert (epilog_stmt
);
3545 return TREE_OPERAND (epilog_stmt
, 0);
3548 /* Case 3: operand is defined by loop-header phi - induction. */
3549 case vect_induction_def
:
3551 tree niters
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
3552 tree type
= TREE_TYPE (niters
);
3554 gcc_assert (TREE_CODE (def_stmt
) == PHI_NODE
);
3555 niters
= build2 (MINUS_EXPR
, type
, niters
,
3556 fold_convert (type
, integer_one_node
));
3557 return advance_iv_by_n (loop
, def_stmt
, niters
, bsi
);
3566 /* Function vectorizable_live_operation.
3568 STMT computes a value that is used outside the loop. Check if it can
3569 be supported. "Vectorize" it by generating the appropriate (scalar)
3570 computation at the loop epilog. Record the newly generated epilog-code
3571 in STMT_VINFO_EPILOG_STMT. */
3574 vectorizable_live_operation (tree stmt
,
3575 block_stmt_iterator
*bsi
,
3579 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3580 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3581 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3583 enum tree_code code
;
3587 enum vect_def_type dt
;
3589 tree epilog_oprnd0
, epilog_oprnd1
= NULL_TREE
;
3590 imm_use_iterator imm_iter
;
3591 use_operand_p use_p
;
3595 tree dest_var
, dest_name
;
3598 if (!STMT_VINFO_LIVE_P (stmt_info
))
3601 if (TREE_CODE (stmt
) == PHI_NODE
)
3603 tree name
= PHI_RESULT (stmt
);
3605 if (!vect_is_simple_live_use (name
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3607 if (dt
!= vect_induction_def
)
3612 gcc_assert (TREE_CODE (stmt
) == MODIFY_EXPR
);
3614 operation
= TREE_OPERAND (stmt
, 1);
3615 code
= TREE_CODE (operation
);
3616 op_type
= TREE_CODE_LENGTH (code
);
3618 if (op_type
!= unary_op
&& op_type
!= binary_op
)
3620 if (vect_print_dump_info (REPORT_DETAILS
))
3621 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
3625 for (i
= 0; i
< op_type
; i
++)
3627 op
= TREE_OPERAND (operation
, i
);
3628 if (!vect_is_simple_live_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3630 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
3631 && dt
!= vect_induction_def
&& dt
!= vect_loop_def
)
3635 if (!epilog_stmt
) /* transformation not required. */
3640 if (vect_print_dump_info (REPORT_DETAILS
))
3641 fprintf (vect_dump
, "transform live operation.");
3644 op0
= TREE_OPERAND (operation
, 0);
3645 epilog_oprnd0
= vect_get_epilog_def_for_operand (op0
, stmt
, bsi
);
3646 if (vect_print_dump_info (REPORT_DETAILS
))
3647 print_generic_expr (vect_dump
, epilog_oprnd0
, TDF_SLIM
);
3648 if (op_type
== binary_op
)
3650 op1
= TREE_OPERAND (operation
, 1);
3651 epilog_oprnd1
= vect_get_epilog_def_for_operand (op1
, stmt
, bsi
);
3652 if (vect_print_dump_info (REPORT_DETAILS
))
3653 print_generic_expr (vect_dump
, epilog_oprnd1
, TDF_SLIM
);
3656 /* Arguments are ready, create the new vector stmt. */
3657 type
= TREE_TYPE (operation
);
3658 if (op_type
== unary_op
)
3659 expr
= build1 (code
, type
, epilog_oprnd0
);
3661 expr
= build2 (code
, type
, epilog_oprnd0
, epilog_oprnd1
);
3662 if (vect_print_dump_info (REPORT_DETAILS
))
3663 print_generic_expr (vect_dump
, expr
, TDF_SLIM
);
3665 dest_name
= TREE_OPERAND (stmt
, 0);
3666 dest_var
= vect_create_destination_var (dest_name
, type
);
3667 new_stmt
= build2 (MODIFY_EXPR
, type
, dest_var
, expr
);
3668 new_temp
= make_ssa_name (dest_var
, new_stmt
);
3669 TREE_OPERAND (new_stmt
, 0) = new_temp
;
3670 bsi_insert_before (bsi
, new_stmt
, BSI_SAME_STMT
);
3672 STMT_VINFO_EPILOG_STMT (stmt_info
) = new_stmt
;
3674 /* Find relevant loop-closed-exit-phi to update */
3676 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, dest_name
)
3678 if (!flow_bb_inside_loop_p (loop
, bb_for_stmt (USE_STMT (use_p
))))
3680 exit_phi
= USE_STMT (use_p
);
3687 /* Replace uses of the PHI_RESULT of exit_phi with the newly created
3689 tree orig_name
= PHI_RESULT (exit_phi
);
3691 FOR_EACH_IMM_USE_SAFE (use_p
, imm_iter
, orig_name
)
3692 SET_USE (use_p
, new_temp
);
3699 /* Function vect_is_simple_cond.
3702 LOOP - the loop that is being vectorized.
3703 COND - Condition that is checked for simple use.
3705 Returns whether a COND can be vectorized. Checks whether
3706 condition operands are supportable using vec_is_simple_use. */
3709 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
3713 enum vect_def_type dt
;
3715 if (!COMPARISON_CLASS_P (cond
))
3718 lhs
= TREE_OPERAND (cond
, 0);
3719 rhs
= TREE_OPERAND (cond
, 1);
3721 if (TREE_CODE (lhs
) == SSA_NAME
)
3723 tree lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
3724 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
3727 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
)
3730 if (TREE_CODE (rhs
) == SSA_NAME
)
3732 tree rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
3733 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
3736 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
)
3742 /* vectorizable_condition.
3744 Check if STMT is conditional modify expression that can be vectorized.
3745 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3746 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3749 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3752 vectorizable_condition (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
3754 tree scalar_dest
= NULL_TREE
;
3755 tree vec_dest
= NULL_TREE
;
3756 tree op
= NULL_TREE
;
3757 tree cond_expr
, then_clause
, else_clause
;
3758 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3759 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3760 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
3761 tree vec_compare
, vec_cond_expr
;
3763 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3764 enum machine_mode vec_mode
;
3766 enum vect_def_type dt
;
3767 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3768 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3770 gcc_assert (ncopies
>= 1);
3772 return false; /* FORNOW */
3774 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3777 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_loop_def
);
3779 if (STMT_VINFO_LIVE_P (stmt_info
))
3781 /* FORNOW: not yet supported. */
3782 if (vect_print_dump_info (REPORT_DETAILS
))
3783 fprintf (vect_dump
, "value used after loop.");
3787 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
3790 op
= TREE_OPERAND (stmt
, 1);
3792 if (TREE_CODE (op
) != COND_EXPR
)
3795 cond_expr
= TREE_OPERAND (op
, 0);
3796 then_clause
= TREE_OPERAND (op
, 1);
3797 else_clause
= TREE_OPERAND (op
, 2);
3799 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
3802 if (TREE_CODE (then_clause
) == SSA_NAME
)
3804 tree then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
3805 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
3806 &then_def_stmt
, &def
, &dt
))
3809 else if (TREE_CODE (then_clause
) != INTEGER_CST
3810 && TREE_CODE (then_clause
) != REAL_CST
)
3813 if (TREE_CODE (else_clause
) == SSA_NAME
)
3815 tree else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
3816 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
3817 &else_def_stmt
, &def
, &dt
))
3820 else if (TREE_CODE (else_clause
) != INTEGER_CST
3821 && TREE_CODE (else_clause
) != REAL_CST
)
3825 vec_mode
= TYPE_MODE (vectype
);
3829 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
3830 return expand_vec_cond_expr_p (op
, vec_mode
);
3836 scalar_dest
= TREE_OPERAND (stmt
, 0);
3837 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3839 /* Handle cond expr. */
3841 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
3843 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
3844 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
3845 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
3847 /* Arguments are ready. create the new vector stmt. */
3848 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
3849 vec_cond_lhs
, vec_cond_rhs
);
3850 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
3851 vec_compare
, vec_then_clause
, vec_else_clause
);
3853 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_cond_expr
);
3854 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
3855 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
3856 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
3862 /* Function vect_transform_stmt.
3864 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3867 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
,
3868 block_stmt_iterator
*exit_bsi
, bool *interleaving
)
3870 bool is_store
= false;
3871 bool is_load
= false;
3872 tree vec_stmt
= NULL_TREE
;
3873 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3874 tree orig_stmt_in_pattern
;
3877 *interleaving
= false;
3879 if (STMT_VINFO_RELEVANT_P (stmt_info
))
3881 switch (STMT_VINFO_TYPE (stmt_info
))
3883 case type_demotion_vec_info_type
:
3884 done
= vectorizable_type_demotion (stmt
, bsi
, &vec_stmt
);
3888 case type_promotion_vec_info_type
:
3889 done
= vectorizable_type_promotion (stmt
, bsi
, &vec_stmt
);
3893 case op_vec_info_type
:
3894 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
3898 case assignment_vec_info_type
:
3899 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
3903 case load_vec_info_type
:
3904 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
3909 case store_vec_info_type
:
3910 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
3913 if (DR_GROUP_FIRST_DR (stmt_info
))
3915 /* In case of interleaving, the whole chain is vectorized when the
3916 last store in the chain is reached. Store stmts before the last
3917 one are skipped, and there vec_stmt_info shoudn't be freed
3919 *interleaving
= true;
3920 if (STMT_VINFO_VEC_STMT (stmt_info
))
3927 case condition_vec_info_type
:
3928 done
= vectorizable_condition (stmt
, bsi
, &vec_stmt
);
3933 if (vect_print_dump_info (REPORT_DETAILS
))
3934 fprintf (vect_dump
, "stmt not supported.");
3938 /* If STMT was inserted by the vectorizer to replace a computation idiom
3939 (i.e. it is a "pattern stmt"), We need to record a pointer to VEC_STMT
3940 in the stmt_info of ORIG_STMT_IN_PATTERN (the stmt in the original
3941 sequence that computed this idiom). See more detail in the
3942 documentation of vect_pattern_recog. */
3944 gcc_assert (vec_stmt
|| is_load
|| is_store
3946 && !STMT_VINFO_VEC_STMT (stmt_info
)));
3949 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
3950 orig_stmt_in_pattern
= STMT_VINFO_RELATED_STMT (stmt_info
);
3951 if (orig_stmt_in_pattern
)
3953 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt_in_pattern
);
3955 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
3957 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
3958 STMT_VINFO_VEC_STMT (stmt_vinfo
) = vec_stmt
;
3966 if (STMT_VINFO_VEC_STMT (stmt_info
))
3968 tree next_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3972 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (next_stmt
)))
3974 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
3976 switch (STMT_VINFO_TYPE (vinfo_for_stmt (next_stmt
)))
3978 case reduc_vec_info_type
:
3979 done
= vectorizable_reduction (next_stmt
, bsi
, &vec_stmt
);
3984 done
= vectorizable_live_operation (next_stmt
, exit_bsi
,
3989 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3995 if (STMT_VINFO_LIVE_P (stmt_info
))
3997 switch (STMT_VINFO_TYPE (stmt_info
))
3999 case reduc_vec_info_type
:
4000 done
= vectorizable_reduction (stmt
, bsi
, &vec_stmt
);
4005 done
= vectorizable_live_operation (stmt
, exit_bsi
, &vec_stmt
);
4014 /* This function builds ni_name = number of iterations loop executes
4015 on the loop preheader. */
4018 vect_build_loop_niters (loop_vec_info loop_vinfo
)
4020 tree ni_name
, stmt
, var
;
4022 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4023 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
4025 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
4026 add_referenced_tmp_var (var
);
4027 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
4029 pe
= loop_preheader_edge (loop
);
4032 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4033 gcc_assert (!new_bb
);
4040 /* This function generates the following statements:
4042 ni_name = number of iterations loop executes
4043 ratio = ni_name / vf
4044 ratio_mult_vf_name = ratio * vf
4046 and places them at the loop preheader edge. */
4049 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
4051 tree
*ratio_mult_vf_name_ptr
,
4052 tree
*ratio_name_ptr
)
4059 tree ratio_mult_vf_name
;
4060 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4061 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
4062 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4065 pe
= loop_preheader_edge (loop
);
4067 /* Generate temporary variable that contains
4068 number of iterations loop executes. */
4070 ni_name
= vect_build_loop_niters (loop_vinfo
);
4071 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
4073 /* Create: ratio = ni >> log2(vf) */
4075 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
4076 add_referenced_tmp_var (var
);
4077 ratio_name
= make_ssa_name (var
, NULL_TREE
);
4078 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_name
,
4079 build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
));
4080 SSA_NAME_DEF_STMT (ratio_name
) = stmt
;
4082 pe
= loop_preheader_edge (loop
);
4083 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4084 gcc_assert (!new_bb
);
4086 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4088 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
4089 add_referenced_tmp_var (var
);
4090 ratio_mult_vf_name
= make_ssa_name (var
, NULL_TREE
);
4091 stmt
= build2 (MODIFY_EXPR
, void_type_node
, ratio_mult_vf_name
,
4092 build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
), ratio_name
, log_vf
));
4093 SSA_NAME_DEF_STMT (ratio_mult_vf_name
) = stmt
;
4095 pe
= loop_preheader_edge (loop
);
4096 new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4097 gcc_assert (!new_bb
);
4099 *ni_name_ptr
= ni_name
;
4100 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
4101 *ratio_name_ptr
= ratio_name
;
4107 /* Function update_vuses_to_preheader.
4110 STMT - a statement with potential VUSEs.
4111 LOOP - the loop whose preheader will contain STMT.
4113 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4114 appears to be defined in a V_MAY_DEF in another statement in a loop.
4115 One such case is when the VUSE is at the dereference of a __restricted__
4116 pointer in a load and the V_MAY_DEF is at the dereference of a different
4117 __restricted__ pointer in a store. Vectorization may result in
4118 copy_virtual_uses being called to copy the problematic VUSE to a new
4119 statement that is being inserted in the loop preheader. This procedure
4120 is called to change the SSA_NAME in the new statement's VUSE from the
4121 SSA_NAME updated in the loop to the related SSA_NAME available on the
4122 path entering the loop.
4124 When this function is called, we have the following situation:
4129 # name1 = phi < name0 , name2>
4134 # name2 = vdef <name1>
4139 Stmt S1 was created in the loop preheader block as part of misaligned-load
4140 handling. This function fixes the name of the vuse of S1 from 'name1' to
4144 update_vuses_to_preheader (tree stmt
, struct loop
*loop
)
4146 basic_block header_bb
= loop
->header
;
4147 edge preheader_e
= loop_preheader_edge (loop
);
4149 use_operand_p use_p
;
4151 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_VUSE
)
4153 tree ssa_name
= USE_FROM_PTR (use_p
);
4154 tree def_stmt
= SSA_NAME_DEF_STMT (ssa_name
);
4155 tree name_var
= SSA_NAME_VAR (ssa_name
);
4156 basic_block bb
= bb_for_stmt (def_stmt
);
4158 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4159 if (!IS_EMPTY_STMT (def_stmt
)
4160 && flow_bb_inside_loop_p (loop
, bb
))
4162 /* If the block containing the statement defining the SSA_NAME
4163 is in the loop then it's necessary to find the definition
4164 outside the loop using the PHI nodes of the header. */
4166 bool updated
= false;
4168 for (phi
= phi_nodes (header_bb
); phi
; phi
= TREE_CHAIN (phi
))
4170 if (SSA_NAME_VAR (PHI_RESULT (phi
)) == name_var
)
4172 SET_USE (use_p
, PHI_ARG_DEF (phi
, preheader_e
->dest_idx
));
4177 gcc_assert (updated
);
4183 /* Function advance_iv_by_n
4185 LOOP: A loop that has an induction variable (IV) represented by IV_PHI.
4186 IV_PHI: A phi node (typically at the header-bb of LOOP)
4187 NITERS: number of iterations (steps) to advance the IV.
4189 This function advances the IV by NITERS, by generating the following
4190 code at the exit-bb of LOOP:
4191 new_iv = iv_initial_condition + iv_step * NITERS;
4195 advance_iv_by_n (struct loop
*loop
, tree iv_phi
, tree niters
, block_stmt_iterator
*bsi
)
4197 tree access_fn
= NULL
;
4198 tree evolution_part
;
4201 tree var
, stmt
, ni
, ni_name
;
4203 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (iv_phi
));
4204 gcc_assert (access_fn
);
4205 if (vect_print_dump_info (REPORT_DETAILS
))
4207 fprintf (vect_dump
, "accesses funcion for phi: ");
4208 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
4212 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
4213 gcc_assert (evolution_part
!= NULL_TREE
);
4215 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4216 of degree >= 2 or exponential. */
4217 gcc_assert (!tree_is_chrec (evolution_part
));
4219 step_expr
= evolution_part
;
4221 unshare_expr (initial_condition_in_loop_num (access_fn
, loop
->num
));
4222 ni
= build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
4223 build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, step_expr
), init_expr
);
4224 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
4225 add_referenced_tmp_var (var
);
4226 ni_name
= force_gimple_operand (ni
, &stmt
, false, var
);
4230 bsi_insert_before (bsi
, stmt
, BSI_SAME_STMT
);
4236 /* Function vect_update_ivs_after_vectorizer.
4238 "Advance" the induction variables of LOOP to the value they should take
4239 after the execution of LOOP. This is currently necessary because the
4240 vectorizer does not handle induction variables that are used after the
4241 loop. Such a situation occurs when the last iterations of LOOP are
4243 1. We introduced new uses after LOOP for IVs that were not originally used
4244 after LOOP: the IVs of LOOP are now used by an epilog loop.
4245 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4246 times, whereas the loop IVs should be bumped N times.
4249 - LOOP - a loop that is going to be vectorized. The last few iterations
4250 of LOOP were peeled.
4251 - NITERS - the number of iterations that LOOP executes (before it is
4252 vectorized). i.e, the number of times the ivs should be bumped.
4253 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4254 coming out from LOOP on which there are uses of the LOOP ivs
4255 (this is the path from LOOP->exit to epilog_loop->preheader).
4257 The new definitions of the ivs are placed in LOOP->exit.
4258 The phi args associated with the edge UPDATE_E in the bb
4259 UPDATE_E->dest are updated accordingly.
4261 Assumption 1: Like the rest of the vectorizer, this function assumes
4262 a single loop exit that has a single predecessor.
4264 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4265 organized in the same order.
4267 Assumption 3: The access function of the ivs is simple enough (see
4268 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4270 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4271 coming out of LOOP on which the ivs of LOOP are used (this is the path
4272 that leads to the epilog loop; other paths skip the epilog loop). This
4273 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4274 needs to have its phis updated.
/* Advance the IVs of LOOP past NITERS iterations and patch the phi args on
   edge UPDATE_E accordingly (see the header comment above for the full
   contract).  NOTE(review): this extraction appears to have lost brace and
   blank lines; only comments were added here, code tokens are untouched.  */
4278 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
4281 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4283 basic_block update_bb
= update_e
->dest
;
/* New IV definitions are emitted at the end of the single exit bb.  */
4284 basic_block exit_bb
= loop
->single_exit
->dest
;
4285 block_stmt_iterator bsi
= bsi_last (exit_bb
);
4287 /* Make sure there exists a single-predecessor exit bb: */
4288 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1);
4290 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
/* Walk the phis of LOOP's header and of update_bb in lockstep; Assumption 2
   in the header comment guarantees they are in the same order.  */
4292 for (phi
= phi_nodes (loop
->header
), phi1
= phi_nodes (update_bb
);
4294 phi
= PHI_CHAIN (phi
), phi1
= PHI_CHAIN (phi1
))
4298 if (vect_print_dump_info (REPORT_DETAILS
))
4300 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
4301 print_generic_expr (vect_dump
, phi
, TDF_SLIM
);
4304 /* Skip virtual phi's. */
4305 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
4307 if (vect_print_dump_info (REPORT_DETAILS
))
4308 fprintf (vect_dump
, "virtual phi. skip.");
4312 /* Skip reduction phis. */
4313 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
4315 if (vect_print_dump_info (REPORT_DETAILS
))
4316 fprintf (vect_dump
, "reduc phi. skip.");
/* advance_iv_by_n (defined above) builds init + niters*step for this IV and
   inserts the computation at BSI, returning its SSA name.  */
4321 ni_name
= advance_iv_by_n (loop
, phi
, niters
, &bsi
);
4323 /* Fix phi expressions in the successor bb. */
4324 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
4329 /* Function vect_do_peeling_for_loop_bound
4331 Peel the last iterations of the loop represented by LOOP_VINFO.
4332 The peeled iterations form a new epilog loop. Given that the loop now
4333 iterates NITERS times, the new epilog loop iterates
4334 NITERS % VECTORIZATION_FACTOR times.
4336 The original loop will later be made to iterate
4337 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
/* Peel the last NITERS % VF iterations of LOOP_VINFO's loop into a scalar
   epilog loop; the vector loop will iterate *RATIO = NITERS / VF times.
   NOTE(review): structural lines (braces, blanks) were lost in this
   extraction; only comments were added, code tokens are untouched.  */
4340 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
,
4341 struct loops
*loops
)
4343 tree ni_name
, ratio_mult_vf_name
;
4344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4345 struct loop
*new_loop
;
4347 basic_block preheader
;
4350 if (vect_print_dump_info (REPORT_DETAILS
))
4351 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
/* Needed by slpeel_tree_peel_loop_to_edge; released at the end.  */
4353 initialize_original_copy_tables ();
4355 /* Generate the following variables on the preheader of original loop:
4357 ni_name = number of iterations the original loop executes
4358 ratio = ni_name / vf
4359 ratio_mult_vf_name = ratio * vf */
4360 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
4361 &ratio_mult_vf_name
, ratio
);
/* Remember the loop's number so we can assert below that peeling left the
   loop structure we are working on in place.  */
4363 loop_num
= loop
->num
;
/* Peel off the remaining iterations into a new (scalar) epilog loop.  */
4364 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, loops
, loop
->single_exit
,
4365 ratio_mult_vf_name
, ni_name
, false);
4366 gcc_assert (new_loop
);
4367 gcc_assert (loop_num
== loop
->num
);
4368 #ifdef ENABLE_CHECKING
4369 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
/* NOTE(review): the matching #endif is not visible in this extraction.  */
4372 /* A guard that controls whether the new_loop is to be executed or skipped
4373 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4374 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4375 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4376 is on the path where the LOOP IVs are used and need to be updated. */
4378 preheader
= loop_preheader_edge (new_loop
)->src
;
4379 if (EDGE_PRED (preheader
, 0)->src
== loop
->single_exit
->dest
)
4380 update_e
= EDGE_PRED (preheader
, 0);
/* Otherwise the path from LOOP's exit enters through predecessor 1.  */
4382 update_e
= EDGE_PRED (preheader
, 1);
4384 /* Update IVs of original loop as if they were advanced
4385 by ratio_mult_vf_name steps. */
4386 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
4388 /* After peeling we have to reset scalar evolution analyzer. */
4391 free_original_copy_tables ();
4395 /* Function vect_gen_niters_for_prolog_loop
4397 Set the number of iterations for the loop represented by LOOP_VINFO
4398 to the minimum between LOOP_NITERS (the original iteration count of the loop)
4399 and the misalignment of DR - the data reference recorded in
4400 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
4401 this loop, the data reference DR will refer to an aligned location.
4403 The following computation is generated:
4405 If the misalignment of DR is known at compile time:
4406 addr_mis = int mis = DR_MISALIGNMENT (dr);
4407 Else, compute address misalignment in bytes:
4408 addr_mis = addr & (vectype_size - 1)
4410 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4412 (elem_size = element type size; an element is the scalar element
4413 whose type is the inner type of the vectype)
4417 prolog_niters = min ( LOOP_NITERS ,
4418 (VF/inter_size - addr_mis/elem_size)&(VF/inter_size-1) )
4419 where inter_size is the size of the interleaved group.
/* Compute the prolog-loop iteration count needed to align the data reference
   recorded in LOOP_VINFO_UNALIGNED_DR (see header comment above for the
   formula).  NOTE(review): several declaration lines (var, stmt, pe, new_bb,
   start_addr, inter_size - presumably initialized to 1) and the `else`
   keyword appear to have been lost in this extraction; only comments were
   added, code tokens are untouched.  */
4424 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
4426 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
4427 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4428 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4430 tree iters
, iters_name
;
4433 tree dr_stmt
= DR_STMT (dr
);
4434 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
4435 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* Vector alignment in bytes, used as the misalignment mask below.  */
4436 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
4437 tree niters_type
= TREE_TYPE (loop_niters
);
/* Size in bytes of one scalar element of the data reference.  */
4438 int element_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
4441 if (DR_GROUP_FIRST_DR (stmt_info
))
4443 /* For interleaved access element size must be multiplied by the size of
4444 the interleaved group. */
4445 inter_size
= DR_GROUP_SIZE (vinfo_for_stmt (
4446 DR_GROUP_FIRST_DR (stmt_info
)));
4447 element_size
*= inter_size
;
4450 pe
= loop_preheader_edge (loop
);
/* Case 1: the misalignment is known at compile time.  */
4452 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
4454 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
4455 int elem_misalign
= byte_misalign
/ element_size
;
4457 if (vect_print_dump_info (REPORT_DETAILS
))
4458 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
/* iters = (VF/inter_size - elem_misalign) & (VF/inter_size - 1), as a
   compile-time constant (see the formula in the header comment).  */
4460 iters
= build_int_cst (niters_type
,
4461 (vf
/inter_size
- elem_misalign
)&(vf
/inter_size
-1));
/* Case 2: misalignment unknown - compute it at runtime from the address.  */
4465 tree new_stmts
= NULL_TREE
;
/* NOTE(review): the "start_addr =" left-hand side of this call appears to
   have been lost in this extraction.  */
4467 vect_create_addr_base_for_vector_ref (dr_stmt
, &new_stmts
, NULL_TREE
);
4468 tree ptr_type
= TREE_TYPE (start_addr
);
4469 tree size
= TYPE_SIZE (ptr_type
);
/* An unsigned integer type wide enough to hold a pointer value.  */
4470 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
4471 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
/* element_size is a power of 2, so division becomes a right shift.  */
4472 tree elem_size_log
= build_int_cst (type
, exact_log2 (element_size
));
4473 tree vf_minus_1
= build_int_cst (type
, vf
- 1);
4474 tree vf_tree
= build_int_cst (type
, vf
);
/* Emit the address computation on the preheader edge; it must not split
   the edge (no new bb may be created).  */
4478 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmts
);
4479 gcc_assert (!new_bb
);
4481 /* Create: byte_misalign = addr & (vectype_size - 1) */
/* NOTE(review): the "byte_misalign =" left-hand side of this build2 appears
   to have been lost in this extraction.  */
4483 build2 (BIT_AND_EXPR
, type
, start_addr
, vectype_size_minus_1
);
4485 /* Create: elem_misalign = byte_misalign / element_size */
4486 elem_misalign
= build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
4488 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
4489 iters
= build2 (MINUS_EXPR
, type
, vf_tree
, elem_misalign
);
4490 iters
= build2 (BIT_AND_EXPR
, type
, iters
, vf_minus_1
);
4491 iters
= fold_convert (niters_type
, iters
);
4494 /* Create: prolog_loop_niters = min (iters, loop_niters) */
4495 /* If the loop bound is known at compile time we already verified that it is
4496 greater than vf; since the misalignment ('iters') is at most vf, there's
4497 no need to generate the MIN_EXPR in this case. */
4498 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
4499 iters
= build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
4501 if (vect_print_dump_info (REPORT_DETAILS
))
4503 fprintf (vect_dump
, "niters for prolog loop: ");
4504 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
/* Gimplify the expression and emit the resulting stmts on the preheader.  */
4507 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
4508 add_referenced_tmp_var (var
);
4509 iters_name
= force_gimple_operand (iters
, &stmt
, false, var
);
4511 /* Insert stmt on loop preheader edge. */
4514 basic_block new_bb
= bsi_insert_on_edge_immediate (pe
, stmt
);
4515 gcc_assert (!new_bb
);
4522 /* Function vect_update_init_of_dr
4524 NITERS iterations were peeled from LOOP. DR represents a data reference
4525 in LOOP. This function updates the information recorded in DR to
4526 account for the fact that the first NITERS iterations had already been
4527 executed. Specifically, it updates the OFFSET field of DR. */
4530 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
4532 tree offset
= DR_OFFSET (dr
);
4534 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
4535 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
4536 DR_OFFSET (dr
) = offset
;
4540 /* Function vect_update_inits_of_drs
4542 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
4543 This function updates the information recorded for the data references in
4544 the loop to account for the fact that the first NITERS iterations had
4545 already been executed. Specifically, it updates the initial_condition of the
4546 access_function of all the data_references in the loop. */
4549 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
4552 varray_type datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
4554 if (vect_dump
&& (dump_flags
& TDF_DETAILS
))
4555 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
4557 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (datarefs
); i
++)
4559 struct data_reference
*dr
= VARRAY_GENERIC_PTR (datarefs
, i
);
4560 vect_update_init_of_dr (dr
, niters
);
4565 /* Function vect_do_peeling_for_alignment
4567 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
4568 'niters' is set to the misalignment of one of the data references in the
4569 loop, thereby forcing it to refer to an aligned location at the beginning
4570 of the execution of this loop. The data reference for which we are
4571 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
/* Peel the first 'niters_of_prolog_loop' iterations of LOOP_VINFO's loop so
   that LOOP_VINFO_UNALIGNED_DR becomes aligned (see header comment above).
   NOTE(review): structural lines were lost in this extraction; only comments
   were added, code tokens are untouched.  */
4574 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, struct loops
*loops
)
4576 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4577 tree niters_of_prolog_loop
, ni_name
;
4579 struct loop
*new_loop
;
4581 if (vect_print_dump_info (REPORT_DETAILS
))
4582 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
4584 initialize_original_copy_tables ();
/* ni_name is the total iteration count; the prolog iterates
   min (ni_name, misalignment-derived count) times.  */
4586 ni_name
= vect_build_loop_niters (loop_vinfo
);
4587 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
4589 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
/* NOTE(review): the "new_loop =" assignment appears to have been lost in
   this extraction - the gcc_assert below checks the peeled loop exists.  */
4591 slpeel_tree_peel_loop_to_edge (loop
, loops
, loop_preheader_edge (loop
),
4592 niters_of_prolog_loop
, ni_name
, true);
4593 gcc_assert (new_loop
);
4594 #ifdef ENABLE_CHECKING
4595 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
/* NOTE(review): the matching #endif is not visible in this extraction.  */
4598 /* Update number of times loop executes. */
4599 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
/* The remaining (to-be-vectorized) loop runs the original count minus the
   iterations peeled into the prolog.  */
4600 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
4601 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
4603 /* Update the init conditions of the access functions of all data refs. */
4604 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
4606 /* After peeling we have to reset scalar evolution analyzer. */
4609 free_original_copy_tables ();
4613 /* Function vect_create_cond_for_align_checks.
4615 Create a conditional expression that represents the alignment checks for
4616 all of the data references (array element references) whose alignment must be
4620 LOOP_VINFO - two fields of the loop information are used.
4621 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
4622 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
4625 COND_EXPR_STMT_LIST - statements needed to construct the conditional
4627 The returned value is the conditional expression to be used in the if
4628 statement that controls which version of the loop gets executed at runtime.
4630 The algorithm makes two assumptions:
4631 1) The number of bytes "n" in a vector is a power of 2.
4632 2) An address "a" is aligned if a%n is zero and that this
4633 test can be done as a&(n-1) == 0. For example, for 16
4634 byte vectors the test is a&0xf == 0. */
/* Build and return the runtime alignment-check condition for loop versioning
   (see header comment above): ORs together the start addresses of all
   may-misalign refs, ANDs with LOOP_VINFO_PTR_MASK, and compares to zero.
   Supporting statements are appended to *COND_EXPR_STMT_LIST.
   NOTE(review): some lines (braces, a few left-hand sides/operands) were
   lost in this extraction; only comments were added, code is untouched.  */
4637 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
4638 tree
*cond_expr_stmt_list
)
4640 VEC(tree
,heap
) *may_misalign_stmts
4641 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
4643 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
4647 tree int_ptrsize_type
;
/* Accumulator for the runtime OR of all the addresses; NULL until the
   first address has been processed.  */
4649 tree or_tmp_name
= NULL_TREE
;
4650 tree and_tmp
, and_tmp_name
, and_stmt
;
4653 /* Check that mask is one less than a power of 2, i.e., mask is
4654 all zeros followed by all ones. */
4655 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
4657 /* CHECKME: what is the best integer or unsigned type to use to hold a
4658 cast from a pointer value? */
4659 psize
= TYPE_SIZE (ptr_type_node
);
/* NOTE(review): the "int_ptrsize_type" left-hand side of this assignment
   appears to have been lost in this extraction.  */
4661 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
4663 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
4664 of the first vector of the i'th data reference. */
4666 for (i
= 0; VEC_iterate (tree
, may_misalign_stmts
, i
, ref_stmt
); i
++)
4668 tree new_stmt_list
= NULL_TREE
;
4670 tree addr_tmp
, addr_tmp_name
, addr_stmt
;
4671 tree or_tmp
, new_or_tmp_name
, or_stmt
;
4673 /* create: addr_tmp = (int)(address_of_first_vector) */
4674 addr_base
= vect_create_addr_base_for_vector_ref (ref_stmt
,
/* Any stmts needed to compute the address go on the versioning cond list.  */
4678 if (new_stmt_list
!= NULL_TREE
)
4679 append_to_statement_list_force (new_stmt_list
, cond_expr_stmt_list
);
/* Give each temp a distinct, recognizable name for dumps.  */
4681 sprintf (tmp_name
, "%s%d", "addr2int", i
);
4682 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
4683 add_referenced_tmp_var (addr_tmp
);
4684 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL_TREE
);
4685 addr_stmt
= fold_convert (int_ptrsize_type
, addr_base
);
4686 addr_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
4687 addr_tmp_name
, addr_stmt
);
4688 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
4689 append_to_statement_list_force (addr_stmt
, cond_expr_stmt_list
);
4691 /* The addresses are ORed together. */
4693 if (or_tmp_name
!= NULL_TREE
)
4695 /* create: or_tmp = or_tmp | addr_tmp */
4696 sprintf (tmp_name
, "%s%d", "orptrs", i
);
4697 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
4698 add_referenced_tmp_var (or_tmp
);
4699 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL_TREE
);
4700 or_stmt
= build2 (MODIFY_EXPR
, void_type_node
, new_or_tmp_name
,
4701 build2 (BIT_IOR_EXPR
, int_ptrsize_type
,
/* NOTE(review): the two BIT_IOR_EXPR operand lines (or_tmp_name and
   addr_tmp_name) appear to have been lost in this extraction.  */
4704 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
4705 append_to_statement_list_force (or_stmt
, cond_expr_stmt_list
);
4706 or_tmp_name
= new_or_tmp_name
;
/* First address: seed the accumulator (this is the dropped `else` arm).  */
4709 or_tmp_name
= addr_tmp_name
;
4713 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
4715 /* create: and_tmp = or_tmp & mask */
4716 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
4717 add_referenced_tmp_var (and_tmp
);
4718 and_tmp_name
= make_ssa_name (and_tmp
, NULL_TREE
);
4720 and_stmt
= build2 (MODIFY_EXPR
, void_type_node
,
4722 build2 (BIT_AND_EXPR
, int_ptrsize_type
,
4723 or_tmp_name
, mask_cst
));
4724 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
4725 append_to_statement_list_force (and_stmt
, cond_expr_stmt_list
);
4727 /* Make and_tmp the left operand of the conditional test against zero.
4728 if and_tmp has a non-zero bit then some address is unaligned. */
4729 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
4730 return build2 (EQ_EXPR
, boolean_type_node
,
4731 and_tmp_name
, ptrsize_zero
);
4735 /* Function vect_transform_loop.
4737 The analysis phase has determined that the loop is vectorizable.
4738 Vectorize the loop - created vectorized stmts to replace the scalar
4739 stmts in the loop, and update the loop exit condition. */
/* Driver of the transformation phase: version/peel the loop as needed, then
   replace each relevant scalar stmt with its vectorized form and fix the
   loop bound (see header comment above).  NOTE(review): many structural
   lines (braces, `else`, declarations, `#endif`) were lost in this
   extraction; only comments were added, code tokens are untouched.  */
4742 vect_transform_loop (loop_vec_info loop_vinfo
,
4743 struct loops
*loops ATTRIBUTE_UNUSED
)
4745 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4746 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4747 int nbbs
= loop
->num_nodes
;
4748 block_stmt_iterator si
;
4751 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4755 basic_block exit_bb
;
4756 block_stmt_iterator exit_si
;
4758 if (vect_print_dump_info (REPORT_DETAILS
))
4759 fprintf (vect_dump
, "=== vec_transform_loop ===");
4761 /* If the loop has data references that may or may not be aligned then
4762 two versions of the loop need to be generated, one which is vectorized
4763 and one which isn't. A test is then generated to control which of the
4764 loops is executed. The test checks for the alignment of all of the
4765 data references that may or may not be aligned. */
4767 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
4771 tree cond_expr_stmt_list
= NULL_TREE
;
4772 basic_block condition_bb
;
4773 block_stmt_iterator cond_exp_bsi
;
4774 basic_block merge_bb
;
4775 basic_block new_exit_bb
;
4777 tree orig_phi
, new_phi
, arg
;
/* Build the runtime alignment check and version the loop on it.  */
4779 cond_expr
= vect_create_cond_for_align_checks (loop_vinfo
,
4780 &cond_expr_stmt_list
);
4781 initialize_original_copy_tables ();
4782 nloop
= loop_version (loops
, loop
, cond_expr
, &condition_bb
, true);
4783 free_original_copy_tables();
4785 /** Loop versioning violates an assumption we try to maintain during
4786 vectorization - that the loop exit block has a single predecessor.
4787 After versioning, the exit block of both loop versions is the same
4788 basic block (i.e. it has two predecessors). Just in order to simplify
4789 following transformations in the vectorizer, we fix this situation
4790 here by adding a new (empty) block on the exit-edge of the loop,
4791 with the proper loop-exit phis to maintain loop-closed-form. **/
4793 merge_bb
= loop
->single_exit
->dest
;
4794 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
4795 new_exit_bb
= split_edge (loop
->single_exit
);
4796 add_bb_to_loop (new_exit_bb
, loop
->outer
);
4797 new_exit_e
= loop
->single_exit
;
4798 e
= EDGE_SUCC (new_exit_bb
, 0);
/* For each phi in merge_bb, create a mirroring phi in the new exit bb and
   reroute the arg that used to come from the loop through it.  */
4800 for (orig_phi
= phi_nodes (merge_bb
); orig_phi
;
4801 orig_phi
= PHI_CHAIN (orig_phi
))
4803 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
4805 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
4806 add_phi_arg (new_phi
, arg
, new_exit_e
);
4807 SET_PHI_ARG_DEF (orig_phi
, e
->dest_idx
, PHI_RESULT (new_phi
));
4810 /** end loop-exit-fixes after versioning **/
4812 update_ssa (TODO_update_ssa
);
/* Emit the alignment-check stmts just before the versioning condition.  */
4813 cond_exp_bsi
= bsi_last (condition_bb
);
4814 bsi_insert_before (&cond_exp_bsi
, cond_expr_stmt_list
, BSI_SAME_STMT
);
4817 /* CHECKME: we wouldn't need this if we called update_ssa once
4819 bitmap_zero (vect_vnames_to_rename
);
4821 /* Peel the loop if there are data refs with unknown alignment.
4822 Only one data ref with unknown store is allowed. */
4824 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
4825 vect_do_peeling_for_alignment (loop_vinfo
, loops
);
4827 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
4828 compile time constant), or it is a constant that doesn't divide by the
4829 vectorization factor, then an epilog loop needs to be created.
4830 We therefore duplicate the loop: the original loop will be vectorized,
4831 and will compute the first (n/VF) iterations. The second copy of the loop
4832 will remain scalar and will compute the remaining (n%VF) iterations.
4833 (VF is the vectorization factor). */
4835 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
4836 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
4837 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
4838 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
, loops
);
/* Otherwise NITERS divides evenly; compute the constant ratio directly
   (this is the dropped `else` arm).  */
4840 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
4841 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
4843 /* 1) Make sure the loop header has exactly two entries
4844 2) Make sure we have a preheader basic block. */
4846 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
4847 if (EDGE_COUNT (loop_preheader_edge (loop
)->src
->succs
) != 1)
4848 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
/* Position an iterator in the exit bb, past any leading labels; some
   vectorized stmts are emitted there via exit_si.  */
4850 exit_bb
= loop
->single_exit
->dest
;
4851 exit_si
= bsi_after_labels (exit_bb
);
4852 if (TREE_CODE (bsi_stmt (exit_si
)) == LABEL_EXPR
)
4853 bsi_next (&exit_si
);
4855 /* FORNOW: the vectorizer supports only loops whose body consists
4856 of one basic block (header + empty latch). When the vectorizer will
4857 support more involved loop forms, the order by which the BBs are
4858 traversed need to be reconsidered. */
4860 for (i
= 0; i
< nbbs
; i
++)
4862 basic_block bb
= bbs
[i
];
/* Note: si is advanced inside the body, not in the for-header, because
   stmt removal/replacement moves the iterator.  */
4864 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
4866 tree stmt
= bsi_stmt (si
);
4867 stmt_vec_info stmt_info
;
4870 if (vect_print_dump_info (REPORT_DETAILS
))
4872 fprintf (vect_dump
, "------>vectorizing statement: ");
4873 print_generic_expr (vect_dump
, stmt
, TDF_SLIM
);
4875 stmt_info
= vinfo_for_stmt (stmt
);
4876 gcc_assert (stmt_info
);
/* Stmts that are neither relevant nor live are left untouched.  */
4877 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
4878 && !STMT_VINFO_LIVE_P (stmt_info
))
4884 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
4885 != (unsigned HOST_WIDE_INT
) vectorization_factor
)
4886 && vect_print_dump_info (REPORT_DETAILS
))
4887 fprintf (vect_dump
, "multiple-types.");
4889 /* -------- vectorize statement ------------ */
4890 if (vect_print_dump_info (REPORT_DETAILS
))
4891 fprintf (vect_dump
, "transform statement.");
4893 interleaving
= false;
4894 is_store
= vect_transform_stmt (stmt
, &si
, &exit_si
, &interleaving
);
4899 if (DR_GROUP_FIRST_DR (stmt_info
))
4901 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
4902 interleaving chain was completed - free all the stores in
4904 tree next
= DR_GROUP_FIRST_DR (stmt_info
);
4906 stmt_vec_info next_stmt_info
;
/* Walk the interleaving chain, releasing each member's vinfo.  */
4910 next_stmt_info
= vinfo_for_stmt (next
);
4911 /* Free the attached stmt_vec_info and remove the stmt. */
4912 ann
= stmt_ann (next
);
4913 tmp
= DR_GROUP_NEXT_DR (next_stmt_info
);
4914 free (next_stmt_info
);
4915 set_stmt_info ((tree_ann_t
)ann
, NULL
);
4923 /* Free the attached stmt_vec_info and remove the stmt. */
4924 ann
= stmt_ann (stmt
);
4926 set_stmt_info ((tree_ann_t
)ann
, NULL
);
4935 /* This is the case of a skipped interleaved store. We don't free
4936 its stmt_vec_info. */
/* Set the vector loop's trip count to RATIO (= NITERS / VF).  */
4946 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
4948 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename
, 0, j
, bi
)
4949 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j
)));
4951 /* The memory tags and pointers in vectorized statements need to
4952 have their SSA forms updated. FIXME, why can't this be delayed
4953 until all the loops have been transformed? */
4954 update_ssa (TODO_update_ssa
);
4956 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
4957 fprintf (vect_dump
, "LOOP VECTORIZED.\n");