Remove old autovect-branch by moving to "dead" directory.
[official-gcc.git] / old-autovect-branch / gcc / tree-vect-transform.c
blobd7732ff49c87b3768f54f2ceefc6cdf1d9177b8f
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt
50 (tree, block_stmt_iterator *, block_stmt_iterator *, bool *);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, tree *, bool);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
56 static tree bump_vector_ptr (tree, tree, block_stmt_iterator *, tree);
57 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
58 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
59 static tree vect_get_epilog_def_for_operand (tree, tree, block_stmt_iterator *);
60 static tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);
61 static tree vect_init_vector (tree, tree);
62 static void vect_finish_stmt_generation
63 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
64 static void update_vuses_to_preheader (tree, struct loop*);
65 static bool vect_is_simple_cond (tree, loop_vec_info);
66 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
67 static tree get_initial_def_for_reduction (tree, tree, tree *);
68 static tree vect_gen_widened_results_half
69 (enum tree_code, tree, tree, tree, tree, int, tree, block_stmt_iterator *, tree);
70 static bool vect_permute_store_chain (VEC(tree,heap) *, unsigned int, tree,
71 block_stmt_iterator *, VEC(tree,heap) **);
72 static bool vect_transform_strided_load (tree, VEC(tree,heap) *, int,
73 block_stmt_iterator *);
74 static bool vect_permute_load_chain (VEC(tree,heap) *, unsigned int, tree,
75 block_stmt_iterator *,
76 VEC(tree,heap) **);
77 static bool vect_permute_store_chain (VEC(tree,heap) *, unsigned int, tree,
78 block_stmt_iterator *,
79 VEC(tree,heap) **);
80 static bool vect_strided_load_supported (tree);
81 static bool vect_strided_store_supported (tree);
83 /* Utility function dealing with loop peeling (not peeling itself). */
/* NOTE(review): forward declarations only; the definitions appear later
   in this file (outside this excerpt).  */
84 static void vect_generate_tmps_on_preheader
85 (loop_vec_info, tree *, tree *, tree *);
86 static tree vect_build_loop_niters (loop_vec_info);
87 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
88 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
89 static tree advance_iv_by_n (struct loop *, tree, tree, block_stmt_iterator *);
90 static void vect_update_init_of_dr (struct data_reference *, tree niters);
91 static void vect_update_inits_of_drs (loop_vec_info, tree);
92 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
93 static void vect_do_peeling_for_loop_bound
94 (loop_vec_info, tree *, struct loops *);
95 static int vect_min_worthwhile_factor (enum tree_code);
98 /* Function vect_get_new_vect_var.
100 Returns a name for a new variable. The current naming scheme appends the
101 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
102 the name of vectorizer generated variables, and appends that to NAME if
103 provided. */
105 static tree
106 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
108 const char *prefix;
109 tree new_vect_var;
111 switch (var_kind)
113 case vect_simple_var:
114 prefix = "vect_";
115 break;
116 case vect_scalar_var:
117 prefix = "stmp_";
118 break;
119 case vect_pointer_var:
120 prefix = "vect_p";
121 break;
122 default:
123 gcc_unreachable ();
126 if (name)
127 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
128 else
129 new_vect_var = create_tmp_var (type, prefix);
131 return new_vect_var;
135 /* Function vect_create_addr_base_for_vector_ref.
137 Create an expression that computes the address of the first memory location
138 that will be accessed for a data reference.
140 Input:
141 STMT: The statement containing the data reference.
142 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
143 OFFSET: Optional. If supplied, it is be added to the initial address.
145 Output:
146 1. Return an SSA_NAME whose value is the address of the memory location of
147 the first vector of the data reference.
148 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
149 these statement(s) which define the returned SSA_NAME.
151 FORNOW: We are only handling array accesses with step 1. */
153 static tree
154 vect_create_addr_base_for_vector_ref (tree stmt,
155 tree *new_stmt_list,
156 tree offset)
158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
159 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
160 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
161 tree base_name = build_fold_indirect_ref (data_ref_base);
162 tree ref = DR_REF (dr);
163 tree scalar_type = TREE_TYPE (ref);
164 tree scalar_ptr_type = build_pointer_type (scalar_type);
165 tree vec_stmt;
166 tree new_temp;
167 tree addr_base, addr_expr;
168 tree dest, new_stmt;
169 tree base_offset = unshare_expr (DR_OFFSET (dr));
170 tree init = unshare_expr (DR_INIT (dr));
172 /* Create base_offset */
/* base_offset = DR_OFFSET + DR_INIT; the gimplified defining stmts are
   appended to NEW_STMT_LIST for the caller to insert.  */
173 base_offset = size_binop (PLUS_EXPR, base_offset, init);
174 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
175 add_referenced_tmp_var (dest);
176 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
177 append_to_statement_list_force (new_stmt, new_stmt_list);
179 if (offset)
181 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
182 tree step;
184 /* For interleaved access step we divide STEP by the size of the
185 interleaving group. */
186 if (DR_GROUP_SIZE (stmt_info))
187 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
188 build_int_cst (TREE_TYPE (offset),
189 DR_GROUP_SIZE (stmt_info)));
190 else
191 step = DR_STEP (dr);
193 add_referenced_tmp_var (tmp);
/* base_offset += OFFSET * step, again gimplified into NEW_STMT_LIST.  */
194 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
195 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
196 base_offset, offset);
197 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
198 append_to_statement_list_force (new_stmt, new_stmt_list);
201 /* base + base_offset */
202 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
203 base_offset);
205 /* addr_expr = addr_base */
/* Materialize the address in a fresh SSA name of pointer-to-scalar type;
   the defining stmt is appended to NEW_STMT_LIST like the rest.  */
206 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
207 get_name (base_name));
208 add_referenced_tmp_var (addr_expr);
209 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
210 new_temp = make_ssa_name (addr_expr, vec_stmt);
211 TREE_OPERAND (vec_stmt, 0) = new_temp;
212 append_to_statement_list_force (vec_stmt, new_stmt_list);
214 if (vect_print_dump_info (REPORT_DETAILS))
216 fprintf (vect_dump, "created ");
217 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
219 return new_temp;
223 /* Function vect_create_data_ref_ptr.
225 Create a memory reference expression for vector access, to be used in a
226 vector load/store stmt. The reference is based on a new pointer to vector
227 type (vp).
229 Input:
230 1. STMT: a stmt that references memory. Expected to be of the form
231 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
232 2. BSI: block_stmt_iterator where new stmts can be added.
233 3. OFFSET (optional): an offset to be added to the initial address accessed
234 by the data-ref in STMT.
235 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
236 pointing to the initial address.
238 Output:
239 1. Declare a new ptr to vector_type, and have it point to the base of the
240 data reference (initial addressed accessed by the data reference).
241 For example, for vector of type V8HI, the following code is generated:
243 v8hi *vp;
244 vp = (v8hi *)initial_address;
246 if OFFSET is not supplied:
247 initial_address = &a[init];
248 if OFFSET is supplied:
249 initial_address = &a[init + OFFSET];
251 Return the initial_address in INITIAL_ADDRESS.
253 If the access in STMT is an interleaved access, create the above pointer
254 for all the data-refs in the interleaving group and return them in DRS,
255 if DRS is not NULL.
257 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create
258 a data-reference in the loop based on the new vector pointer vp. This
259 new data reference will by some means be updated each iteration of
260 the loop. Return the pointer vp'. */
262 static tree
263 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
264 tree offset, tree *initial_address, tree *ptr_incr,
265 bool only_init)
267 tree base_name;
268 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
269 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
270 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
271 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272 tree vect_ptr_type;
273 tree vect_ptr;
274 tree tag;
275 tree new_temp;
276 tree vec_stmt;
277 tree new_stmt_list = NULL_TREE;
278 edge pe = loop_preheader_edge (loop);
279 basic_block new_bb;
280 tree vect_ptr_init;
281 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
283 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
285 if (vect_print_dump_info (REPORT_DETAILS))
287 tree data_ref_base = base_name;
288 fprintf (vect_dump, "create vector-pointer variable to type: ");
289 print_generic_expr (vect_dump, vectype, TDF_SLIM);
290 if (TREE_CODE (data_ref_base) == VAR_DECL)
291 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
292 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
293 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
294 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
295 fprintf (vect_dump, " vectorizing a record based array ref: ");
296 else if (TREE_CODE (data_ref_base) == SSA_NAME)
297 fprintf (vect_dump, " vectorizing a pointer ref: ");
298 print_generic_expr (vect_dump, base_name, TDF_SLIM);
301 /** (1) Create the new vector-pointer variable: **/
303 vect_ptr_type = build_pointer_type (vectype);
304 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
305 get_name (base_name));
306 add_referenced_tmp_var (vect_ptr);
309 /** (2) Add aliasing information to the new vector-pointer:
310 (The points-to info (DR_PTR_INFO) may be defined later.) **/
312 tag = DR_MEMTAG (dr);
313 gcc_assert (tag);
315 /* If tag is a variable (and NOT_A_TAG) than a new type alias
316 tag must be created with tag added to its may alias list. */
317 if (!MTAG_P (tag))
318 new_type_alias (vect_ptr, tag);
319 else
320 var_ann (vect_ptr)->type_mem_tag = tag;
322 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
324 /** (3) Calculate the initial address the vector-pointer, and set
325 the vector-pointer to point to it before the loop: **/
327 /* Create: (&(base[init_val+offset]) in the loop preheader. */
/* The address computation stmts are inserted on the preheader edge; the
   gcc_assert checks that the insertion did not have to split the edge.  */
328 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
329 offset);
330 pe = loop_preheader_edge (loop);
331 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
332 gcc_assert (!new_bb);
333 *initial_address = new_temp;
335 /* Create: p = (vectype *) initial_base */
336 vec_stmt = fold_convert (vect_ptr_type, new_temp);
337 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
338 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
339 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
340 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
341 gcc_assert (!new_bb);
343 /** (4) Handle the updating of the vector-pointer inside the loop: **/
345 if (only_init) /* No update in loop is required. */
347 /* Copy the points-to information if it exists. */
348 if (DR_PTR_INFO (dr))
349 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
350 return vect_ptr_init;
352 else
354 block_stmt_iterator incr_bsi;
355 bool insert_after;
356 tree indx_before_incr, indx_after_incr;
357 tree incr;
/* Build an IV that advances the pointer by one vector size each
   iteration; the stmt that performs the increment is returned to the
   caller through PTR_INCR.  */
359 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
361 create_iv (vect_ptr_init,
362 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
363 NULL_TREE, loop, &incr_bsi, insert_after,
364 &indx_before_incr, &indx_after_incr);
365 incr = bsi_stmt (incr_bsi);
366 set_stmt_info ((tree_ann_t) stmt_ann (incr),
367 new_stmt_vec_info (incr, loop_vinfo));
369 /* Copy the points-to information if it exists. */
370 if (DR_PTR_INFO (dr))
372 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
373 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
375 merge_alias_info (vect_ptr_init, indx_before_incr);
376 merge_alias_info (vect_ptr_init, indx_after_incr);
377 if (ptr_incr)
378 *ptr_incr = incr;
379 return indx_before_incr;
384 /* Function bump_vector_ptr
386 Increment a pointer (to a vector type) by vector-size. Connect the new
387 increment stmt to the exising def-use update-chain of the pointer.
389 The pointer def-use update-chain before this function:
390 DATAREF_PTR = phi (p_0, p_2)
391 ....
392 PTR_INCR: p_2 = DATAREF_PTR + step
394 The pointer def-use update-chain after this function:
395 DATAREF_PTR = phi (p_0, p_2)
396 ....
397 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
398 ....
399 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
401 Input:
402 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
403 in the loop.
404 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
405 The increment amount across iteration is also expected to be
406 vector_size.
407 BSI - location where the new update stmt is to be placed.
408 STMT - the original scalar memory-access stmt that is being vectorized.
410 Output: Return NEW_DATAREF_PTR as illustrated above.
414 static tree
415 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
416 tree stmt)
418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
419 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
421 tree vptr_type = TREE_TYPE (dataref_ptr);
422 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
423 tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
424 tree incr_stmt;
425 ssa_op_iter iter;
426 use_operand_p use_p;
427 tree new_dataref_ptr;
429 incr_stmt = build2 (MODIFY_EXPR, vptr_type, ptr_var,
430 build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
431 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
432 TREE_OPERAND (incr_stmt, 0) = new_dataref_ptr;
433 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
435 /* Update the vector-pointer's cross-iteration increment. */
436 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
438 tree use = USE_FROM_PTR (use_p);
440 if (use == dataref_ptr)
441 SET_USE (use_p, new_dataref_ptr);
442 else
443 gcc_assert (tree_int_cst_compare (use, update) == 0);
446 /* Copy the points-to information if it exists. */
447 if (DR_PTR_INFO (dr))
448 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
449 merge_alias_info (new_dataref_ptr, dataref_ptr); /* CHECKME */
451 return new_dataref_ptr;
455 /* Function vect_setup_realignment
457 This function is called when vectorizing an unaligned load using
458 the dr_unaligned_software_pipeline scheme.
459 This function generates the following code at the loop prolog:
461 p = initial_addr;
462 msq_init = *(floor(p)); # prolog load
463 realignment_token = call target_builtin;
464 loop:
465 msq = phi (msq_init, ---)
467 The code above sets up a new (vector) pointer, pointing to the first
468 location accessed by STMT, and a "floor-aligned" load using that pointer.
469 It also generates code to compute the "realignment-token" (if the relevant
470 target hook was defined), and creates a phi-node at the loop-header bb
471 whose arguments are the result of the prolog-load (created by this
472 function) and the result of a load that takes place in the loop (to be
473 created by the caller to this function).
474 The caller to this function uses the phi-result (msq) to create the
475 realignment code inside the loop, and sets up the missing phi argument,
476 as follows:
478 loop:
479 msq = phi (msq_init, lsq)
480 lsq = *(floor(p')); # load in loop
481 result = realign_load (msq, lsq, realignment_token);
483 Input:
484 STMT - (scalar) load stmt to be vectorized. This load accesses
485 a memory location that may be unaligned.
486 BSI - place where new code is to be inserted.
488 Output:
489 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
490 target hook, if defined.
491 Return value - the result of the loop-header phi node.
494 static tree
495 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
496 tree *realignment_token)
498 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
499 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
501 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
502 edge pe = loop_preheader_edge (loop);
503 tree scalar_dest = TREE_OPERAND (stmt, 0);
504 tree vec_dest;
505 tree init_addr;
506 tree ptr;
507 tree data_ref;
508 tree new_stmt;
509 basic_block new_bb;
510 tree msq_init;
511 tree new_temp;
512 tree phi_stmt;
513 tree msq;
515 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
/* ALIGN_INDIRECT_REF performs the "floor" (alignment-masked) access;
   insertion on the preheader edge must not split the edge.  */
516 vec_dest = vect_create_destination_var (scalar_dest, vectype);
517 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, NULL, true);
518 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
519 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
520 new_temp = make_ssa_name (vec_dest, new_stmt);
521 TREE_OPERAND (new_stmt, 0) = new_temp;
522 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
523 gcc_assert (!new_bb);
524 msq_init = TREE_OPERAND (new_stmt, 0);
525 copy_virtual_operands (new_stmt, stmt);
526 update_vuses_to_preheader (new_stmt, loop);
528 /* <2> */
529 if (targetm.vectorize.builtin_mask_for_load)
531 /* Create permutation mask, if required, in loop preheader. */
532 tree builtin_decl;
533 tree params = build_tree_list (NULL_TREE, init_addr);
535 vec_dest = vect_create_destination_var (scalar_dest, vectype);
536 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
537 new_stmt = build_function_call_expr (builtin_decl, params);
538 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
539 new_temp = make_ssa_name (vec_dest, new_stmt);
540 TREE_OPERAND (new_stmt, 0) = new_temp;
541 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
542 gcc_assert (!new_bb);
543 *realignment_token = TREE_OPERAND (new_stmt, 0);
545 /* The result of the CALL_EXPR to this builtin is determined from
546 the value of the parameter and no global variables are touched
547 which makes the builtin a "const" function. Requiring the
548 builtin to have the "const" attribute makes it unnecessary
549 to call mark_call_clobbered. */
550 gcc_assert (TREE_READONLY (builtin_decl));
553 /* <3> Create msq = phi <msq_init, lsq> in loop */
/* Only the preheader phi argument is added here; the latch argument
   (lsq) is filled in later by the caller.  */
554 vec_dest = vect_create_destination_var (scalar_dest, vectype);
555 msq = make_ssa_name (vec_dest, NULL_TREE);
556 phi_stmt = create_phi_node (msq, loop->header);
557 SSA_NAME_DEF_STMT (msq) = phi_stmt;
558 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
560 return msq;
564 /* Function vect_create_destination_var.
566 Create a new temporary of type VECTYPE. */
568 static tree
569 vect_create_destination_var (tree scalar_dest, tree vectype)
571 tree vec_dest;
572 const char *new_name;
573 tree type;
574 enum vect_var_kind kind;
576 kind = vectype ? vect_simple_var : vect_scalar_var;
577 type = vectype ? vectype : TREE_TYPE (scalar_dest);
579 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
581 new_name = get_name (scalar_dest);
582 if (!new_name)
583 new_name = "var_";
584 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
585 add_referenced_tmp_var (vec_dest);
587 return vec_dest;
591 /* Function vect_init_vector.
593 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
594 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
595 used in the vectorization of STMT. */
597 static tree
598 vect_init_vector (tree stmt, tree vector_var)
600 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
601 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
602 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
603 tree new_var;
604 tree init_stmt;
605 tree vectype = TREE_TYPE (vector_var);
606 tree vec_oprnd;
607 edge pe;
608 tree new_temp;
609 basic_block new_bb;
611 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
612 add_referenced_tmp_var (new_var);
614 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
615 new_temp = make_ssa_name (new_var, init_stmt);
616 TREE_OPERAND (init_stmt, 0) = new_temp;
618 pe = loop_preheader_edge (loop);
619 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
620 gcc_assert (!new_bb);
622 if (vect_print_dump_info (REPORT_DETAILS))
624 fprintf (vect_dump, "created new init_stmt: ");
625 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
628 vec_oprnd = TREE_OPERAND (init_stmt, 0);
629 return vec_oprnd;
633 /* Function vect_get_vec_def_for_operand.
635 OP is an operand in STMT. This function returns a (vector) def that will be
636 used in the vectorized stmt for STMT.
638 In the case that OP is an SSA_NAME which is defined in the loop, then
639 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
641 In case OP is an invariant or constant, a new stmt that creates a vector def
642 needs to be introduced. */
644 static tree
645 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
647 tree vec_oprnd;
648 tree vec_stmt;
649 tree def_stmt;
650 stmt_vec_info def_stmt_info = NULL;
651 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
652 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
653 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
654 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
655 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
656 tree vec_inv;
657 tree vec_cst;
658 tree t = NULL_TREE;
659 tree def;
660 int i;
661 enum vect_def_type dt;
662 bool is_simple_use;
664 if (vect_print_dump_info (REPORT_DETAILS))
666 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
667 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP (constant / invariant / loop def / reduction / induction);
   the classification drives the switch below.  */
670 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
671 gcc_assert (is_simple_use);
672 if (vect_print_dump_info (REPORT_DETAILS))
674 if (def)
676 fprintf (vect_dump, "def = ");
677 print_generic_expr (vect_dump, def, TDF_SLIM);
679 if (def_stmt)
681 fprintf (vect_dump, " def_stmt = ");
682 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
685 switch (dt)
687 /* Case 1: operand is a constant. */
688 case vect_constant_def:
690 if (scalar_def)
691 *scalar_def = op;
693 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
694 if (vect_print_dump_info (REPORT_DETAILS))
695 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
697 for (i = nunits - 1; i >= 0; --i)
699 t = tree_cons (NULL_TREE, op, t);
701 vec_cst = build_vector (vectype, t);
702 return vect_init_vector (stmt, vec_cst);
705 /* Case 2: operand is defined outside the loop - loop invariant. */
706 case vect_invariant_def:
708 if (scalar_def)
709 *scalar_def = def;
711 /* Create 'vec_inv = {inv,inv,..,inv}' */
712 if (vect_print_dump_info (REPORT_DETAILS))
713 fprintf (vect_dump, "Create vector_inv.");
715 for (i = nunits - 1; i >= 0; --i)
717 t = tree_cons (NULL_TREE, def, t);
720 /* FIXME: use build_constructor directly. */
721 vec_inv = build_constructor_from_list (vectype, t);
722 return vect_init_vector (stmt, vec_inv);
725 /* Case 3: operand is defined inside the loop. */
726 case vect_loop_def:
728 if (scalar_def)
729 *scalar_def = def_stmt;
731 /* Get the def from the vectorized stmt. */
732 def_stmt_info = vinfo_for_stmt (def_stmt);
733 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
734 gcc_assert (vec_stmt);
735 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
736 return vec_oprnd;
739 /* Case 4: operand is defined by a loop header phi - reduction */
740 case vect_reduction_def:
742 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
744 /* Get the def before the loop */
745 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
746 return get_initial_def_for_reduction (stmt, op, scalar_def);
749 /* Case 5: operand is defined by loop-header phi - induction. */
750 case vect_induction_def:
752 if (vect_print_dump_info (REPORT_DETAILS))
753 fprintf (vect_dump, "induction - unsupported.");
754 internal_error ("no support for induction"); /* FORNOW */
757 default:
758 gcc_unreachable ();
763 /* Function vect_get_vec_def_for_stmt_copy
765 Return a vector-def for an operand. This function is used when the
766 vectorized stmt to be created (by the caller to this function) is a "copy"
767 created in case the vectorized result cannot fit in one vector, and several
768 copies of the vector-stmt are required. In this case the vector-def is
769 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
770 of the stmt that defines VEC_OPRND.
771 DT is the type of the vector def VEC_OPRND.
773 Context:
774 In case the vectorization factor (VF) is bigger than the number
775 of elements that can fit in a vectype (nunits), we have to generate
776 more than one vector stmt to vectorize the scalar stmt. This situation
777 arises when there are multiple data-types operated upon in the loop; the
778 smallest data-type determines the VF, and as a result, when vectorizing
779 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
780 vector stmt (each computing a vector of 'nunits' results, and together
781 computing 'VF' results in each iteration). This function is called when
782 vectorizing such a stmt (e.g. vectorizing S2 in the illusration below):
784 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
786 S1: x = load VS1.0: vx.0 = memref0 VS1.1
787 VS1.1: vx.1 = memref1 VS1.2
788 VS1.2: vx.2 = memref2 VS1.3
789 VS1.3: vx.3 = memref3
791 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
792 VSnew.1: vz1 = vx.1 + ... VSnew.2
793 VSnew.2: vz2 = vx.2 + ... VSnew.3
794 VSnew.3: vz3 = vx.3 + ...
796 To create the first vector-stmt (out of the 'VF/nunits' copies) -
797 denoted VSnew.0 - the function 'vect_get_vec_def_for_operand' is called to
798 get the relevant vector-def for each operand (e.g. 'vx.0').
799 To create the remaining copies of the vector-stmt (VSnew.j), this
800 function is called to get the relevant vector-def for each operand. It is
801 obtained from the respective VS1.j stmt, which is recorded in the
802 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
803 For example, to obtain the vector-def 'vx.1' in order to create the
804 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
805 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
806 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
807 and return its def ('vx.1').
808 Overall to create the above sequence this function will be called 3 times:
809 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
810 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
811 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
813 static tree
814 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
816 tree vec_stmt_for_operand;
817 stmt_vec_info def_stmt_info;
819 if (dt == vect_invariant_def || dt == vect_constant_def)
821 /* Do nothing; can reuse same def. */ ;
822 return vec_oprnd;
825 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
826 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
827 gcc_assert (def_stmt_info);
828 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
829 gcc_assert (vec_stmt_for_operand);
830 vec_oprnd = TREE_OPERAND (vec_stmt_for_operand, 0);
832 return vec_oprnd;
836 /* Function vect_finish_stmt_generation.
838 Insert a new stmt. */
840 static void
841 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
842 block_stmt_iterator *bsi)
844 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
845 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
847 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
848 set_stmt_info (get_tree_ann (vec_stmt),
849 new_stmt_vec_info (vec_stmt, loop_vinfo));
851 if (vect_print_dump_info (REPORT_DETAILS))
853 fprintf (vect_dump, "add new stmt: ");
854 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
857 /* Make sure bsi points to the stmt that is being vectorized. */
858 gcc_assert (stmt == bsi_stmt (*bsi));
860 #ifdef USE_MAPPED_LOCATION
861 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
862 #else
863 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
864 #endif
868 #define ADJUST_IN_EPILOG 1
870 /* Function get_initial_def_for_reduction
872 Input:
873 STMT - a stmt that performs a reduction operation in the loop.
874 INIT_VAL - the initial value of the reduction variable
876 Output:
877 SCALAR_DEF - a tree that holds a value to be added to the final result
878 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
879 Return a vector variable, initialized according to the operation that STMT
880 performs. This vector will be used as the initial value of the
881 vector of partial results.
883 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
884 add: [0,0,...,0,0]
885 mult: [1,1,...,1,1]
886 min/max: [init_val,init_val,..,init_val,init_val]
887 bit and/or: [init_val,init_val,..,init_val,init_val]
888 and when necessary (e.g. add/mult case) let the caller know
889 that it needs to adjust the result by init_val.
891 Option2: Initialize the vector as follows:
892 add: [0,0,...,0,init_val]
893 mult: [1,1,...,1,init_val]
894 min/max: [init_val,init_val,...,init_val]
895 bit and/or: [init_val,init_val,...,init_val]
896 and no adjustments are needed.
898 For example, for the following code:
900 s = init_val;
901 for (i=0;i<n;i++)
902 s = s + a[i];
904 STMT is 's = s + a[i]', and the reduction variable is 's'.
905 For a vector of 4 units, we want to return either [0,0,0,init_val],
906 or [0,0,0,0] and let the caller know that it needs to adjust
907 the result at the end by 'init_val'.
909 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
910 TODO: Use some cost-model to estimate which scheme is more profitable.
913 static tree
914 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
916 tree vectype =
917 get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (stmt ,0)));
918 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
919 int nelements;
920 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
921 tree type = TREE_TYPE (init_val);
922 tree def;
923 tree vec, t = NULL_TREE;
924 bool need_epilog_adjust;
925 int i;
927 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
929 switch (code)
931 case PLUS_EXPR:
932 case WIDEN_SUM_EXPR:
933 case SAD_EXPR:
934 case DOT_PROD_EXPR:
935 if (INTEGRAL_TYPE_P (type))
936 def = build_int_cst (type, 0);
937 else
938 def = build_real (type, dconst0);
940 #ifdef ADJUST_IN_EPILOG
941 /* All the 'nunits' elements are set to 0. The final result will be
942 adjusted by 'init_val' at the loop epilog. */
943 nelements = nunits;
944 need_epilog_adjust = true;
945 #else
946 /* 'nunits - 1' elements are set to 0; The last element is set to
947 'init_val'. No further adjustments at the epilog are needed. */
948 nelements = nunits - 1;
949 need_epilog_adjust = false;
950 #endif
951 break;
953 case MIN_EXPR:
954 case MAX_EXPR:
955 def = init_val;
956 nelements = nunits;
957 need_epilog_adjust = false;
958 break;
960 default:
961 gcc_unreachable ();
/* Build the element list: NELEMENTS copies of DEF, optionally capped by
   INIT_VAL as the last element (the non-ADJUST_IN_EPILOG scheme).  */
964 for (i = nelements - 1; i >= 0; --i)
965 t = tree_cons (NULL_TREE, def, t);
967 if (nelements == nunits - 1)
969 /* Set the last element of the vector. */
970 t = tree_cons (NULL_TREE, init_val, t);
971 nelements += 1;
973 gcc_assert (nelements == nunits);
/* A VECTOR_CST can be used only when all elements are constants;
   otherwise fall back to a CONSTRUCTOR.  */
975 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
976 vec = build_vector (vectype, t);
977 else
978 vec = build_constructor_from_list (vectype, t);
980 if (!need_epilog_adjust)
981 *scalar_def = NULL_TREE;
982 else
983 *scalar_def = init_val;
985 return vect_init_vector (stmt, vec);
989 /* Function vect_create_epilog_for_reduction
991 Create code at the loop-epilog to finalize the result of a reduction
992 computation.
994 VECT_DEF is a vector of partial results.
995 REDUC_CODE is the tree-code for the epilog reduction.
996 STMT is the scalar reduction stmt that is being vectorized.
997 REDUCTION_PHI is the phi-node that carries the reduction computation.
999 This function:
   1. Creates the reduction def-use cycle: sets the arguments for
1001 REDUCTION_PHI:
1002 The loop-entry argument is the vectorized initial-value of the reduction.
1003 The loop-latch argument is VECT_DEF - the vector of partial sums.
1004 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1005 by applying the operation specified by REDUC_CODE if available, or by
1006 other means (whole-vector shifts or a scalar loop).
1007 The function also creates a new phi node at the loop exit to preserve
1008 loop-closed form, as illustrated below.
1010 The flow at the entry to this function:
1012 loop:
1013 vec_def = phi <null, null> # REDUCTION_PHI
1014 VECT_DEF = vector_stmt # vectorized form of STMT
1015 s_loop = scalar_stmt # (scalar) STMT
1016 loop_exit:
1017 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1018 use <s_out0>
1019 use <s_out0>
1021 The above is transformed by this function into:
1023 loop:
1024 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1025 VECT_DEF = vector_stmt # vectorized form of STMT
1026 s_loop = scalar_stmt # (scalar) STMT
1027 loop_exit:
1028 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1029 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1030 v_out2 = reduce <v_out1>
1031 s_out3 = extract_field <v_out2, 0>
1032 s_out4 = adjust_result <s_out3>
1033 use <s_out4>
1034 use <s_out4>
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code;
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree operation = TREE_OPERAND (stmt, 1);
  int op_type;

  /* The reduction variable is always the last operand of the (binary or
     ternary) reduction operation.  */
  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle.  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }

  /*** 2. Create the reduction epilog code.
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

          ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = loop->single_exit->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
  scalar_dest = TREE_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = VEC_RSHIFT_EXPR;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build2 (shift_code, vectype, new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build2 (code, vectype, new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          TREE_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                                    rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                        build2 (code, scalar_type, new_name, new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
         s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      /* On big-endian targets the accumulated result ends up in the
         highest-numbered element; on little-endian in element 0.  */
      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                       TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'scalar_initial_def' is zero).

         Create:
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.6 Replace uses of s_out0 with uses of s_out4 (or s_out3)  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
    SET_USE (use_p, new_temp);
}
1362 /* Function vectorizable_reduction
1364 Check if STMT performs a reduction that can be vectorized.
1365 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1366 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1367 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1369 This function also handles reduction idioms (patterns) that have been
1370 recognized in advance during vect_pattern_recog. In this case, STMT may be
1371 of this form:
1372 X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1374 sequence that had been detected and replaced by the pattern-stmt (STMT).
1376 In some cases of reduction patterns, the type of the reduction variable X is
1377 different than the type of the other arguments of STMT.
1378 In such cases, the vectype that is used when transforming STMT into a vector
1379 stmt is different than the vectype that is used to determine the
1380 vectorization factor, because it consists of a different number of elements
1381 than the actual number of elements that are being operated upon in parallel.
1383 For example, consider an accumulation of shorts into an int accumulator.
1384 On some targets it's possible to vectorize this pattern operating on 8
1385 shorts at a time (hence, the vectype for purposes of determining the
1386 vectorization factor should be V8HI); on the other hand, the vectype that
1387 is used to create the vector form is actually V4SI (the type of the result).
1389 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1390 indicates what is the actual level of parallelism (V8HI in the example), so
1391 that the right vectorization factor would be derived. This vectype
1392 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1393 be used to create the vectorized stmt. The right vectype for the vectorized
1394 stmt is obtained from the type of the result X:
1395 get_vectype_for_scalar_type (TREE_TYPE (X))
1397 This means that, contrary to "regular" reductions (or "regular" stmts in
1398 general), the following equation:
1399 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1400 does *NOT* necessarily hold for reduction patterns.
1403 bool
1404 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1406 tree vec_dest;
1407 tree scalar_dest;
1408 tree op;
1409 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1411 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1412 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1413 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1414 tree operation;
1415 enum tree_code code, orig_code, epilog_reduc_code = 0;
1416 enum machine_mode vec_mode;
1417 int op_type;
1418 optab optab, reduc_optab;
1419 tree new_temp = NULL_TREE;
1420 tree def, def_stmt;
1421 enum vect_def_type dt;
1422 tree new_phi;
1423 tree scalar_type;
1424 bool is_simple_use;
1425 tree orig_stmt;
1426 stmt_vec_info orig_stmt_info;
1427 tree expr = NULL_TREE;
1428 int i;
1429 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1430 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1431 stmt_vec_info prev_stmt_info;
1432 tree reduc_def;
1433 tree new_stmt = NULL_TREE;
1434 int j;
1436 gcc_assert (ncopies >= 1);
1438 /* 1. Is vectorizable reduction? */
1440 /* Not supportable if the reduction variable is used in the loop. */
1441 if (STMT_VINFO_RELEVANT_P (stmt_info))
1442 return false;
1444 if (!STMT_VINFO_LIVE_P (stmt_info))
1445 return false;
1447 /* Make sure it was already recognized as a reduction computation. */
1448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1449 return false;
1451 /* 2. Has this been recognized as a reduction pattern?
1453 Check if STMT represents a pattern that has been recognized
1454 in earlier analysis stages. For stmts that represent a pattern,
1455 the STMT_VINFO_RELATED_STMT field records the last stmt in
1456 the original sequence that constitutes the pattern. */
1458 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1459 if (orig_stmt)
1461 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1462 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1463 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1464 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1467 /* 3. Check the operands of the operation. The first operands are defined
1468 inside the loop body. The last operand is the reduction variable,
1469 which is defined by the loop-header-phi. */
1471 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1473 operation = TREE_OPERAND (stmt, 1);
1474 code = TREE_CODE (operation);
1475 op_type = TREE_CODE_LENGTH (code);
1476 if (op_type != binary_op && op_type != ternary_op)
1477 return false;
1478 scalar_dest = TREE_OPERAND (stmt, 0);
1479 scalar_type = TREE_TYPE (scalar_dest);
1481 /* All uses but the last are expected to be defined in the loop.
1482 The last use is the reduction variable. */
1483 for (i = 0; i < op_type-1; i++)
1485 op = TREE_OPERAND (operation, i);
1486 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1487 gcc_assert (is_simple_use);
1488 gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1489 dt == vect_constant_def);
1492 op = TREE_OPERAND (operation, i);
1493 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1494 gcc_assert (is_simple_use);
1495 gcc_assert (dt == vect_reduction_def);
1496 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1497 if (orig_stmt)
1498 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1499 else
1500 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1502 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1503 return false;
1505 /* 4. Supportable by target? */
1507 /* 4.1. Check support for the operation in the loop */
1508 optab = optab_for_tree_code (code, vectype);
1509 if (!optab)
1511 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "no optab.");
1513 return false;
1515 vec_mode = TYPE_MODE (vectype);
1516 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1518 if (vect_print_dump_info (REPORT_DETAILS))
1519 fprintf (vect_dump, "op not supported by target.");
1520 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1521 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1522 < vect_min_worthwhile_factor (code))
1523 return false;
1524 if (vect_print_dump_info (REPORT_DETAILS))
1525 fprintf (vect_dump, "proceeding using word mode.");
1528 /* Worthwhile without SIMD support? */
1529 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1530 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1531 < vect_min_worthwhile_factor (code))
1533 if (vect_print_dump_info (REPORT_DETAILS))
1534 fprintf (vect_dump, "not worthwhile without SIMD support.");
1535 return false;
1538 /* 4.2. Check support for the epilog operation.
1540 If STMT represents a reduction pattern, then the type of the
1541 reduction variable may be different than the type of the rest
1542 of the arguments. For example, consider the case of accumulation
1543 of shorts into an int accumulator; The original code:
1544 S1: int_a = (int) short_a;
1545 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1547 was replaced with:
1548 STMT: int_acc = widen_sum <short_a, int_acc>
1550 This means that:
1551 1. The tree-code that is used to create the vector operation in the
1552 epilog code (that reduces the partial results) is not the
1553 tree-code of STMT, but is rather the tree-code of the original
1554 stmt from the pattern that STMT is replacing. I.e, in the example
1555 above we want to use 'widen_sum' in the loop, but 'plus' in the
1556 epilog.
1557 2. The type (mode) we use to check available target support
1558 for the vector operation to be created in the *epilog*, is
1559 determined by the type of the reduction variable (in the example
1560 above we'd check this: plus_optab[vect_int_mode]).
1561 However the type (mode) we use to check available target support
1562 for the vector operation to be created *inside the loop*, is
1563 determined by the type of the other arguments to STMT (in the
1564 example we'd check this: widen_sum_optab[vect_short_mode]).
1566 This is contrary to "regular" reductions, in which the types of all
1567 the arguments are the same as the type of the reduction variable.
1568 For "regular" reductions we can therefore use the same vector type
1569 (and also the same tree-code) when generating the epilog code and
1570 when generating the code inside the loop. */
1572 if (orig_stmt)
1574 /* This is a reduction pattern: get the vectype from the type of the
1575 reduction variable, and get the tree-code from orig_stmt. */
1576 orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1577 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1578 vec_mode = TYPE_MODE (vectype);
1580 else
1582 /* Regular reduction: use the same vectype and tree-code as used for
1583 the vector code inside the loop can be used for the epilog code. */
1584 orig_code = code;
1587 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1588 return false;
1589 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1590 if (!reduc_optab)
1592 if (vect_print_dump_info (REPORT_DETAILS))
1593 fprintf (vect_dump, "no optab for reduction.");
1594 epilog_reduc_code = NUM_TREE_CODES;
1596 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1598 if (vect_print_dump_info (REPORT_DETAILS))
1599 fprintf (vect_dump, "reduc op not supported by target.");
1600 epilog_reduc_code = NUM_TREE_CODES;
1603 if (!vec_stmt) /* transformation not required. */
1605 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1606 return true;
1609 /** Transform. **/
1611 if (vect_print_dump_info (REPORT_DETAILS))
1612 fprintf (vect_dump, "transform reduction.");
1614 /* Create the destination vector */
1615 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1617 /* Create the reduction-phi that defines the reduction-operand. */
1618 new_phi = create_phi_node (vec_dest, loop->header);
1620 /* In case the vectorization factor (VF) is bigger than the number
1621 of elements that we can fit in a vectype (nunits), we have to generate
1622 more than one vector stmt - i.e - we need to "unroll" the
1623 vector stmt by a factor VF/nunits. For more details see documentation
1624 in vectorizable_operation. */
1626 prev_stmt_info = NULL;
1627 for (j = 0; j < ncopies; j++)
1629 /* Handle uses. */
1630 if (j == 0)
1632 op = TREE_OPERAND (operation, 0);
1633 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1634 if (op_type == ternary_op)
1636 op = TREE_OPERAND (operation, 1);
1637 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1640 /* Get the vector def for the reduction variable from the phi node */
1641 reduc_def = PHI_RESULT (new_phi);
1643 else
1645 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
1646 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
1647 if (op_type == ternary_op)
1648 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
1650 /* Get the vector def for the reduction variable from the vectorized
1651 reduction operation generated in the previous iteration (j-1) */
1652 reduc_def = TREE_OPERAND (new_stmt ,0);
1655 /* Arguments are ready. create the new vector stmt. */
1657 if (op_type == binary_op)
1658 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
1659 else
1660 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, reduc_def);
1661 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1662 new_temp = make_ssa_name (vec_dest, new_stmt);
1663 TREE_OPERAND (new_stmt, 0) = new_temp;
1664 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1666 if (j == 0)
1667 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1668 else
1669 STMT_VINFO_RELATED_STMT (prev_stmt_info) = *vec_stmt = new_stmt;
1670 prev_stmt_info = vinfo_for_stmt (new_stmt);
1673 /* Finalize the reduction-phi (set it's arguments) and create the
1674 epilog reduction code. */
1675 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1677 return true;
1681 /* Function vectorizable_assignment.
1683 Check if STMT performs an assignment (copy) that can be vectorized.
1684 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1685 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1686 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1688 bool
1689 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1691 tree vec_dest;
1692 tree scalar_dest;
1693 tree op;
1694 tree vec_oprnd;
1695 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1698 tree new_temp;
1699 tree def, def_stmt;
1700 enum vect_def_type dt;
1701 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1702 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1704 gcc_assert (ncopies >= 1);
1705 if (ncopies > 1)
1706 return false; /* FORNOW */
1708 /* Is vectorizable assignment? */
1709 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1710 return false;
1712 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1714 if (TREE_CODE (stmt) != MODIFY_EXPR)
1715 return false;
1717 scalar_dest = TREE_OPERAND (stmt, 0);
1718 if (TREE_CODE (scalar_dest) != SSA_NAME)
1719 return false;
1721 op = TREE_OPERAND (stmt, 1);
1722 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1724 if (vect_print_dump_info (REPORT_DETAILS))
1725 fprintf (vect_dump, "use not simple.");
1726 return false;
1729 if (!vec_stmt) /* transformation not required. */
1731 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1732 return true;
1735 /** Transform. **/
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "transform assignment.");
1739 /* Handle def. */
1740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1742 /* Handle use. */
1743 op = TREE_OPERAND (stmt, 1);
1744 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1746 /* Arguments are ready. create the new vector stmt. */
1747 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1748 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1749 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1750 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1752 return true;
1756 /* Function vect_min_worthwhile_factor.
1758 For a loop where we could vectorize the operation indicated by CODE,
1759 return the minimum vectorization factor that makes it worthwhile
1760 to use generic vectors. */
1761 static int
1762 vect_min_worthwhile_factor (enum tree_code code)
1764 switch (code)
1766 case PLUS_EXPR:
1767 case MINUS_EXPR:
1768 case NEGATE_EXPR:
1769 return 4;
1771 case BIT_AND_EXPR:
1772 case BIT_IOR_EXPR:
1773 case BIT_XOR_EXPR:
1774 case BIT_NOT_EXPR:
1775 return 2;
1777 default:
1778 return INT_MAX;
1783 /* Function vectorizable_operation.
1785 Check if STMT performs a binary or unary operation that can be vectorized.
1786 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1787 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1788 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1790 bool
1791 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1793 tree vec_dest;
1794 tree scalar_dest;
1795 tree operation;
1796 tree op0, op1 = NULL;
1797 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
1798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1800 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1801 enum tree_code code;
1802 enum machine_mode vec_mode;
1803 tree new_temp;
1804 int op_type;
1805 optab optab;
1806 int icode;
1807 enum machine_mode optab_op2_mode;
1808 tree def, def_stmt;
1809 enum vect_def_type dt0, dt1;
1810 tree new_stmt;
1811 stmt_vec_info prev_stmt_info;
1812 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1813 int nunits_out;
1814 tree vectype_out;
1815 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1816 int j;
1818 gcc_assert (ncopies >= 1);
1820 /* Is STMT a vectorizable binary/unary operation? */
1821 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1822 return false;
1824 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1826 if (STMT_VINFO_LIVE_P (stmt_info))
1828 /* FORNOW: not yet supported. */
1829 if (vect_print_dump_info (REPORT_DETAILS))
1830 fprintf (vect_dump, "value used after loop.");
1831 return false;
1834 if (TREE_CODE (stmt) != MODIFY_EXPR)
1835 return false;
1837 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1838 return false;
1840 scalar_dest = TREE_OPERAND (stmt, 0);
1841 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1842 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1843 if (nunits_out != nunits_in)
1844 return false;
1846 operation = TREE_OPERAND (stmt, 1);
1847 code = TREE_CODE (operation);
1848 optab = optab_for_tree_code (code, vectype);
1850 /* Support only unary or binary operations. */
1851 op_type = TREE_CODE_LENGTH (code);
1852 if (op_type != unary_op && op_type != binary_op)
1854 if (vect_print_dump_info (REPORT_DETAILS))
1855 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1856 return false;
1859 op0 = TREE_OPERAND (operation, 0);
1860 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
1862 if (vect_print_dump_info (REPORT_DETAILS))
1863 fprintf (vect_dump, "use not simple.");
1864 return false;
1867 if (op_type == binary_op)
1869 op1 = TREE_OPERAND (operation, 1);
1870 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
1872 if (vect_print_dump_info (REPORT_DETAILS))
1873 fprintf (vect_dump, "use not simple.");
1874 return false;
1878 /* Supportable by target? */
1879 if (!optab)
1881 if (vect_print_dump_info (REPORT_DETAILS))
1882 fprintf (vect_dump, "no optab.");
1883 return false;
1885 vec_mode = TYPE_MODE (vectype);
1886 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1887 if (icode == CODE_FOR_nothing)
1889 if (vect_print_dump_info (REPORT_DETAILS))
1890 fprintf (vect_dump, "op not supported by target.");
1891 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1892 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1893 < vect_min_worthwhile_factor (code))
1894 return false;
1895 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump, "proceeding using word mode.");
1899 /* Worthwhile without SIMD support? */
1900 if (!VECTOR_MODE_P (vec_mode)
1901 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1902 < vect_min_worthwhile_factor (code))
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "not worthwhile without SIMD support.");
1906 return false;
1909 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1911 /* FORNOW: not yet supported. */
1912 if (!VECTOR_MODE_P (vec_mode))
1913 return false;
1915 /* Invariant argument is needed for a vector shift
1916 by a scalar shift operand. */
1917 optab_op2_mode = insn_data[icode].operand[2].mode;
1918 if (! (VECTOR_MODE_P (optab_op2_mode)
1919 || dt1 == vect_constant_def
1920 || dt1 == vect_invariant_def))
1922 if (vect_print_dump_info (REPORT_DETAILS))
1923 fprintf (vect_dump, "operand mode requires invariant argument.");
1924 return false;
1928 if (!vec_stmt) /* transformation not required. */
1930 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1931 return true;
1934 /** Transform. **/
1936 if (vect_print_dump_info (REPORT_DETAILS))
1937 fprintf (vect_dump, "transform binary/unary operation. ncopies = %d.",
1938 ncopies);
1940 /* Handle def. */
1941 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1943 /* In case the vectorization factor (VF) is bigger than the number
1944 of elements that we can fit in a vectype (nunits), we have to generate
1945 more than one vector stmt - i.e - we need to "unroll" the
1946 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1947 from one copy of the vector stmt to the next, in the field
1948 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1949 stages to find the correct vector defs to be used when vectorizing
1950 stmts that use the defs of the current stmt. The example below illustrates
1951 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
1952 4 vectorized stmts):
1954 before vectorization:
1955 RELATED_STMT VEC_STMT
1956 S1: x = memref - -
1957 S2: z = x + 1 - -
1959 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
1960 there):
1961 RELATED_STMT VEC_STMT
1962 VS1_0: vx0 = memref0 VS1_1 -
1963 VS1_1: vx1 = memref1 VS1_2 -
1964 VS1_2: vx2 = memref2 VS1_3 -
1965 VS1_3: vx3 = memref3 - -
1966 S1: x = load - VS1_0
1967 S2: z = x + 1 - -
1969 step2: vectorize stmt S2 (done here):
1970 To vectorize stmt S2 we first need to find the relevant vector
1971 def for the first operand 'x'. This is, as usual, obtained from
1972 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
1973 that defines 'x' (S1). This way we find the stmt VS1_0, and the
1974 relevant vector def 'vx0'. Having found 'vx0' we can generate
1975 the vector stmt VS2_0, and as usual, record it in the
1976 STMT_VINFO_VEC_STMT of stmt S2.
1977 When creating the second copy (VS2_1), we obtain the relevant vector
1978 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
1979 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
1980 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
1981 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
1982 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
1983 chain of stmts and pointers:
1984 RELATED_STMT VEC_STMT
1985 VS1_0: vx0 = memref0 VS1_1 -
1986 VS1_1: vx1 = memref1 VS1_2 -
1987 VS1_2: vx2 = memref2 VS1_3 -
1988 VS1_3: vx3 = memref3 - -
1989 S1: x = load - VS1_0
1990 VS2_0: vz0 = vx0 + v1 VS2_1 -
1991 VS2_1: vz1 = vx1 + v1 VS2_2 -
1992 VS2_2: vz2 = vx2 + v1 VS2_3 -
1993 VS2_3: vz3 = vx3 + v1 - -
1994 S2: z = x + 1 - VS2_0 */
1996 prev_stmt_info = NULL;
1997 for (j = 0; j < ncopies; j++)
1999 /* Handle uses. */
2000 if (j == 0)
2002 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2003 if (op_type == binary_op)
2005 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
2007 /* Vector shl and shr insn patterns can be defined with
2008 scalar operand 2 (shift operand). In this case, use
2009 constant or loop invariant op1 directly, without
2010 extending it to vector mode first. */
2011 optab_op2_mode = insn_data[icode].operand[2].mode;
2012 if (!VECTOR_MODE_P (optab_op2_mode))
2014 if (vect_print_dump_info (REPORT_DETAILS))
2015 fprintf (vect_dump, "operand 1 using scalar mode.");
2016 vec_oprnd1 = op1;
2019 if (!vec_oprnd1)
2020 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2023 else
2025 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2026 if (op_type == binary_op)
2027 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2030 /* Arguments are ready. create the new vector stmt. */
2032 if (op_type == binary_op)
2033 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2034 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2035 else
2036 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2037 build1 (code, vectype, vec_oprnd0));
2038 new_temp = make_ssa_name (vec_dest, new_stmt);
2039 TREE_OPERAND (new_stmt, 0) = new_temp;
2040 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2042 if (j == 0)
2043 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2044 else
2046 gcc_assert (prev_stmt_info);
2047 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2049 prev_stmt_info = vinfo_for_stmt (new_stmt);
2052 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2053 return true;
2057 /* Function vectorizable_type_demotion
2059 Check if STMT performs a binary or unary operation that involves
2060 type demotion, and if it can be vectorized.
2061 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2062 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2063 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2065 bool
2066 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
2067 tree *vec_stmt)
2069 tree vec_dest;
2070 tree scalar_dest;
2071 tree operation;
2072 tree op0;
2073 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2075 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2076 enum tree_code code;
2077 tree new_temp;
2078 tree def, def_stmt;
2079 enum vect_def_type dt0;
2080 tree new_stmt;
2081 stmt_vec_info prev_stmt_info;
2082 int nunits_in;
2083 int nunits_out;
2084 tree vectype_out;
2085 int ncopies;
2086 int j;
2087 tree expr;
2088 tree vectype_in;
2089 tree scalar_type;
2090 optab optab;
2091 enum machine_mode vec_mode;
2093 /* Is STMT a vectorizable type-demotion operation? */
2095 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2096 return false;
2098 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2100 if (STMT_VINFO_LIVE_P (stmt_info))
2102 /* FORNOW: not yet supported. */
2103 if (vect_print_dump_info (REPORT_DETAILS))
2104 fprintf (vect_dump, "value used after loop.");
2105 return false;
2108 if (TREE_CODE (stmt) != MODIFY_EXPR)
2109 return false;
2111 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2112 return false;
2114 operation = TREE_OPERAND (stmt, 1);
2115 code = TREE_CODE (operation);
2116 if (code != NOP_EXPR && code != CONVERT_EXPR)
2117 return false;
2119 op0 = TREE_OPERAND (operation, 0);
2120 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2121 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2123 scalar_dest = TREE_OPERAND (stmt, 0);
2124 scalar_type = TREE_TYPE (scalar_dest);
2125 vectype_out = get_vectype_for_scalar_type (scalar_type);
2126 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2127 if (nunits_in != nunits_out/2) /* FORNOW */
2128 return false;
2130 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2131 gcc_assert (ncopies >= 1);
2133 /* Check the operands of the operation. */
2134 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2136 if (vect_print_dump_info (REPORT_DETAILS))
2137 fprintf (vect_dump, "use not simple.");
2138 return false;
2141 /* Supportable by target? */
2142 code = VEC_PACK_MOD_EXPR;
2143 optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
2144 if (!optab)
2145 return false;
2147 vec_mode = TYPE_MODE (vectype_in);
2148 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2149 return false;
2151 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2153 if (!vec_stmt) /* transformation not required. */
2155 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2156 return true;
2159 /** Transform. **/
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2163 ncopies);
2165 /* Handle def. */
2166 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2168 /* In case the vectorization factor (VF) is bigger than the number
2169 of elements that we can fit in a vectype (nunits), we have to generate
2170 more than one vector stmt - i.e - we need to "unroll" the
2171 vector stmt by a factor VF/nunits. */
2172 prev_stmt_info = NULL;
2173 for (j = 0; j < ncopies; j++)
2175 /* Handle uses. */
2176 if (j == 0)
2178 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2179 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2180 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
2182 else
2184 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2185 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2188 /* Arguments are ready. Create the new vector stmt. */
2189 expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
2190 new_stmt = build2 (MODIFY_EXPR, vectype_out, vec_dest, expr);
2191 new_temp = make_ssa_name (vec_dest, new_stmt);
2192 TREE_OPERAND (new_stmt, 0) = new_temp;
2193 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2195 if (j == 0)
2196 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2197 else
2199 gcc_assert (prev_stmt_info);
2200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2203 prev_stmt_info = vinfo_for_stmt (new_stmt);
2206 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2207 return true;
2211 /* Function vect_gen_widened_results_half
2213 Create a vector stmt whose code, type, number of arguments, and result
2214 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2215 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2216 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2217 needs to be created (DECL is a function-decl of a target-builtin).
2218 STMT is the original scalar stmt that we are vectorizing. */
2220 static tree
2221 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2222 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2223 tree vec_dest, block_stmt_iterator *bsi,
2224 tree stmt)
2226 tree vec_params;
2227 tree expr;
2228 tree new_stmt;
2229 tree new_temp;
2230 tree sym;
2231 ssa_op_iter iter;
2233 /* Generate half of the widened result: */
2234 if (code == CALL_EXPR)
2236 /* Target specific support */
2237 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2238 if (op_type == binary_op)
2239 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2240 expr = build_function_call_expr (decl, vec_params);
2242 else
2244 /* Generic support */
2245 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2246 if (op_type == binary_op)
2247 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2248 else
2249 expr = build1 (code, vectype, vec_oprnd0);
2251 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
2252 new_temp = make_ssa_name (vec_dest, new_stmt);
2253 TREE_OPERAND (new_stmt, 0) = new_temp;
2254 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2256 if (code == CALL_EXPR)
2258 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2260 if (TREE_CODE (sym) == SSA_NAME)
2261 sym = SSA_NAME_VAR (sym);
2262 mark_sym_for_renaming (sym);
2266 return new_stmt;
2270 /* Function vectorizable_type_promotion
2272 Check if STMT performs a binary or unary operation that involves
2273 type promotion, and if it can be vectorized.
2274 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2275 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2276 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2278 bool
2279 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2280 tree *vec_stmt)
2282 tree vec_dest;
2283 tree scalar_dest;
2284 tree operation;
2285 tree op0, op1 = NULL;
2286 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2288 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2289 enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
2290 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2291 int op_type;
2292 tree def, def_stmt;
2293 enum vect_def_type dt0, dt1;
2294 tree new_stmt;
2295 stmt_vec_info prev_stmt_info;
2296 int nunits_in;
2297 int nunits_out;
2298 tree vectype_out;
2299 int ncopies;
2300 int j;
2301 tree vectype_in;
2303 /* Is STMT a vectorizable type-promotion operation? */
2305 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2306 return false;
2308 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2310 if (STMT_VINFO_LIVE_P (stmt_info))
2312 /* FORNOW: not yet supported. */
2313 if (vect_print_dump_info (REPORT_DETAILS))
2314 fprintf (vect_dump, "value used after loop.");
2315 return false;
2318 if (TREE_CODE (stmt) != MODIFY_EXPR)
2319 return false;
2321 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2322 return false;
2324 operation = TREE_OPERAND (stmt, 1);
2325 code = TREE_CODE (operation);
2326 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
2327 return false;
2329 op0 = TREE_OPERAND (operation, 0);
2330 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2331 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2332 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2333 gcc_assert (ncopies >= 1);
2335 scalar_dest = TREE_OPERAND (stmt, 0);
2336 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2337 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2338 if (nunits_out != nunits_in/2) /* FORNOW */
2339 return false;
2341 /* Check the operands of the operation. */
2342 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2344 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "use not simple.");
2346 return false;
2349 op_type = TREE_CODE_LENGTH (code);
2350 if (op_type == binary_op)
2352 op1 = TREE_OPERAND (operation, 1);
2353 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "use not simple.");
2357 return false;
2361 /* Supportable by target? */
2362 if (!supportable_widening_operation (code, stmt, vectype_in,
2363 &decl1, &decl2, &code1, &code2))
2364 return false;
2366 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2368 if (!vec_stmt) /* transformation not required. */
2370 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2371 return true;
2374 /** Transform. **/
2376 if (vect_print_dump_info (REPORT_DETAILS))
2377 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2378 ncopies);
2380 /* Handle def. */
2381 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2383 /* In case the vectorization factor (VF) is bigger than the number
2384 of elements that we can fit in a vectype (nunits), we have to generate
2385 more than one vector stmt - i.e - we need to "unroll" the
2386 vector stmt by a factor VF/nunits. */
2387 prev_stmt_info = NULL;
2388 for (j = 0; j < ncopies; j++)
2390 /* Handle uses. */
2391 if (j == 0)
2393 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2394 if (op_type == binary_op)
2395 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2397 else
2399 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2400 if (op_type == binary_op)
2401 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2404 /* Arguments are ready. Create the new vector stmt. We are creating
2405 two vector defs because the widened results does not fit in one vector.
2406 The vectorized stmt can be expressed as a call to a taregt builtin,
2407 or a using a tree-code.
2410 /* Generate first half of the widened result: */
2411 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2412 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2413 if (j == 0)
2414 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2415 else
2416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2417 prev_stmt_info = vinfo_for_stmt (new_stmt);
2419 /* Generate second half of the widened result: */
2420 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2421 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2423 prev_stmt_info = vinfo_for_stmt (new_stmt);
2426 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2427 return true;
2431 /* Function vect_strided_store_supported.
2433 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2434 and FALSE otherwise.
2438 static bool
2439 vect_strided_store_supported (tree vectype)
2441 optab interleave_high_optab, interleave_low_optab;
2442 int mode;
2444 mode = (int) TYPE_MODE (vectype);
2446 /* Check that the operation is supported. */
2447 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2448 vectype);
2449 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2450 vectype);
2451 if (!interleave_high_optab || !interleave_low_optab)
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "no optab for interleave.");
2455 return false;
2457 if (interleave_high_optab->handlers[(int) mode].insn_code
2458 == CODE_FOR_nothing
2459 || interleave_low_optab->handlers[(int) mode].insn_code
2460 == CODE_FOR_nothing)
2462 if (vect_print_dump_info (REPORT_DETAILS))
2463 fprintf (vect_dump, "interleave op not supported by target.");
2464 return false;
2466 return true;
2470 /* Function vect_permute_store_chain.
2472 Given a chain of interleaved strores in DR_CHAIN of LENGTH that must be
2473 a power of 2, generate interleave_high/low stmts to reorder the data
2474 correctly for the stores. Return the final references for stores in
2475 RESULT_CHAIN.
2477 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2478 The input is 4 vectors each containg 8 elements. We assign a number to each
2479 element, the input sequence is:
2481 1st vec: 0 1 2 3 4 5 6 7
2482 2nd vec: 8 9 10 11 12 13 14 15
2483 3rd vec: 16 17 18 19 20 21 22 23
2484 4th vec: 24 25 26 27 28 29 30 31
2486 The output sequence should be:
2488 1st vec: 0 8 16 24 1 9 17 25
2489 2nd vec: 2 10 18 26 3 11 19 27
2490 3rd vec: 4 12 20 28 5 13 21 30
2491 4th vec: 6 14 22 30 7 15 23 31
2493 i.e., we interleave the contents of the four vectors in their order.
2495 We use interleave_high/low instructions to create such output. The input of
2496 each interleave_high/low operation is two vectors:
2497 1st vec 2nd vec
2498 0 1 2 3 4 5 6 7
2499 the even elements of the result vector are obtained left-to-right from the
2500 high/low elements of the first vector. The odd elements of the result are
2501 obtained left-to-right from the high/low elements of the second vector.
2502 The output of interleave_high will be: 0 4 1 5
2503 and of interleave_low: 2 6 3 7
2506 The permutaion is done in log LENGTH stages. In each stage interleave_high
2507 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2508 where the first argument is taken from the first half of DR_CHAIN and the
2509 second argument from it's second half.
2510 In our example,
2512 I1: interleave_high (1st vec, 3rd vec)
2513 I2: interleave_low (1st vec, 3rd vec)
2514 I3: interleave_high (2nd vec, 4th vec)
2515 I4: interleave_low (2nd vec, 4th vec)
2517 The output for the first stage is:
2519 I1: 0 16 1 17 2 18 3 19
2520 I2: 4 20 5 21 6 22 7 23
2521 I3: 8 24 9 25 10 26 11 27
2522 I4: 12 28 13 29 14 30 15 31
2524 The output of the second stage, i.e. the final result is:
2526 I1: 0 8 16 24 1 9 17 25
2527 I2: 2 10 18 26 3 11 19 27
2528 I3: 4 12 20 28 5 13 21 30
2529 I4: 6 14 22 30 7 15 23 31
2533 static bool
2534 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2535 unsigned int length,
2536 tree stmt,
2537 block_stmt_iterator *bsi,
2538 VEC(tree,heap) **result_chain)
2540 tree perm_dest, perm_stmt, vect1, vect2, high, low;
2541 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2542 tree scalar_dest;
2543 int i;
2544 unsigned int j;
2545 VEC(tree,heap) *first, *second;
2547 scalar_dest = TREE_OPERAND (stmt, 0);
2548 first = VEC_alloc (tree, heap, length/2);
2549 second = VEC_alloc (tree, heap, length/2);
2551 /* Check that the operation is supported. */
2552 if (!vect_strided_store_supported (vectype))
2553 return false;
2555 *result_chain = VEC_copy (tree, heap, dr_chain);
2557 for (i = 0; i < exact_log2(length); i++)
2559 for (j = 0; j < length/2; j++)
2561 vect1 = VEC_index(tree, dr_chain, j);
2562 vect2 = VEC_index(tree, dr_chain, j+length/2);
2564 /* high = interleave_high (vect1, vect2); */
2565 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2566 add_referenced_tmp_var (perm_dest);
2567 if (BYTES_BIG_ENDIAN)
2568 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
2569 build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1,
2570 vect2));
2571 else
2572 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
2573 build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1,
2574 vect2));
2575 high = make_ssa_name (perm_dest, perm_stmt);
2576 TREE_OPERAND (perm_stmt, 0) = high;
2577 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2578 mark_new_vars_to_rename (perm_stmt);
2579 VEC_replace (tree, *result_chain, 2*j, high);
2581 /* low = interleave_low (vect1, vect2); */
2582 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2583 add_referenced_tmp_var (perm_dest);
2584 if (BYTES_BIG_ENDIAN)
2585 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
2586 build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1,
2587 vect2));
2588 else
2589 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
2590 build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1,
2591 vect2));
2592 low = make_ssa_name (perm_dest, perm_stmt);
2593 TREE_OPERAND (perm_stmt, 0) = low;
2594 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2595 mark_new_vars_to_rename (perm_stmt);
2596 VEC_replace (tree, *result_chain, 2*j+1, low);
2598 dr_chain = VEC_copy (tree, heap, *result_chain);
2600 return true;
2604 /* Function vectorizable_store.
2606 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2607 can be vectorized.
2608 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2609 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2610 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2613 bool
2614 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2616 tree scalar_dest;
2617 tree data_ref;
2618 tree op;
2619 tree vec_oprnd = NULL_TREE;
2620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2621 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2623 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2624 enum machine_mode vec_mode;
2625 tree dummy;
2626 enum dr_alignment_support alignment_support_cheme;
2627 ssa_op_iter iter;
2628 def_operand_p def_p;
2629 tree def, def_stmt;
2630 enum vect_def_type dt;
2631 stmt_vec_info prev_stmt_info = NULL;
2632 tree dataref_ptr = NULL_TREE;
2633 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2634 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2635 int j;
2636 tree new_stmt;
2637 tree ptr_incr, next_stmt, first_stmt;
2638 bool strided_store = false;
2639 unsigned int group_size, i;
2640 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2642 gcc_assert (ncopies >= 1);
2644 /* Is vectorizable store? */
2646 if (TREE_CODE (stmt) != MODIFY_EXPR)
2647 return false;
2649 scalar_dest = TREE_OPERAND (stmt, 0);
2650 if (TREE_CODE (scalar_dest) != ARRAY_REF
2651 && TREE_CODE (scalar_dest) != INDIRECT_REF
2652 && !DR_GROUP_FIRST_DR (stmt_info))
2653 return false;
2655 op = TREE_OPERAND (stmt, 1);
2656 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2658 if (vect_print_dump_info (REPORT_DETAILS))
2659 fprintf (vect_dump, "use not simple.");
2660 return false;
2663 vec_mode = TYPE_MODE (vectype);
2664 /* FORNOW. In some cases can vectorize even if data-type not supported
2665 (e.g. - array initialization with 0). */
2666 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2667 return false;
2669 if (!STMT_VINFO_DATA_REF (stmt_info))
2670 return false;
2672 if (DR_GROUP_FIRST_DR (stmt_info))
2674 strided_store = true;
2675 if (!vect_strided_store_supported (vectype))
2676 return false;
2678 if (!vec_stmt) /* transformation not required. */
2680 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2681 return true;
2684 /** Transform. **/
2686 if (vect_print_dump_info (REPORT_DETAILS))
2687 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2689 *vec_stmt = NULL_TREE;
2690 if (strided_store)
2692 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2693 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2694 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2696 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2698 /* We vectorize all the stmts of the interleaving group when we
2699 reach the last stmt in the group. */
2700 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2701 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
2702 return true;
2704 else
2706 first_stmt = stmt;
2707 first_dr = dr;
2708 group_size = 1;
2711 dr_chain = VEC_alloc (tree, heap, group_size);
2712 oprnds = VEC_alloc (tree, heap, group_size);
2714 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
2715 gcc_assert (alignment_support_cheme);
2716 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2718 /* In case the vectorization factor (VF) is bigger than the number
2719 of elements that we can fit in a vectype (nunits), we have to generate
2720 more than one vector stmt - i.e - we need to "unroll" the
2721 vector stmt by a factor VF/nunits. For more details see documentation in
2722 vectorizable_operation. */
2724 /* In case of interleaving (non-unit strided access):
2726 S1: &base + 2 = x2
2727 S2: &base = x0
2728 S3: &base + 1 = x1
2729 S4: &base + 3 = x3
2731 We create vectorized storess starting from base address (the access of the
2732 first stmt in the chain (S2 in the above example), when the last store stmt
2733 of the chain (S4) is reached:
2735 VS1: &base = vx2
2736 VS2: &base + vec_size*1 = vx0
2737 VS3: &base + vec_size*2 = vx1
2738 VS4: &base + vec_size*3 = vx3
2740 Then permutation statements are generated:
2742 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2743 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2746 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2747 (the order of the data-refs in the output of vect_permute_store_chain
2748 corresponds to the order of scalar stmts in the interleaving chain - see
2749 the documentaion of vect_permute_store_chain()).
2751 In case of both multiple types and interleaving, above vector stores and
2752 permutation stmts are created for every copy. The result vector stmts are
2753 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
2754 STMT_VINFO_RELATED_STMT for the next copies.
2757 for (j = 0; j < ncopies; j++)
2759 if (j == 0)
2761 /* Handle use - get the vectorized defs from the defining stmts. */
2762 /* For interleaved stores we collect vectorized defs for all the
2763 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
2764 as an input to vect_permute_store_chain(), and OPRNDS as an input
2765 to vect_get_vec_def_for_stmt_copy() for the next copy.
2766 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2767 OPRNDS are of size 1.
2769 next_stmt = first_stmt;
2770 for (i = 0; i < group_size; i++)
2772 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
2773 is the exact number of stmts in the chain. Therefore, NEXT_STMT
2774 can't be NULL_TREE. In case that there is no interleaving,
2775 GROUP_SIZE is 1, and only one iteration of the loop will be
2776 executed.
2778 gcc_assert (next_stmt);
2779 op = TREE_OPERAND (next_stmt, 1);
2780 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
2781 VEC_quick_push(tree, dr_chain, vec_oprnd);
2782 VEC_quick_push(tree, oprnds, vec_oprnd);
2783 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2785 /* Handle def - create new pointer. */
2786 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
2787 &dummy, &ptr_incr, false);
2789 else
2791 /* Handle use - get the vectorized defs from the defining stmts. */
2792 /* For interleaved stores we created vectorized defs for all the
2793 defs stored in OPRNDS in the previous iteration (previous copy).
2794 DR_CHAIN is then used as an input to vect_permute_store_chain(),
2795 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
2796 next copy.
2797 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2798 OPRNDS are of size 1.
2800 for (i = 0; i < group_size; i++)
2802 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
2803 VEC_index (tree, oprnds, i));
2804 VEC_replace(tree, dr_chain, i, vec_oprnd);
2805 VEC_replace(tree, oprnds, i, vec_oprnd);
2807 /* Handle def. */
2808 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2811 if (strided_store)
2813 result_chain = VEC_alloc (tree, heap, group_size);
2814 /* Permute. */
2815 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
2816 &result_chain))
2817 return false;
2820 next_stmt = first_stmt;
2821 for (i = 0; i < group_size; i++)
2823 /* For strided stores vectorized defs are interleaved in
2824 vect_permute_store_chain(). */
2825 if (strided_store)
2826 vec_oprnd = VEC_index(tree, result_chain, i);
2828 data_ref = build_fold_indirect_ref (dataref_ptr);
2830 /* Arguments are ready. create the new vector stmt. */
2831 new_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd);
2832 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2834 /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
2835 use outside the loop and a loop peel is performed then the def may be
2836 renamed by the peel. Mark it for renaming so the later use will also
2837 be renamed. */
2838 copy_virtual_operands (new_stmt, next_stmt);
2839 if (j == 0)
2841 /* The original store is deleted so the same SSA_NAMEs can be used.
2843 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VMAYDEF)
2845 SSA_NAME_DEF_STMT (def) = new_stmt;
2846 mark_sym_for_renaming (SSA_NAME_VAR (def));
2848 gcc_assert (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)));
2849 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
2851 else
2853 /* Create new names for all the definitions created by COPY and
2854 add replacement mappings for each new name. */
2855 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
2857 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2858 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2860 gcc_assert (prev_stmt_info);
2861 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2864 prev_stmt_info = vinfo_for_stmt (new_stmt);
2865 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2866 if (!next_stmt)
2867 break;
2868 /* Bump the vector pointer. */
2869 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2872 return true;
2876 /* Function vect_strided_load_supported.
2878 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2879 and FALSE otherwise.
2883 static bool
2884 vect_strided_load_supported (tree vectype)
2886 optab perm_even_optab, perm_odd_optab;
2887 int mode;
2889 mode = (int) TYPE_MODE (vectype);
2891 if (!targetm.vectorize.builtin_extract_even
2892 || !targetm.vectorize.builtin_extract_even (vectype, NULL_TREE,
2893 NULL_TREE, NULL_TREE))
2895 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR,
2896 vectype);
2897 if (!perm_even_optab)
2899 if (vect_print_dump_info (REPORT_DETAILS))
2900 fprintf (vect_dump, "no optab for perm_even.");
2901 return false;
2903 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2905 if (vect_print_dump_info (REPORT_DETAILS))
2906 fprintf (vect_dump, "perm_even op not supported by target.");
2907 return false;
2910 if (!targetm.vectorize.builtin_extract_odd
2911 || !targetm.vectorize.builtin_extract_odd (vectype, NULL_TREE,
2912 NULL_TREE, NULL_TREE))
2914 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
2915 if (!perm_odd_optab)
2917 if (vect_print_dump_info (REPORT_DETAILS))
2918 fprintf (vect_dump, "no optab for perm_odd.");
2919 return false;
2921 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2923 if (vect_print_dump_info (REPORT_DETAILS))
2924 fprintf (vect_dump, "perm_odd op not supported by target.");
2925 return false;
2928 return true;
2932 /* Function vect_permute_load_chain.
2934 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2935 a power of 2, generate extract_even/odd stmts to reorder the input data
2936 correctly. Return the final references for loads in RESULT_CHAIN.
2938 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2939 The input is 4 vectors each containg 8 elements. We assign a number to each
2940 element, the input sequence is:
2942 1st vec: 0 1 2 3 4 5 6 7
2943 2nd vec: 8 9 10 11 12 13 14 15
2944 3rd vec: 16 17 18 19 20 21 22 23
2945 4th vec: 24 25 26 27 28 29 30 31
2947 The output sequence should be:
2949 1st vec: 0 4 8 12 16 20 24 28
2950 2nd vec: 1 5 9 13 17 21 25 29
2951 3rd vec: 2 6 10 14 18 22 26 30
2952 4th vec: 3 7 11 15 19 23 27 31
2954 i.e., the first output vector should contain the first elements of each
2955 interleaving group, etc.
2957 We use extract_even/odd instructions to create such output. The input of each
2958 extract_even/odd operation is two vectors
2959 1st vec 2nd vec
2960 0 1 2 3 4 5 6 7
2962 and the output is the vector of extracted even/odd elements. The output of
2963 extract_even will be: 0 2 4 6
2964 and of extract_odd: 1 3 5 7
2967 The permutaion is done in log LENGTH stages. In each stage extract_even and
2968 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
2969 order. In our example,
2971 E1: extract_even (1st vec, 2nd vec)
2972 E2: extract_odd (1st vec, 2nd vec)
2973 E3: extract_even (3rd vec, 4th vec)
2974 E4: extract_odd (3rd vec, 4th vec)
2976 The output for the first stage will be:
2978 E1: 0 2 4 6 8 10 12 14
2979 E2: 1 3 5 7 9 11 13 15
2980 E3: 16 18 20 22 24 26 28 30
2981 E4: 17 19 21 23 25 27 29 31
2983 In order to proceed and create the correct sequence for the next stage (or
2984 for the correct output, if the second stage is the last one, as in our
2985 example), we first put the output of extract_even operation and then the
2986 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
2987 The input for the second stage is:
2989 1st vec (E1): 0 2 4 6 8 10 12 14
2990 2nd vec (E3): 16 18 20 22 24 26 28 30
2991 3rd vec (E2): 1 3 5 7 9 11 13 15
2992 4th vec (E4): 17 19 21 23 25 27 29 31
2994 The output of the second stage:
2996 E1: 0 4 8 12 16 20 24 28
2997 E2: 2 6 10 14 18 22 26 30
2998 E3: 1 5 9 13 17 21 25 29
2999 E4: 3 7 11 15 19 23 27 31
3001 And RESULT_CHAIN after reordering:
3003 1st vec (E1): 0 4 8 12 16 20 24 28
3004 2nd vec (E3): 1 5 9 13 17 21 25 29
3005 3rd vec (E2): 2 6 10 14 18 22 26 30
3006 4th vec (E4): 3 7 11 15 19 23 27 31
3009 static bool
3010 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3011 unsigned int length,
3012 tree stmt,
3013 block_stmt_iterator *bsi,
3014 VEC(tree,heap) **result_chain)
3016 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
3017 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3018 int i;
3019 unsigned int j;
3020 ssa_op_iter iter;
3021 use_operand_p use_p;
3023 /* Check that the operation is supported. */
3024 if (!vect_strided_load_supported (vectype))
3025 return false;
3027 *result_chain = VEC_copy (tree, heap, dr_chain);
3028 for (i = 0; i < exact_log2(length); i++)
3030 for (j = 0; j < length; j+=2)
3032 first_vect = VEC_index(tree, dr_chain, j);
3033 second_vect = VEC_index(tree, dr_chain, j+1);
3035 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3036 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3037 add_referenced_tmp_var (perm_dest);
3039 if (targetm.vectorize.builtin_extract_even
3040 && targetm.vectorize.builtin_extract_even (vectype,
3041 NULL_TREE, NULL_TREE, NULL_TREE))
3042 perm_stmt = targetm.vectorize.builtin_extract_even (perm_dest,
3043 first_vect, second_vect, stmt);
3044 else
3045 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
3046 build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
3047 first_vect, second_vect));
3049 data_ref = make_ssa_name (perm_dest, perm_stmt);
3050 TREE_OPERAND (perm_stmt, 0) = data_ref;
3051 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3053 /* CHECKME. */
3054 FOR_EACH_SSA_USE_OPERAND (use_p, perm_stmt, iter,
3055 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
3057 tree op = USE_FROM_PTR (use_p);
3058 if (TREE_CODE (op) == SSA_NAME)
3059 op = SSA_NAME_VAR (op);
3060 mark_sym_for_renaming (op);
3062 VEC_replace (tree, *result_chain, j/2, data_ref);
3064 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3065 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3066 add_referenced_tmp_var (perm_dest);
3068 if (targetm.vectorize.builtin_extract_odd
3069 && targetm.vectorize.builtin_extract_odd (vectype,
3070 NULL_TREE, NULL_TREE, NULL_TREE))
3071 perm_stmt = targetm.vectorize.builtin_extract_odd (perm_dest,
3072 first_vect, second_vect, stmt);
3073 else
3074 perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
3075 build2 (VEC_EXTRACT_ODD_EXPR, vectype,
3076 first_vect,
3077 second_vect));
3078 data_ref = make_ssa_name (perm_dest, perm_stmt);
3079 TREE_OPERAND (perm_stmt, 0) = data_ref;
3080 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3082 /* CHECKME. */
3083 FOR_EACH_SSA_USE_OPERAND (use_p, perm_stmt, iter,
3084 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
3086 tree op = USE_FROM_PTR (use_p);
3087 if (TREE_CODE (op) == SSA_NAME)
3088 op = SSA_NAME_VAR (op);
3089 mark_sym_for_renaming (op);
3092 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3094 dr_chain = VEC_copy (tree, heap, *result_chain);
3096 return true;
3100 /* Function vect_transform_strided_load.
3102 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3103 to perform their permutation and ascribe the result vectorized statements to
3104 the scalar statements.
3107 static bool
3108 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
3109 block_stmt_iterator *bsi)
3111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3112 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3113 tree next_stmt, new_stmt;
3114 VEC(tree,heap) *result_chain = NULL;
3115 unsigned int num, i;
3116 tree tmp_data_ref;
3118 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3119 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3120 vectors, that are ready for vector computation. */
3121 result_chain = VEC_alloc (tree, heap, size);
3122 /* Permute. */
3123 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
3124 return false;
3126 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3127 Since we scan the chain starting from it's first node, their order
3128 corresponds the order of data-refs in RESULT_CHAIN. */
3129 next_stmt = first_stmt;
3130 num = 1;
3131 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
3133 if (!next_stmt)
3134 break;
3136 /* Skip the gaps. Loads created for the gaps will be removed by dead
3137 code elimination pass later.
3138 DR_GROUP_GAP is the number of steps in elements from the previous
3139 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3140 correspond to the gaps.
3142 if (num < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3144 num++;
3145 continue;
3148 while (next_stmt)
3150 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3151 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3152 copies, and we put the new vector statement in the first available
3153 RELATED_STMT. */
3154 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3155 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3156 else
3158 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3159 tree rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3160 while (rel_stmt)
3162 prev_stmt = rel_stmt;
3163 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3165 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
3167 num = 1;
3168 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3169 /* If NEXT_STMT accesses the same DR as the previous statement,
3170 put the same TMP_DATA_REF as its vectorized statement; otherwise
3171 get the next data-ref from RESULT_CHAIN. */
3172 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3173 break;
3176 return true;
3180 /* vectorizable_load.
3182 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3183 can be vectorized.
3184 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3185 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3186 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3188 bool
3189 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3191 tree scalar_dest;
3192 tree vec_dest = NULL;
3193 tree data_ref = NULL;
3194 tree op;
3195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3196 stmt_vec_info prev_stmt_info;
3197 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3199 tree new_temp;
3200 int mode;
3201 tree new_stmt = NULL_TREE, first_stmt;
3202 tree dummy;
3203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3204 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3205 enum dr_alignment_support alignment_support_cheme;
3206 tree dataref_ptr = NULL_TREE;
3207 tree ptr_incr;
3208 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3209 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3210 int i, j, group_size;
3211 tree msq = NULL_TREE, lsq;
3212 tree offset = NULL_TREE;
3213 tree realignment_token = NULL_TREE;
3214 tree phi_stmt = NULL_TREE;
3215 VEC(tree,heap) *dr_chain = NULL;
3216 bool strided_load = false;
3218 gcc_assert (ncopies >= 1);
3220 /* Is vectorizable load? */
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3222 return false;
3224 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3226 if (STMT_VINFO_LIVE_P (stmt_info))
3228 /* FORNOW: not yet supported. */
3229 if (vect_print_dump_info (REPORT_DETAILS))
3230 fprintf (vect_dump, "value used after loop.");
3231 return false;
3234 if (TREE_CODE (stmt) != MODIFY_EXPR)
3235 return false;
3237 scalar_dest = TREE_OPERAND (stmt, 0);
3238 if (TREE_CODE (scalar_dest) != SSA_NAME)
3239 return false;
3241 op = TREE_OPERAND (stmt, 1);
3242 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF
3243 && !DR_GROUP_FIRST_DR (stmt_info))
3244 return false;
3246 if (!STMT_VINFO_DATA_REF (stmt_info))
3247 return false;
3249 mode = (int) TYPE_MODE (vectype);
3251 /* FORNOW. In some cases can vectorize even if data-type not supported
3252 (e.g. - data copies). */
3253 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3255 if (vect_print_dump_info (REPORT_DETAILS))
3256 fprintf (vect_dump, "Aligned load, but unsupported type.");
3257 return false;
3260 /* Check if the load is a part of an interleaving chain. */
3261 if (DR_GROUP_FIRST_DR (stmt_info))
3263 strided_load = true;
3265 /* Check if interleaving is supported. */
3266 if (!vect_strided_load_supported (vectype))
3267 return false;
3270 if (!vec_stmt) /* transformation not required. */
3272 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3273 return true;
3276 /** Transform. **/
3278 if (vect_print_dump_info (REPORT_DETAILS))
3279 fprintf (vect_dump, "transform load.");
3281 if (strided_load)
3283 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3284 /* Check if the chain of loads is already vectorized. */
3285 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3286 return true;
3287 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3288 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3289 dr_chain = VEC_alloc (tree, heap, group_size);
3291 else
3293 first_stmt = stmt;
3294 first_dr = dr;
3295 group_size = 1;
3298 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3299 gcc_assert (alignment_support_cheme);
3301 *vec_stmt = NULL_TREE;
3303 /* In case the vectorization factor (VF) is bigger than the number
3304 of elements that we can fit in a vectype (nunits), we have to generate
3305 more than one vector stmt - i.e - we need to "unroll" the
3306 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3307 from one copy of the vector stmt to the next, in the field
3308 STMT_VINFO_RELATED_STMT. This is necessary in oder to allow following
3309 stages to find the correct vector defs to be used when vectorizing
3310 stmts that use the defs of the current stmt. The example below illustrates
3311 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3312 4 vectorized stmts):
3314 before vectorization:
3315 RELATED_STMT VEC_STMT
3316 S1: x = memref - -
3317 S2: z = x + 1 - -
3319 step 1: vectorize stmt S1:
3320 We first create the vector stmt VS1_0, and, as usual, record a
3321 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3322 Next, we create the vector stmt VS1_1, and record a pointer to
3323 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3324 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3325 stmts and pointers:
3326 RELATED_STMT VEC_STMT
3327 VS1_0: vx0 = memref0 VS1_1 -
3328 VS1_1: vx1 = memref1 VS1_2 -
3329 VS1_2: vx2 = memref2 VS1_3 -
3330 VS1_3: vx3 = memref3 - -
3331 S1: x = load - VS1_0
3332 S2: z = x + 1 - -
3334 See in documentation in vectorizable_operation for how the information
3335 we recorded in RELATED_STMT field is used to vectorize stmt S2. */
3337 /* In case of interleaving (non-unit strided access):
3339 S1: x2 = &base + 2
3340 S2: x0 = &base
3341 S3: x1 = &base + 1
3342 S4: x3 = &base + 3
3344 We create vectorized loads starting from base address (the access of the
3345 first stmt in the chain (S2 in the above example):
3347 VS1: vx0 = &base
3348 VS2: vx1 = &base + vec_size*1
3349 VS3: vx3 = &base + vec_size*2
3350 VS4: vx4 = &base + vec_size*3
3352 Then permutation statements are generated:
3354 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3355 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3358 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3359 (the order of the data-refs in the output of vect_permute_load_chain
3360 corresponds to the order of scalar stmts in the interleaving chain - see
3361 the documentaion of vect_permute_load_chain()).
3362 The generation of permutation stmts and recording them in
3363 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3365 In case of both multiple types and interleaving, above vector loads and
3366 permutation stmts are created for every copy. The result vector stmts are
3367 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3368 STMT_VINFO_RELATED_STMT for the next copies.
3372 if (alignment_support_cheme == dr_aligned
3373 || alignment_support_cheme == dr_unaligned_supported)
3375 Create:
3376 p = initial_addr;
3377 indx = 0;
3378 loop {
3379 p = p + indx * vectype_size;
3380 vec_dest = *(p);
3381 indx = indx + 1;
3383 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
3385 Create:
3386 p1 = initial_addr;
3387 >> msq_init = *(floor(p1))
3388 p2 = initial_addr + VS - 1;
3389 >> realignment_token = call target_builtin;
3390 indx = 0;
3391 loop {
3392 p2 = p2 + indx * vectype_size
3393 lsq = *(floor(p2))
3394 >> vec_dest = realign_load (msq, lsq, realignment_token)
3395 indx = indx + 1;
3396 >> msq = lsq;
3401 if (alignment_support_cheme == dr_unaligned_software_pipeline)
3403 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
3404 phi_stmt = SSA_NAME_DEF_STMT (msq);
3405 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3408 prev_stmt_info = NULL;
3409 for (j = 0; j < ncopies; j++)
3411 /* 1. Create the vector pointer update chain. */
3412 if (j == 0)
3413 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset,
3414 &dummy, &ptr_incr, false);
3415 else
3416 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3418 for (i = 0; i < group_size; i++)
3420 /* 2. Create the vector-load in the loop. */
3421 switch (alignment_support_cheme)
3423 case dr_aligned:
3424 gcc_assert (aligned_access_p (first_dr));
3425 data_ref = build_fold_indirect_ref (dataref_ptr);
3426 break;
3427 case dr_unaligned_supported:
3429 int mis = DR_MISALIGNMENT (first_dr);
3430 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3432 gcc_assert (!aligned_access_p (first_dr));
3433 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3434 data_ref =
3435 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3436 break;
3438 case dr_unaligned_software_pipeline:
3439 gcc_assert (!aligned_access_p (first_dr));
3440 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3441 break;
3442 default:
3443 gcc_unreachable ();
3445 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3446 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
3447 new_temp = make_ssa_name (vec_dest, new_stmt);
3448 TREE_OPERAND (new_stmt, 0) = new_temp;
3449 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3450 copy_virtual_operands (new_stmt, stmt);
3451 mark_new_vars_to_rename (new_stmt);
3453 /* 3. Handle explicit realignment if necessary/supported. */
3454 if (alignment_support_cheme == dr_unaligned_software_pipeline)
3456 /* Create in loop:
3457 <vec_dest = realign_load (msq, lsq, realignment_token)> */
3458 lsq = TREE_OPERAND (new_stmt, 0);
3459 if (!realignment_token)
3460 realignment_token = dataref_ptr;
3461 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3462 new_stmt =
3463 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
3464 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
3465 new_temp = make_ssa_name (vec_dest, new_stmt);
3466 TREE_OPERAND (new_stmt, 0) = new_temp;
3467 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3468 if (i == group_size - 1)
3469 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
3470 msq = lsq;
3472 if (strided_load)
3473 VEC_quick_push (tree, dr_chain, new_temp);
3474 if (i < group_size - 1)
3475 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3477 if (strided_load)
3479 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
3480 return false;
3481 dr_chain = VEC_alloc (tree, heap, group_size);
3483 else
3485 if (j == 0)
3486 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3487 else
3488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3489 prev_stmt_info = vinfo_for_stmt (new_stmt);
3492 return true;
3496 /* Function vect_get_epilog_def_for_operand
3498 STMT defines a value that is used after the loop. OP is an operand
3499 in STMT. This function returns a def that will be used in the
3500 "epilog-stmt" for STMT (see vectorizable_live_operation).
3502 In the case that OP is an SSA_NAME which is defined in the loop, then
3503 STMT_VINFO_EPILOG_STMT of the defining stmt holds the relevant def.
3505 In case OP is an invariant or constant, the same OP can be used.
3507 In case STMT is a phi-node that represents a simple induction-variable,
3508 a code that computes the value of the induction variable after the
3509 loop is generated.
3512 static tree
3513 vect_get_epilog_def_for_operand (tree op, tree stmt,
3514 block_stmt_iterator * bsi)
3516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3518 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3519 enum vect_def_type dt;
3520 tree def;
3521 tree def_stmt;
3522 bool is_simple_use;
3523 tree epilog_stmt;
3525 is_simple_use =
3526 vect_is_simple_live_use (op, loop_vinfo, &def_stmt, &def, &dt);
3528 gcc_assert (is_simple_use);
3530 switch (dt)
3532 /* Case 1: operand is constant or invariant. */
3533 case vect_constant_def:
3534 case vect_invariant_def:
3535 return op;
3537 /* Case 2: operand is defined inside the loop. */
3538 case vect_loop_def:
3540 /* Get the def from the respective epilog stmt. */
3541 stmt_vec_info def_stmt_info = vinfo_for_stmt (def_stmt);
3542 epilog_stmt = STMT_VINFO_EPILOG_STMT (def_stmt_info);
3544 gcc_assert (epilog_stmt);
3545 return TREE_OPERAND (epilog_stmt, 0);
3548 /* Case 3: operand is defined by loop-header phi - induction. */
3549 case vect_induction_def:
3551 tree niters = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3552 tree type = TREE_TYPE (niters);
3554 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
3555 niters = build2 (MINUS_EXPR, type, niters,
3556 fold_convert (type, integer_one_node));
3557 return advance_iv_by_n (loop, def_stmt, niters, bsi);
3560 default:
3561 gcc_unreachable ();
3566 /* Function vectorizable_live_operation.
3568 STMT computes a value that is used outside the loop. Check if it can
3569 be supported. "Vectorize" it by generating the appropriate (scalar)
3570 computation at the loop epilog. Record the newly generated epilog-code
3571 in STMT_VINFO_EPILOG_STMT. */
3573 bool
3574 vectorizable_live_operation (tree stmt,
3575 block_stmt_iterator *bsi,
3576 tree *epilog_stmt)
3578 tree operation;
3579 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3580 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3581 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3582 int i;
3583 enum tree_code code;
3584 int op_type;
3585 tree op, op0, op1;
3586 tree def, def_stmt;
3587 enum vect_def_type dt;
3588 tree type;
3589 tree epilog_oprnd0, epilog_oprnd1 = NULL_TREE;
3590 imm_use_iterator imm_iter;
3591 use_operand_p use_p;
3592 tree expr;
3593 tree new_stmt;
3594 tree new_temp;
3595 tree dest_var, dest_name;
3596 tree exit_phi;
3598 if (!STMT_VINFO_LIVE_P (stmt_info))
3599 return false;
3601 if (TREE_CODE (stmt) == PHI_NODE)
3603 tree name = PHI_RESULT (stmt);
3605 if (!vect_is_simple_live_use (name, loop_vinfo, &def_stmt, &def, &dt))
3606 return false;
3607 if (dt != vect_induction_def)
3608 return false;
3609 return true;
3612 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
3614 operation = TREE_OPERAND (stmt, 1);
3615 code = TREE_CODE (operation);
3616 op_type = TREE_CODE_LENGTH (code);
3618 if (op_type != unary_op && op_type != binary_op)
3620 if (vect_print_dump_info (REPORT_DETAILS))
3621 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3622 return false;
3625 for (i = 0; i < op_type; i++)
3627 op = TREE_OPERAND (operation, i);
3628 if (!vect_is_simple_live_use (op, loop_vinfo, &def_stmt, &def, &dt))
3629 return false;
3630 if (dt != vect_invariant_def && dt != vect_constant_def
3631 && dt != vect_induction_def && dt != vect_loop_def)
3632 return false;
3635 if (!epilog_stmt) /* transformation not required. */
3636 return true;
3638 /** Transform. **/
3640 if (vect_print_dump_info (REPORT_DETAILS))
3641 fprintf (vect_dump, "transform live operation.");
3643 /* Prepare uses */
3644 op0 = TREE_OPERAND (operation, 0);
3645 epilog_oprnd0 = vect_get_epilog_def_for_operand (op0, stmt, bsi);
3646 if (vect_print_dump_info (REPORT_DETAILS))
3647 print_generic_expr (vect_dump, epilog_oprnd0, TDF_SLIM);
3648 if (op_type == binary_op)
3650 op1 = TREE_OPERAND (operation, 1);
3651 epilog_oprnd1 = vect_get_epilog_def_for_operand (op1, stmt, bsi);
3652 if (vect_print_dump_info (REPORT_DETAILS))
3653 print_generic_expr (vect_dump, epilog_oprnd1, TDF_SLIM);
3656 /* Arguments are ready, create the new vector stmt. */
3657 type = TREE_TYPE (operation);
3658 if (op_type == unary_op)
3659 expr = build1 (code, type, epilog_oprnd0);
3660 else
3661 expr = build2 (code, type, epilog_oprnd0, epilog_oprnd1);
3662 if (vect_print_dump_info (REPORT_DETAILS))
3663 print_generic_expr (vect_dump, expr, TDF_SLIM);
3665 dest_name = TREE_OPERAND (stmt, 0);
3666 dest_var = vect_create_destination_var (dest_name, type);
3667 new_stmt = build2 (MODIFY_EXPR, type, dest_var, expr);
3668 new_temp = make_ssa_name (dest_var, new_stmt);
3669 TREE_OPERAND (new_stmt, 0) = new_temp;
3670 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
3672 STMT_VINFO_EPILOG_STMT (stmt_info) = new_stmt;
3674 /* Find relevant loop-closed-exit-phi to update */
3675 exit_phi = NULL;
3676 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest_name)
3678 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
3680 exit_phi = USE_STMT (use_p);
3681 break;
3685 if (exit_phi)
3687 /* Replace uses of the PHI_RESULT of exit_phi with the newly created
3688 def of new_stmt */
3689 tree orig_name = PHI_RESULT (exit_phi);
3691 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
3692 SET_USE (use_p, new_temp);
3695 return true;
3699 /* Function vect_is_simple_cond.
3701 Input:
3702 LOOP - the loop that is being vectorized.
3703 COND - Condition that is checked for simple use.
3705 Returns whether a COND can be vectorized. Checks whether
3706 condition operands are supportable using vec_is_simple_use. */
3708 static bool
3709 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3711 tree lhs, rhs;
3712 tree def;
3713 enum vect_def_type dt;
3715 if (!COMPARISON_CLASS_P (cond))
3716 return false;
3718 lhs = TREE_OPERAND (cond, 0);
3719 rhs = TREE_OPERAND (cond, 1);
3721 if (TREE_CODE (lhs) == SSA_NAME)
3723 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3724 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3725 return false;
3727 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
3728 return false;
3730 if (TREE_CODE (rhs) == SSA_NAME)
3732 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3733 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3734 return false;
3736 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
3737 return false;
3739 return true;
3742 /* vectorizable_condition.
3744 Check if STMT is conditional modify expression that can be vectorized.
3745 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3746 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3747 at BSI.
3749 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3751 bool
3752 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3754 tree scalar_dest = NULL_TREE;
3755 tree vec_dest = NULL_TREE;
3756 tree op = NULL_TREE;
3757 tree cond_expr, then_clause, else_clause;
3758 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3759 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3760 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3761 tree vec_compare, vec_cond_expr;
3762 tree new_temp;
3763 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3764 enum machine_mode vec_mode;
3765 tree def;
3766 enum vect_def_type dt;
3767 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3768 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3770 gcc_assert (ncopies >= 1);
3771 if (ncopies > 1)
3772 return false; /* FORNOW */
3774 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3775 return false;
3777 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3779 if (STMT_VINFO_LIVE_P (stmt_info))
3781 /* FORNOW: not yet supported. */
3782 if (vect_print_dump_info (REPORT_DETAILS))
3783 fprintf (vect_dump, "value used after loop.");
3784 return false;
3787 if (TREE_CODE (stmt) != MODIFY_EXPR)
3788 return false;
3790 op = TREE_OPERAND (stmt, 1);
3792 if (TREE_CODE (op) != COND_EXPR)
3793 return false;
3795 cond_expr = TREE_OPERAND (op, 0);
3796 then_clause = TREE_OPERAND (op, 1);
3797 else_clause = TREE_OPERAND (op, 2);
3799 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3800 return false;
3802 if (TREE_CODE (then_clause) == SSA_NAME)
3804 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3805 if (!vect_is_simple_use (then_clause, loop_vinfo,
3806 &then_def_stmt, &def, &dt))
3807 return false;
3809 else if (TREE_CODE (then_clause) != INTEGER_CST
3810 && TREE_CODE (then_clause) != REAL_CST)
3811 return false;
3813 if (TREE_CODE (else_clause) == SSA_NAME)
3815 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3816 if (!vect_is_simple_use (else_clause, loop_vinfo,
3817 &else_def_stmt, &def, &dt))
3818 return false;
3820 else if (TREE_CODE (else_clause) != INTEGER_CST
3821 && TREE_CODE (else_clause) != REAL_CST)
3822 return false;
3825 vec_mode = TYPE_MODE (vectype);
3827 if (!vec_stmt)
3829 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3830 return expand_vec_cond_expr_p (op, vec_mode);
3833 /* Transform */
3835 /* Handle def. */
3836 scalar_dest = TREE_OPERAND (stmt, 0);
3837 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3839 /* Handle cond expr. */
3840 vec_cond_lhs =
3841 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3842 vec_cond_rhs =
3843 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3844 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3845 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3847 /* Arguments are ready. create the new vector stmt. */
3848 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3849 vec_cond_lhs, vec_cond_rhs);
3850 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3851 vec_compare, vec_then_clause, vec_else_clause);
3853 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
3854 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3855 TREE_OPERAND (*vec_stmt, 0) = new_temp;
3856 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3858 return true;
3862 /* Function vect_transform_stmt.
3864 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3866 static bool
3867 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi,
3868 block_stmt_iterator *exit_bsi, bool *interleaving)
3870 bool is_store = false;
3871 bool is_load = false;
3872 tree vec_stmt = NULL_TREE;
3873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3874 tree orig_stmt_in_pattern;
3875 bool done;
3877 *interleaving = false;
3879 if (STMT_VINFO_RELEVANT_P (stmt_info))
3881 switch (STMT_VINFO_TYPE (stmt_info))
3883 case type_demotion_vec_info_type:
3884 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
3885 gcc_assert (done);
3886 break;
3888 case type_promotion_vec_info_type:
3889 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
3890 gcc_assert (done);
3891 break;
3893 case op_vec_info_type:
3894 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3895 gcc_assert (done);
3896 break;
3898 case assignment_vec_info_type:
3899 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3900 gcc_assert (done);
3901 break;
3903 case load_vec_info_type:
3904 done = vectorizable_load (stmt, bsi, &vec_stmt);
3905 gcc_assert (done);
3906 is_load = true;
3907 break;
3909 case store_vec_info_type:
3910 done = vectorizable_store (stmt, bsi, &vec_stmt);
3911 gcc_assert (done);
3913 if (DR_GROUP_FIRST_DR (stmt_info))
3915 /* In case of interleaving, the whole chain is vectorized when the
3916 last store in the chain is reached. Store stmts before the last
3917 one are skipped, and there vec_stmt_info shoudn't be freed
3918 meanwhile. */
3919 *interleaving = true;
3920 if (STMT_VINFO_VEC_STMT (stmt_info))
3921 is_store = true;
3923 else
3924 is_store = true;
3925 break;
3927 case condition_vec_info_type:
3928 done = vectorizable_condition (stmt, bsi, &vec_stmt);
3929 gcc_assert (done);
3930 break;
3932 default:
3933 if (vect_print_dump_info (REPORT_DETAILS))
3934 fprintf (vect_dump, "stmt not supported.");
3935 gcc_unreachable ();
3938 /* If STMT was inserted by the vectorizer to replace a computation idiom
3939 (i.e. it is a "pattern stmt"), We need to record a pointer to VEC_STMT
3940 in the stmt_info of ORIG_STMT_IN_PATTERN (the stmt in the original
3941 sequence that computed this idiom). See more detail in the
3942 documentation of vect_pattern_recog. */
3943 /* CHECKME */
3944 gcc_assert (vec_stmt || is_load || is_store
3945 || (*interleaving
3946 && !STMT_VINFO_VEC_STMT (stmt_info)));
3947 if (vec_stmt)
3949 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3950 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
3951 if (orig_stmt_in_pattern)
3953 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
3955 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3957 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3958 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
3964 if (*interleaving)
3966 if (STMT_VINFO_VEC_STMT (stmt_info))
3968 tree next_stmt = DR_GROUP_FIRST_DR (stmt_info);
3970 while (next_stmt)
3972 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (next_stmt)))
3974 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3976 switch (STMT_VINFO_TYPE (vinfo_for_stmt (next_stmt)))
3978 case reduc_vec_info_type:
3979 done = vectorizable_reduction (next_stmt, bsi, &vec_stmt);
3980 gcc_assert (done);
3981 break;
3983 default:
3984 done = vectorizable_live_operation (next_stmt, exit_bsi,
3985 &vec_stmt);
3986 gcc_assert (done);
3989 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3993 else
3995 if (STMT_VINFO_LIVE_P (stmt_info))
3997 switch (STMT_VINFO_TYPE (stmt_info))
3999 case reduc_vec_info_type:
4000 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
4001 gcc_assert (done);
4002 break;
4004 default:
4005 done = vectorizable_live_operation (stmt, exit_bsi, &vec_stmt);
4006 gcc_assert (done);
4010 return is_store;
4014 /* This function builds ni_name = number of iterations loop executes
4015 on the loop preheader. */
4017 static tree
4018 vect_build_loop_niters (loop_vec_info loop_vinfo)
4020 tree ni_name, stmt, var;
4021 edge pe;
4022 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4023 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
4025 var = create_tmp_var (TREE_TYPE (ni), "niters");
4026 add_referenced_tmp_var (var);
4027 ni_name = force_gimple_operand (ni, &stmt, false, var);
4029 pe = loop_preheader_edge (loop);
4030 if (stmt)
4032 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4033 gcc_assert (!new_bb);
4036 return ni_name;
4040 /* This function generates the following statements:
4042 ni_name = number of iterations loop executes
4043 ratio = ni_name / vf
4044 ratio_mult_vf_name = ratio * vf
4046 and places them at the loop preheader edge. */
4048 static void
4049 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
4050 tree *ni_name_ptr,
4051 tree *ratio_mult_vf_name_ptr,
4052 tree *ratio_name_ptr)
4054 edge pe;
4055 basic_block new_bb;
4056 tree stmt, ni_name;
4057 tree var;
4058 tree ratio_name;
4059 tree ratio_mult_vf_name;
4060 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4061 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
4062 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4063 tree log_vf;
4065 pe = loop_preheader_edge (loop);
4067 /* Generate temporary variable that contains
4068 number of iterations loop executes. */
4070 ni_name = vect_build_loop_niters (loop_vinfo);
4071 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
4073 /* Create: ratio = ni >> log2(vf) */
4075 var = create_tmp_var (TREE_TYPE (ni), "bnd");
4076 add_referenced_tmp_var (var);
4077 ratio_name = make_ssa_name (var, NULL_TREE);
4078 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
4079 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
4080 SSA_NAME_DEF_STMT (ratio_name) = stmt;
4082 pe = loop_preheader_edge (loop);
4083 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4084 gcc_assert (!new_bb);
4086 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4088 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
4089 add_referenced_tmp_var (var);
4090 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
4091 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
4092 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
4093 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
4095 pe = loop_preheader_edge (loop);
4096 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4097 gcc_assert (!new_bb);
4099 *ni_name_ptr = ni_name;
4100 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
4101 *ratio_name_ptr = ratio_name;
4103 return;
4107 /* Function update_vuses_to_preheader.
4109 Input:
4110 STMT - a statement with potential VUSEs.
4111 LOOP - the loop whose preheader will contain STMT.
4113 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4114 appears to be defined in a V_MAY_DEF in another statement in a loop.
4115 One such case is when the VUSE is at the dereference of a __restricted__
4116 pointer in a load and the V_MAY_DEF is at the dereference of a different
4117 __restricted__ pointer in a store. Vectorization may result in
4118 copy_virtual_uses being called to copy the problematic VUSE to a new
4119 statement that is being inserted in the loop preheader. This procedure
4120 is called to change the SSA_NAME in the new statement's VUSE from the
4121 SSA_NAME updated in the loop to the related SSA_NAME available on the
4122 path entering the loop.
4124 When this function is called, we have the following situation:
4126 # vuse <name1>
4127 S1: vload
4128 do {
4129 # name1 = phi < name0 , name2>
4131 # vuse <name1>
4132 S2: vload
4134 # name2 = vdef <name1>
4135 S3: vstore
4137 }while...
4139 Stmt S1 was created in the loop preheader block as part of misaligned-load
4140 handling. This function fixes the name of the vuse of S1 from 'name1' to
4141 'name0'. */
4143 static void
4144 update_vuses_to_preheader (tree stmt, struct loop *loop)
4146 basic_block header_bb = loop->header;
4147 edge preheader_e = loop_preheader_edge (loop);
4148 ssa_op_iter iter;
4149 use_operand_p use_p;
4151 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
4153 tree ssa_name = USE_FROM_PTR (use_p);
4154 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4155 tree name_var = SSA_NAME_VAR (ssa_name);
4156 basic_block bb = bb_for_stmt (def_stmt);
4158 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4159 if (!IS_EMPTY_STMT (def_stmt)
4160 && flow_bb_inside_loop_p (loop, bb))
4162 /* If the block containing the statement defining the SSA_NAME
4163 is in the loop then it's necessary to find the definition
4164 outside the loop using the PHI nodes of the header. */
4165 tree phi;
4166 bool updated = false;
4168 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
4170 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
4172 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
4173 updated = true;
4174 break;
4177 gcc_assert (updated);
4183 /* Function advance_iv_by_n
4185 LOOP: A loop that has an induction variable (IV) represented by IV_PHI.
4186 IV_PHI: A phi node (typically at the header-bb of LOOP)
4187 NITERS: number of iterations (steps) to advance the IV.
4189 This function advances the IV by NITERS, by generating the following
4190 code at the exit-bb of LOOP:
4191 new_iv = iv_initial_condition + iv_step * NITERS;
4194 static tree
4195 advance_iv_by_n (struct loop *loop, tree iv_phi, tree niters, block_stmt_iterator *bsi)
4197 tree access_fn = NULL;
4198 tree evolution_part;
4199 tree init_expr;
4200 tree step_expr;
4201 tree var, stmt, ni, ni_name;
4203 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
4204 gcc_assert (access_fn);
4205 if (vect_print_dump_info (REPORT_DETAILS))
4207 fprintf (vect_dump, "accesses funcion for phi: ");
4208 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4211 evolution_part =
4212 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
4213 gcc_assert (evolution_part != NULL_TREE);
4215 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4216 of degree >= 2 or exponential. */
4217 gcc_assert (!tree_is_chrec (evolution_part));
4219 step_expr = evolution_part;
4220 init_expr =
4221 unshare_expr (initial_condition_in_loop_num (access_fn, loop->num));
4222 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
4223 build2 (MULT_EXPR, TREE_TYPE (niters), niters, step_expr), init_expr);
4224 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
4225 add_referenced_tmp_var (var);
4226 ni_name = force_gimple_operand (ni, &stmt, false, var);
4228 /* Insert stmt. */
4229 if (stmt)
4230 bsi_insert_before (bsi, stmt, BSI_SAME_STMT);
4232 return ni_name;
4236 /* Function vect_update_ivs_after_vectorizer.
4238 "Advance" the induction variables of LOOP to the value they should take
4239 after the execution of LOOP. This is currently necessary because the
4240 vectorizer does not handle induction variables that are used after the
4241 loop. Such a situation occurs when the last iterations of LOOP are
4242 peeled, because:
4243 1. We introduced new uses after LOOP for IVs that were not originally used
4244 after LOOP: the IVs of LOOP are now used by an epilog loop.
4245 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4246 times, whereas the loop IVs should be bumped N times.
4248 Input:
4249 - LOOP - a loop that is going to be vectorized. The last few iterations
4250 of LOOP were peeled.
4251 - NITERS - the number of iterations that LOOP executes (before it is
4252 vectorized). i.e, the number of times the ivs should be bumped.
4253 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4254 coming out from LOOP on which there are uses of the LOOP ivs
4255 (this is the path from LOOP->exit to epilog_loop->preheader).
4257 The new definitions of the ivs are placed in LOOP->exit.
4258 The phi args associated with the edge UPDATE_E in the bb
4259 UPDATE_E->dest are updated accordingly.
4261 Assumption 1: Like the rest of the vectorizer, this function assumes
4262 a single loop exit that has a single predecessor.
4264 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4265 organized in the same order.
4267 Assumption 3: The access function of the ivs is simple enough (see
4268 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4270 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4271 coming out of LOOP on which the ivs of LOOP are used (this is the path
4272 that leads to the epilog loop; other paths skip the epilog loop). This
4273 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4274 needs to have its phis updated.
4277 static void
4278 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
4279 edge update_e)
4281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4282 tree phi, phi1;
4283 basic_block update_bb = update_e->dest;
4284 basic_block exit_bb = loop->single_exit->dest;
4285 block_stmt_iterator bsi = bsi_last (exit_bb);
4287 /* Make sure there exists a single-predecessor exit bb: */
4288 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
4290 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
4292 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
4293 phi && phi1;
4294 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
4296 tree ni_name;
4298 if (vect_print_dump_info (REPORT_DETAILS))
4300 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
4301 print_generic_expr (vect_dump, phi, TDF_SLIM);
4304 /* Skip virtual phi's. */
4305 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4307 if (vect_print_dump_info (REPORT_DETAILS))
4308 fprintf (vect_dump, "virtual phi. skip.");
4309 continue;
4312 /* Skip reduction phis. */
4313 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
4315 if (vect_print_dump_info (REPORT_DETAILS))
4316 fprintf (vect_dump, "reduc phi. skip.");
4318 continue;
4321 ni_name = advance_iv_by_n (loop, phi, niters, &bsi);
4323 /* Fix phi expressions in the successor bb. */
4324 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
4329 /* Function vect_do_peeling_for_loop_bound
4331 Peel the last iterations of the loop represented by LOOP_VINFO.
4332 The peeled iterations form a new epilog loop. Given that the loop now
4333 iterates NITERS times, the new epilog loop iterates
4334 NITERS % VECTORIZATION_FACTOR times.
4336 The original loop will later be made to iterate
4337 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
4339 static void
4340 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
4341 struct loops *loops)
4343 tree ni_name, ratio_mult_vf_name;
4344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4345 struct loop *new_loop;
4346 edge update_e;
4347 basic_block preheader;
4348 int loop_num;
4350 if (vect_print_dump_info (REPORT_DETAILS))
4351 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
4353 initialize_original_copy_tables ();
4355 /* Generate the following variables on the preheader of original loop:
4357 ni_name = number of iteration the original loop executes
4358 ratio = ni_name / vf
4359 ratio_mult_vf_name = ratio * vf */
4360 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
4361 &ratio_mult_vf_name, ratio);
4363 loop_num = loop->num;
4364 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
4365 ratio_mult_vf_name, ni_name, false);
4366 gcc_assert (new_loop);
4367 gcc_assert (loop_num == loop->num);
4368 #ifdef ENABLE_CHECKING
4369 slpeel_verify_cfg_after_peeling (loop, new_loop);
4370 #endif
4372 /* A guard that controls whether the new_loop is to be executed or skipped
4373 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4374 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4375 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4376 is on the path where the LOOP IVs are used and need to be updated. */
4378 preheader = loop_preheader_edge (new_loop)->src;
4379 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
4380 update_e = EDGE_PRED (preheader, 0);
4381 else
4382 update_e = EDGE_PRED (preheader, 1);
4384 /* Update IVs of original loop as if they were advanced
4385 by ratio_mult_vf_name steps. */
4386 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
4388 /* After peeling we have to reset scalar evolution analyzer. */
4389 scev_reset ();
4391 free_original_copy_tables ();
4395 /* Function vect_gen_niters_for_prolog_loop
4397 Set the number of iterations for the loop represented by LOOP_VINFO
4398 to the minimum between LOOP_NITERS (the original iteration count of the loop)
4399 and the misalignment of DR - the data reference recorded in
4400 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
4401 this loop, the data reference DR will refer to an aligned location.
4403 The following computation is generated:
4405 If the misalignment of DR is known at compile time:
4406 addr_mis = int mis = DR_MISALIGNMENT (dr);
4407 Else, compute address misalignment in bytes:
4408 addr_mis = addr & (vectype_size - 1)
4410 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4412 (elem_size = element type size; an element is the scalar element
4413 whose type is the inner type of the vectype)
4415 For interleaving,
4417 prolog_niters = min ( LOOP_NITERS ,
4418 (VF/inter_size - addr_mis/elem_size)&(VF/inter_size-1) )
4419 where inter_size is the size of the interleaved group.
4423 static tree
4424 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
4426 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
4427 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4428 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4429 tree var, stmt;
4430 tree iters, iters_name;
4431 edge pe;
4432 basic_block new_bb;
4433 tree dr_stmt = DR_STMT (dr);
4434 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
4435 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4436 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
4437 tree niters_type = TREE_TYPE (loop_niters);
4438 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
4439 int inter_size = 1;
4441 if (DR_GROUP_FIRST_DR (stmt_info))
4443 /* For interleaved access element size must be multipled by the size of
4444 the interleaved group. */
4445 inter_size = DR_GROUP_SIZE (vinfo_for_stmt (
4446 DR_GROUP_FIRST_DR (stmt_info)));
4447 element_size *= inter_size;
4450 pe = loop_preheader_edge (loop);
4452 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
4454 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
4455 int elem_misalign = byte_misalign / element_size;
4457 if (vect_print_dump_info (REPORT_DETAILS))
4458 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
4460 iters = build_int_cst (niters_type,
4461 (vf/inter_size - elem_misalign)&(vf/inter_size-1));
4463 else
4465 tree new_stmts = NULL_TREE;
4466 tree start_addr =
4467 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
4468 tree ptr_type = TREE_TYPE (start_addr);
4469 tree size = TYPE_SIZE (ptr_type);
4470 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
4471 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
4472 tree elem_size_log = build_int_cst (type, exact_log2 (element_size));
4473 tree vf_minus_1 = build_int_cst (type, vf - 1);
4474 tree vf_tree = build_int_cst (type, vf);
4475 tree byte_misalign;
4476 tree elem_misalign;
4478 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
4479 gcc_assert (!new_bb);
4481 /* Create: byte_misalign = addr & (vectype_size - 1) */
4482 byte_misalign =
4483 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
4485 /* Create: elem_misalign = byte_misalign / element_size */
4486 elem_misalign = build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
4488 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
4489 iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
4490 iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
4491 iters = fold_convert (niters_type, iters);
4494 /* Create: prolog_loop_niters = min (iters, loop_niters) */
4495 /* If the loop bound is known at compile time we already verified that it is
4496 greater than vf; since the misalignment ('iters') is at most vf, there's
4497 no need to generate the MIN_EXPR in this case. */
4498 if (TREE_CODE (loop_niters) != INTEGER_CST)
4499 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
4501 if (vect_print_dump_info (REPORT_DETAILS))
4503 fprintf (vect_dump, "niters for prolog loop: ");
4504 print_generic_expr (vect_dump, iters, TDF_SLIM);
4507 var = create_tmp_var (niters_type, "prolog_loop_niters");
4508 add_referenced_tmp_var (var);
4509 iters_name = force_gimple_operand (iters, &stmt, false, var);
4511 /* Insert stmt on loop preheader edge. */
4512 if (stmt)
4514 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4515 gcc_assert (!new_bb);
4518 return iters_name;
4522 /* Function vect_update_init_of_dr
4524 NITERS iterations were peeled from LOOP. DR represents a data reference
4525 in LOOP. This function updates the information recorded in DR to
4526 account for the fact that the first NITERS iterations had already been
4527 executed. Specifically, it updates the OFFSET field of DR. */
4529 static void
4530 vect_update_init_of_dr (struct data_reference *dr, tree niters)
4532 tree offset = DR_OFFSET (dr);
4534 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
4535 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
4536 DR_OFFSET (dr) = offset;
4540 /* Function vect_update_inits_of_drs
4542 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
4543 This function updates the information recorded for the data references in
4544 the loop to account for the fact that the first NITERS iterations had
4545 already been executed. Specifically, it updates the initial_condition of the
4546 access_function of all the data_references in the loop. */
4548 static void
4549 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
4551 unsigned int i;
4552 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
4554 if (vect_dump && (dump_flags & TDF_DETAILS))
4555 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
4557 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
4559 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
4560 vect_update_init_of_dr (dr, niters);
4565 /* Function vect_do_peeling_for_alignment
4567 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
4568 'niters' is set to the misalignment of one of the data references in the
4569 loop, thereby forcing it to refer to an aligned location at the beginning
4570 of the execution of this loop. The data reference for which we are
4571 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
4573 static void
4574 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
4576 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4577 tree niters_of_prolog_loop, ni_name;
4578 tree n_iters;
4579 struct loop *new_loop;
4581 if (vect_print_dump_info (REPORT_DETAILS))
4582 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
4584 initialize_original_copy_tables ();
4586 ni_name = vect_build_loop_niters (loop_vinfo);
4587 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
4589 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
4590 new_loop =
4591 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
4592 niters_of_prolog_loop, ni_name, true);
4593 gcc_assert (new_loop);
4594 #ifdef ENABLE_CHECKING
4595 slpeel_verify_cfg_after_peeling (new_loop, loop);
4596 #endif
4598 /* Update number of times loop executes. */
4599 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
4600 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
4601 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
4603 /* Update the init conditions of the access functions of all data refs. */
4604 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
4606 /* After peeling we have to reset scalar evolution analyzer. */
4607 scev_reset ();
4609 free_original_copy_tables ();
4613 /* Function vect_create_cond_for_align_checks.
4615 Create a conditional expression that represents the alignment checks for
4616 all of data references (array element references) whose alignment must be
4617 checked at runtime.
4619 Input:
4620 LOOP_VINFO - two fields of the loop information are used.
4621 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
4622 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
4624 Output:
4625 COND_EXPR_STMT_LIST - statements needed to construct the conditional
4626 expression.
4627 The returned value is the conditional expression to be used in the if
4628 statement that controls which version of the loop gets executed at runtime.
4630 The algorithm makes two assumptions:
4631 1) The number of bytes "n" in a vector is a power of 2.
4632 2) An address "a" is aligned if a%n is zero and that this
4633 test can be done as a&(n-1) == 0. For example, for 16
4634 byte vectors the test is a&0xf == 0. */
4636 static tree
4637 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
4638 tree *cond_expr_stmt_list)
4640 VEC(tree,heap) *may_misalign_stmts
4641 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
4642 tree ref_stmt;
4643 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
4644 tree mask_cst;
4645 unsigned int i;
4646 tree psize;
4647 tree int_ptrsize_type;
4648 char tmp_name[20];
4649 tree or_tmp_name = NULL_TREE;
4650 tree and_tmp, and_tmp_name, and_stmt;
4651 tree ptrsize_zero;
4653 /* Check that mask is one less than a power of 2, i.e., mask is
4654 all zeros followed by all ones. */
4655 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
4657 /* CHECKME: what is the best integer or unsigned type to use to hold a
4658 cast from a pointer value? */
4659 psize = TYPE_SIZE (ptr_type_node);
4660 int_ptrsize_type
4661 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
4663 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
4664 of the first vector of the i'th data reference. */
4666 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
4668 tree new_stmt_list = NULL_TREE;
4669 tree addr_base;
4670 tree addr_tmp, addr_tmp_name, addr_stmt;
4671 tree or_tmp, new_or_tmp_name, or_stmt;
4673 /* create: addr_tmp = (int)(address_of_first_vector) */
4674 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
4675 &new_stmt_list,
4676 NULL_TREE);
4678 if (new_stmt_list != NULL_TREE)
4679 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
4681 sprintf (tmp_name, "%s%d", "addr2int", i);
4682 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4683 add_referenced_tmp_var (addr_tmp);
4684 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
4685 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
4686 addr_stmt = build2 (MODIFY_EXPR, void_type_node,
4687 addr_tmp_name, addr_stmt);
4688 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
4689 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
4691 /* The addresses are OR together. */
4693 if (or_tmp_name != NULL_TREE)
4695 /* create: or_tmp = or_tmp | addr_tmp */
4696 sprintf (tmp_name, "%s%d", "orptrs", i);
4697 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4698 add_referenced_tmp_var (or_tmp);
4699 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
4700 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
4701 build2 (BIT_IOR_EXPR, int_ptrsize_type,
4702 or_tmp_name,
4703 addr_tmp_name));
4704 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
4705 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
4706 or_tmp_name = new_or_tmp_name;
4708 else
4709 or_tmp_name = addr_tmp_name;
4711 } /* end for i */
4713 mask_cst = build_int_cst (int_ptrsize_type, mask);
4715 /* create: and_tmp = or_tmp & mask */
4716 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
4717 add_referenced_tmp_var (and_tmp);
4718 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
4720 and_stmt = build2 (MODIFY_EXPR, void_type_node,
4721 and_tmp_name,
4722 build2 (BIT_AND_EXPR, int_ptrsize_type,
4723 or_tmp_name, mask_cst));
4724 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
4725 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
4727 /* Make and_tmp the left operand of the conditional test against zero.
4728 if and_tmp has a non-zero bit then some address is unaligned. */
4729 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
4730 return build2 (EQ_EXPR, boolean_type_node,
4731 and_tmp_name, ptrsize_zero);
4735 /* Function vect_transform_loop.
4737 The analysis phase has determined that the loop is vectorizable.
4738 Vectorize the loop - created vectorized stmts to replace the scalar
4739 stmts in the loop, and update the loop exit condition. */
4741 void
4742 vect_transform_loop (loop_vec_info loop_vinfo,
4743 struct loops *loops ATTRIBUTE_UNUSED)
4745 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4746 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4747 int nbbs = loop->num_nodes;
4748 block_stmt_iterator si;
4749 int i;
4750 tree ratio = NULL;
4751 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4752 bitmap_iterator bi;
4753 unsigned int j;
4754 bool interleaving;
4755 basic_block exit_bb;
4756 block_stmt_iterator exit_si;
4758 if (vect_print_dump_info (REPORT_DETAILS))
4759 fprintf (vect_dump, "=== vec_transform_loop ===");
4761 /* If the loop has data references that may or may not be aligned then
4762 two versions of the loop need to be generated, one which is vectorized
4763 and one which isn't. A test is then generated to control which of the
4764 loops is executed. The test checks for the alignment of all of the
4765 data references that may or may not be aligned. */
4767 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
4769 struct loop *nloop;
4770 tree cond_expr;
4771 tree cond_expr_stmt_list = NULL_TREE;
4772 basic_block condition_bb;
4773 block_stmt_iterator cond_exp_bsi;
4774 basic_block merge_bb;
4775 basic_block new_exit_bb;
4776 edge new_exit_e, e;
4777 tree orig_phi, new_phi, arg;
4779 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
4780 &cond_expr_stmt_list);
4781 initialize_original_copy_tables ();
4782 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
4783 free_original_copy_tables();
4785 /** Loop versioning violates an assumption we try to maintain during
4786 vectorization - that the loop exit block has a single predecessor.
4787 After versioning, the exit block of both loop versions is the same
4788 basic block (i.e. it has two predecessors). Just in order to simplify
4789 following transformations in the vectorizer, we fix this situation
4790 here by adding a new (empty) block on the exit-edge of the loop,
4791 with the proper loop-exit phis to maintain loop-closed-form. **/
4793 merge_bb = loop->single_exit->dest;
4794 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
4795 new_exit_bb = split_edge (loop->single_exit);
4796 add_bb_to_loop (new_exit_bb, loop->outer);
4797 new_exit_e = loop->single_exit;
4798 e = EDGE_SUCC (new_exit_bb, 0);
4800 for (orig_phi = phi_nodes (merge_bb); orig_phi;
4801 orig_phi = PHI_CHAIN (orig_phi))
4803 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
4804 new_exit_bb);
4805 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4806 add_phi_arg (new_phi, arg, new_exit_e);
4807 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
4810 /** end loop-exit-fixes after versioning **/
4812 update_ssa (TODO_update_ssa);
4813 cond_exp_bsi = bsi_last (condition_bb);
4814 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
4817 /* CHECKME: we wouldn't need this if we calles update_ssa once
4818 for all loops. */
4819 bitmap_zero (vect_vnames_to_rename);
4821 /* Peel the loop if there are data refs with unknown alignment.
4822 Only one data ref with unknown store is allowed. */
4824 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4825 vect_do_peeling_for_alignment (loop_vinfo, loops);
4827 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
4828 compile time constant), or it is a constant that doesn't divide by the
4829 vectorization factor, then an epilog loop needs to be created.
4830 We therefore duplicate the loop: the original loop will be vectorized,
4831 and will compute the first (n/VF) iterations. The second copy of the loop
4832 will remain scalar and will compute the remaining (n%VF) iterations.
4833 (VF is the vectorization factor). */
4835 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4836 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4837 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
4838 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
4839 else
4840 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
4841 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
4843 /* 1) Make sure the loop header has exactly two entries
4844 2) Make sure we have a preheader basic block. */
4846 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
4847 if (EDGE_COUNT (loop_preheader_edge (loop)->src->succs) != 1)
4848 loop_split_edge_with (loop_preheader_edge (loop), NULL);
4850 exit_bb = loop->single_exit->dest;
4851 exit_si = bsi_after_labels (exit_bb);
4852 if (TREE_CODE (bsi_stmt (exit_si)) == LABEL_EXPR)
4853 bsi_next (&exit_si);
4855   /* FORNOW: the vectorizer supports only loops whose body consists
4856      of one basic block (header + empty latch). When the vectorizer will
4857      support more involved loop forms, the order in which the BBs are
4858      traversed needs to be reconsidered. */
4860 for (i = 0; i < nbbs; i++)
4862 basic_block bb = bbs[i];
4864 for (si = bsi_start (bb); !bsi_end_p (si);)
4866 tree stmt = bsi_stmt (si);
4867 stmt_vec_info stmt_info;
4868 bool is_store;
4870 if (vect_print_dump_info (REPORT_DETAILS))
4872 fprintf (vect_dump, "------>vectorizing statement: ");
4873 print_generic_expr (vect_dump, stmt, TDF_SLIM);
4875 stmt_info = vinfo_for_stmt (stmt);
4876 gcc_assert (stmt_info);
4877 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4878 && !STMT_VINFO_LIVE_P (stmt_info))
4880 bsi_next (&si);
4881 continue;
4884 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
4885 != (unsigned HOST_WIDE_INT) vectorization_factor)
4886 && vect_print_dump_info (REPORT_DETAILS))
4887 fprintf (vect_dump, "multiple-types.");
4889 /* -------- vectorize statement ------------ */
4890 if (vect_print_dump_info (REPORT_DETAILS))
4891 fprintf (vect_dump, "transform statement.");
4893 interleaving = false;
4894 is_store = vect_transform_stmt (stmt, &si, &exit_si, &interleaving);
4895 if (is_store)
4897 stmt_ann_t ann;
4899 if (DR_GROUP_FIRST_DR (stmt_info))
4901 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
4902 interleaving chain was completed - free all the stores in
4903 the chain. */
4904 tree next = DR_GROUP_FIRST_DR (stmt_info);
4905 tree tmp;
4906 stmt_vec_info next_stmt_info;
4908 while (next)
4910 next_stmt_info = vinfo_for_stmt (next);
4911 /* Free the attached stmt_vec_info and remove the stmt. */
4912 ann = stmt_ann (next);
4913 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
4914 free (next_stmt_info);
4915 set_stmt_info ((tree_ann_t)ann, NULL);
4916 next = tmp;
4918 bsi_remove (&si);
4919 continue;
4921 else
4923 /* Free the attached stmt_vec_info and remove the stmt. */
4924 ann = stmt_ann (stmt);
4925 free (stmt_info);
4926 set_stmt_info ((tree_ann_t)ann, NULL);
4927 bsi_remove (&si);
4928 continue;
4931 else
4933 if (interleaving)
4935 /* This is case of skipped interleaved store. We don't free
4936 its stmt_vec_info. */
4937 bsi_remove (&si);
4938 continue;
4942 bsi_next (&si);
4943 } /* stmts in BB */
4944 } /* BBs in loop */
4946 slpeel_make_loop_iterate_ntimes (loop, ratio);
4948 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
4949 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
4951 /* The memory tags and pointers in vectorized statements need to
4952 have their SSA forms updated. FIXME, why can't this be delayed
4953 until all the loops have been transformed? */
4954 update_ssa (TODO_update_ssa);
4956 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
4957 fprintf (vect_dump, "LOOP VECTORIZED.\n");