/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
#include "real.h"
/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme prepends
   the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
   VAR_KIND) to NAME if provided, and otherwise uses the prefix alone.  */
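
/* For example, given NAME "a", vect_pointer_var yields a variable named
   "vect_pa" and vect_scalar_var yields "stmp_a" (names are illustrative
   only; create_tmp_var adds a uniquifying suffix).  */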
static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
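
/* For example, for a data-ref a[i] with DR_BASE_ADDRESS &a, DR_INIT 0,
   DR_OFFSET 0 and no OFFSET argument, the generated preheader code amounts
   to (variable names are illustrative only):

       base_off.0 = 0 + 0;
       vect_pa.1 = (vectype *) (&a + base_off.0);

   and the SSA_NAME 'vect_pa.1' is returned.  */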
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      tree step;

      /* For interleaved accesses, STEP is divided by the size of the
         interleaving group.  */
      if (DR_GROUP_SIZE (stmt_info))
        step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
                            build_int_cst (TREE_TYPE (offset),
                                           DR_GROUP_SIZE (stmt_info)));
      else
        step = DR_STEP (dr);

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset.  */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                           base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base.  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}
/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_MODIFY_STMT <name, data-ref> or
         GIMPLE_MODIFY_STMT <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */
static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);

  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
}
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  Connect the new
   increment stmt to the existing def-use update-chain of the pointer.

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + vector_size
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
              The increment amount across iterations is also expected to be
              vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */
static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  incr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, ptr_var,
                      build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  return new_dataref_ptr;
}
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will be
   used in the vectorization of STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);

  init_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt = ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1:  x = load        VS1.0:  vx.0 = memref0          VS1.1
                        VS1.1:  vx.1 = memref1          VS1.2
                        VS1.2:  vx.2 = memref2          VS1.3
                        VS1.3:  vx.3 = memref3

   S2:  z = x + ...     VSnew.0:  vz0 = vx.0 + ...      VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...      VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...      VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  if (dt == vect_invariant_def || dt == vect_constant_def)
    {
      /* Do nothing; can reuse same def.  */
      return vec_oprnd;
    }

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);

  return vec_oprnd;
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}
#define ADJUST_IN_EPILOG 1

/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   SCALAR_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for "ADJUST_IN_EPILOG" - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
   TODO: Use some cost-model to estimate which scheme is more profitable.  */
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  int nelements;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;
  tree vector_type;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      if (INTEGRAL_TYPE_P (type))
        def = build_int_cst (type, 0);
      else
        def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
      /* All the 'nunits' elements are set to 0.  The final result will be
         adjusted by 'init_val' at the loop epilog.  */
      nelements = nunits;
      need_epilog_adjust = true;
#else
      /* 'nunits - 1' elements are set to 0; the last element is set to
         'init_val'.  No further adjustments at the epilog are needed.  */
      nelements = nunits - 1;
      need_epilog_adjust = false;
#endif
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      def = init_val;
      nelements = nunits;
      need_epilog_adjust = false;
      break;

    default:
      gcc_unreachable ();
    }

  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vector_type, t);
  else
    vec = build_constructor_from_list (vector_type, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  return vect_init_vector (stmt, vec, vector_type);
}
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>
*/
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  int op_type;

  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type - 1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }
  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

          ***/
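
  /* For example, under scheme 2, a four-element vector of partial results
     {a,b,c,d} with a PLUS reduction is (illustratively; exact element
     placement depends on the target) reduced by:

         v' = vec_shift <v, VS/2>          v' = {c,d,_,_}
         v  = vop <v, v'>                  v  = {a+c,b+d,_,_}
         v' = vec_shift <v, VS/4>          v' = {b+d,_,_,_}
         v  = vop <v, v'>                  v  = {a+b+c+d,_,_,_}
         s  = extract_field <v, 0>

     where '_' denotes a don't-care value.  */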
  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
  scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);
  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                        build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
               {
                 Create:  va' = vec_shift <va, offset>
                 Create:  va = vop <va, va'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits / 2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (shift_code, vectype,
                                            new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (code, vectype,
                                            new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    new_scalar_dest,
                                    build2 (code, scalar_type, new_name,
                                            new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = false;
        }
    }
  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                       TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                            new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }
  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'scalar_initial_def' is zero).

         Create:
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                      new_scalar_dest,
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }
  /* 2.6 Replace uses of s_out0 with uses of s_out3/4.  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
      SET_USE (use_p, new_temp);
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree operation;
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def, def_stmt;
  enum vect_def_type dt;
  tree new_phi;
  tree scalar_type;
  bool is_simple_use;
  tree orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  stmt_vec_info prev_stmt_info;
  tree reduc_def;
  tree new_stmt = NULL_TREE;
  int j;

  gcc_assert (ncopies >= 1);
  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (!STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != binary_op && op_type != ternary_op)
    return false;
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      op = TREE_OPERAND (operation, i);
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
      gcc_assert (is_simple_use);
      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def
                  || dt == vect_constant_def);
    }

  op = TREE_OPERAND (operation, i);
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;
  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }
  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; the original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a, int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* This is a regular reduction: the same vectype and tree-code that are
         used for the vector code inside the loop can also be used for the
         epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }
  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Create the reduction-phi that defines the reduction-operand.  */
  new_phi = create_phi_node (vec_dest, loop->header);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
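
  /* For example, with VF == 8 and a four-element vectype (nunits == 4),
     ncopies == 2: the vector reduction stmt is generated twice, and the
     second copy takes the result of the first as its reduction operand
     (see the j > 0 case below).  The numbers are illustrative only.  */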
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          op = TREE_OPERAND (operation, 0);
          loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
          if (op_type == ternary_op)
            {
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }

          /* Get the vector def for the reduction variable from the phi node */
          reduc_def = PHI_RESULT (new_phi);
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1)  */
          reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
        }

      /* Arguments are ready.  Create the new vector stmt.  */

      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   true if the target has a vectorized version of the function,
   or false if the function cannot be vectorized.  */

bool
vectorizable_function (tree call, tree vectype)
{
  tree fndecl = get_callee_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return false;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return false;

  if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl),
                                                     vectype))
    return true;

  return false;
}
/* Returns an expression that performs a call to the vectorized version
   of FNDECL in type VECTYPE, with the arguments given by ARGS.  */

static tree
build_vectorized_function_call (tree fndecl,
                                tree vectype, tree args)
{
  tree vfndecl;
  enum built_in_function code = DECL_FUNCTION_CODE (fndecl);

  /* The target specific builtin should be available.  */
  vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
  gcc_assert (vfndecl != NULL_TREE);

  return build_function_call_expr (vfndecl, args);
}
1625 /* Function vectorizable_call.
1627 Check if STMT performs a function call that can be vectorized.
1628 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1629 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1630 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1632 bool
1633 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1635 tree vec_dest;
1636 tree scalar_dest;
1637 tree operation;
1638 tree op, args, type;
1639 tree vec_oprnd, vargs, *pvargs_end;
1640 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1641 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1643 tree fndecl, rhs, new_temp, def, def_stmt;
1644 enum vect_def_type dt;
1646 /* Is STMT a vectorizable call? */
1647 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1648 return false;
1650 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
1651 return false;
1653 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1654 if (TREE_CODE (operation) != CALL_EXPR)
1655 return false;
1657 /* For now, we only vectorize functions if a target specific builtin
1658 is available. TODO -- in some cases, it might be profitable to
1659 insert the calls for pieces of the vector, in order to be able
1660 to vectorize other operations in the loop. */
1661 if (!vectorizable_function (operation, vectype))
1663 if (vect_print_dump_info (REPORT_DETAILS))
1664 fprintf (vect_dump, "function is not vectorizable.");
1666 return false;
1668 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
1670 for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
1672 op = TREE_VALUE (args);
1674 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1676 if (vect_print_dump_info (REPORT_DETAILS))
1677 fprintf (vect_dump, "use not simple.");
1678 return false;
1682 if (!vec_stmt) /* transformation not required. */
1684 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1685 return true;
1688 /** Transform. **/
1690 if (vect_print_dump_info (REPORT_DETAILS))
1691 fprintf (vect_dump, "transform operation.");
1693 /* Handle def. */
1694 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1695 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1697 /* Handle uses. */
1698 vargs = NULL_TREE;
1699 pvargs_end = &vargs;
1700 for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
1702 op = TREE_VALUE (args);
1703 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1705 *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
1706 pvargs_end = &TREE_CHAIN (*pvargs_end);
1709 fndecl = get_callee_fndecl (operation);
1710 rhs = build_vectorized_function_call (fndecl, vectype, vargs);
1711 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs);
1712 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1713 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
1715 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1717 /* The call in STMT might prevent it from being removed in DCE. We however
1718 cannot remove it here, due to the way the SSA name it defines is mapped
1719 to the new definition. So just replace the rhs of the statement with
1720 something harmless. */
1721 type = TREE_TYPE (scalar_dest);
1722 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
1724 return true;
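/* Illustration only (assumed scalar source, not from this file): a loop
   such as

     for (i = 0; i < n; i++)
       y[i] = sqrtf (x[i]);

   is handled here by emitting one call to the target's vectorized sqrtf
   builtin per group of VF elements, after which the rhs of the scalar
   call is overwritten with 0 (see above) so that DCE can later delete
   the dead scalar stmt.  */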
1728 /* Function vectorizable_assignment.
1730 Check if STMT performs an assignment (copy) that can be vectorized.
1731 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1732 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1733 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1735 bool
1736 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1738 tree vec_dest;
1739 tree scalar_dest;
1740 tree op;
1741 tree vec_oprnd;
1742 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1743 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1744 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1745 tree new_temp;
1746 tree def, def_stmt;
1747 enum vect_def_type dt;
1748 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1749 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1751 gcc_assert (ncopies >= 1);
1752 if (ncopies > 1)
1753 return false; /* FORNOW */
1755 /* Is this a vectorizable assignment? */
1756 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1757 return false;
1759 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1761 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1762 return false;
1764 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1765 if (TREE_CODE (scalar_dest) != SSA_NAME)
1766 return false;
1768 op = GIMPLE_STMT_OPERAND (stmt, 1);
1769 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1771 if (vect_print_dump_info (REPORT_DETAILS))
1772 fprintf (vect_dump, "use not simple.");
1773 return false;
1776 if (!vec_stmt) /* transformation not required. */
1778 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1779 return true;
1782 /** Transform. **/
1783 if (vect_print_dump_info (REPORT_DETAILS))
1784 fprintf (vect_dump, "transform assignment.");
1786 /* Handle def. */
1787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1789 /* Handle use. */
1790 op = GIMPLE_STMT_OPERAND (stmt, 1);
1791 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1793 /* Arguments are ready. Create the new vector stmt. */
1794 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, vec_oprnd);
1795 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1796 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
1797 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1799 return true;
1803 /* Function vect_min_worthwhile_factor.
1805 For a loop where we could vectorize the operation indicated by CODE,
1806 return the minimum vectorization factor that makes it worthwhile
1807 to use generic vectors. */
1808 static int
1809 vect_min_worthwhile_factor (enum tree_code code)
1811 switch (code)
1813 case PLUS_EXPR:
1814 case MINUS_EXPR:
1815 case NEGATE_EXPR:
1816 return 4;
1818 case BIT_AND_EXPR:
1819 case BIT_IOR_EXPR:
1820 case BIT_XOR_EXPR:
1821 case BIT_NOT_EXPR:
1822 return 2;
1824 default:
1825 return INT_MAX;
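/* Usage sketch (illustrative): with generic word-mode vectors, a check
   of the form

     if (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         >= vect_min_worthwhile_factor (PLUS_EXPR))
       ...

   accepts an addition only when the vectorization factor is at least 4,
   while the cheaper bitwise operations already pay off at a factor of 2;
   this mirrors the rejection tests in vectorizable_operation below.  */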
1830 /* Function vectorizable_operation.
1832 Check if STMT performs a binary or unary operation that can be vectorized.
1833 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1834 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1835 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1837 bool
1838 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1840 tree vec_dest;
1841 tree scalar_dest;
1842 tree operation;
1843 tree op0, op1 = NULL;
1844 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1845 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1846 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1848 enum tree_code code;
1849 enum machine_mode vec_mode;
1850 tree new_temp;
1851 int op_type;
1852 optab optab;
1853 int icode;
1854 enum machine_mode optab_op2_mode;
1855 tree def, def_stmt;
1856 enum vect_def_type dt0, dt1;
1857 tree new_stmt;
1858 stmt_vec_info prev_stmt_info;
1859 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1860 int nunits_out;
1861 tree vectype_out;
1862 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1863 int j;
1865 gcc_assert (ncopies >= 1);
1867 /* Is STMT a vectorizable binary/unary operation? */
1868 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1869 return false;
1871 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1873 if (STMT_VINFO_LIVE_P (stmt_info))
1875 /* FORNOW: not yet supported. */
1876 if (vect_print_dump_info (REPORT_DETAILS))
1877 fprintf (vect_dump, "value used after loop.");
1878 return false;
1881 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1882 return false;
1884 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
1885 return false;
1887 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1888 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1889 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1890 if (nunits_out != nunits_in)
1891 return false;
1893 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1894 code = TREE_CODE (operation);
1895 optab = optab_for_tree_code (code, vectype);
1897 /* Support only unary or binary operations. */
1898 op_type = TREE_CODE_LENGTH (code);
1899 if (op_type != unary_op && op_type != binary_op)
1901 if (vect_print_dump_info (REPORT_DETAILS))
1902 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1903 return false;
1906 op0 = TREE_OPERAND (operation, 0);
1907 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
1909 if (vect_print_dump_info (REPORT_DETAILS))
1910 fprintf (vect_dump, "use not simple.");
1911 return false;
1914 if (op_type == binary_op)
1916 op1 = TREE_OPERAND (operation, 1);
1917 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
1919 if (vect_print_dump_info (REPORT_DETAILS))
1920 fprintf (vect_dump, "use not simple.");
1921 return false;
1925 /* Supportable by target? */
1926 if (!optab)
1928 if (vect_print_dump_info (REPORT_DETAILS))
1929 fprintf (vect_dump, "no optab.");
1930 return false;
1932 vec_mode = TYPE_MODE (vectype);
1933 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1934 if (icode == CODE_FOR_nothing)
1936 if (vect_print_dump_info (REPORT_DETAILS))
1937 fprintf (vect_dump, "op not supported by target.");
1938 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1939 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1940 < vect_min_worthwhile_factor (code))
1941 return false;
1942 if (vect_print_dump_info (REPORT_DETAILS))
1943 fprintf (vect_dump, "proceeding using word mode.");
1946 /* Worthwhile without SIMD support? */
1947 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1948 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1949 < vect_min_worthwhile_factor (code))
1951 if (vect_print_dump_info (REPORT_DETAILS))
1952 fprintf (vect_dump, "not worthwhile without SIMD support.");
1953 return false;
1956 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1958 /* FORNOW: not yet supported. */
1959 if (!VECTOR_MODE_P (vec_mode))
1960 return false;
1962 /* An invariant argument is needed for a vector shift
1963 by a scalar shift operand. */
1964 optab_op2_mode = insn_data[icode].operand[2].mode;
1965 if (! (VECTOR_MODE_P (optab_op2_mode)
1966 || dt1 == vect_constant_def
1967 || dt1 == vect_invariant_def))
1969 if (vect_print_dump_info (REPORT_DETAILS))
1970 fprintf (vect_dump, "operand mode requires invariant argument.");
1971 return false;
1975 if (!vec_stmt) /* transformation not required. */
1977 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1978 return true;
1981 /** Transform. **/
1983 if (vect_print_dump_info (REPORT_DETAILS))
1984 fprintf (vect_dump, "transform binary/unary operation.");
1986 /* Handle def. */
1987 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1989 /* In case the vectorization factor (VF) is bigger than the number
1990 of elements that we can fit in a vectype (nunits), we have to generate
1991 more than one vector stmt - i.e., we need to "unroll" the
1992 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1993 from one copy of the vector stmt to the next, in the field
1994 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1995 stages to find the correct vector defs to be used when vectorizing
1996 stmts that use the defs of the current stmt. The example below illustrates
1997 the vectorization process when VF=16 and nunits=4 (i.e., we need to create
1998 4 vectorized stmts):
2000 before vectorization:
2001 RELATED_STMT VEC_STMT
2002 S1: x = memref - -
2003 S2: z = x + 1 - -
2005 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2006 there):
2007 RELATED_STMT VEC_STMT
2008 VS1_0: vx0 = memref0 VS1_1 -
2009 VS1_1: vx1 = memref1 VS1_2 -
2010 VS1_2: vx2 = memref2 VS1_3 -
2011 VS1_3: vx3 = memref3 - -
2012 S1: x = load - VS1_0
2013 S2: z = x + 1 - -
2015 step2: vectorize stmt S2 (done here):
2016 To vectorize stmt S2 we first need to find the relevant vector
2017 def for the first operand 'x'. This is, as usual, obtained from
2018 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2019 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2020 relevant vector def 'vx0'. Having found 'vx0' we can generate
2021 the vector stmt VS2_0, and as usual, record it in the
2022 STMT_VINFO_VEC_STMT of stmt S2.
2023 When creating the second copy (VS2_1), we obtain the relevant vector
2024 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2025 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2026 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2027 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2028 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2029 chain of stmts and pointers:
2030 RELATED_STMT VEC_STMT
2031 VS1_0: vx0 = memref0 VS1_1 -
2032 VS1_1: vx1 = memref1 VS1_2 -
2033 VS1_2: vx2 = memref2 VS1_3 -
2034 VS1_3: vx3 = memref3 - -
2035 S1: x = load - VS1_0
2036 VS2_0: vz0 = vx0 + v1 VS2_1 -
2037 VS2_1: vz1 = vx1 + v1 VS2_2 -
2038 VS2_2: vz2 = vx2 + v1 VS2_3 -
2039 VS2_3: vz3 = vx3 + v1 - -
2040 S2: z = x + 1 - VS2_0 */
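/* In scalar source terms (illustrative only), the example above could come
   from a loop such as

     for (i = 0; i < n; i++)
       z[i] = x[i] + 1;

   vectorized with 4-element vectors while the vectorization factor is 16,
   so that four copies VS2_0..VS2_3 of the vector addition are emitted and
   chained through RELATED_STMT as shown.  */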
2042 prev_stmt_info = NULL;
2043 for (j = 0; j < ncopies; j++)
2045 /* Handle uses. */
2046 if (j == 0)
2048 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2049 if (op_type == binary_op)
2051 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
2053 /* Vector shl and shr insn patterns can be defined with
2054 scalar operand 2 (shift operand). In this case, use
2055 constant or loop invariant op1 directly, without
2056 extending it to vector mode first. */
2057 optab_op2_mode = insn_data[icode].operand[2].mode;
2058 if (!VECTOR_MODE_P (optab_op2_mode))
2060 if (vect_print_dump_info (REPORT_DETAILS))
2061 fprintf (vect_dump, "operand 1 using scalar mode.");
2062 vec_oprnd1 = op1;
2065 if (!vec_oprnd1)
2066 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2069 else
2071 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2072 if (op_type == binary_op)
2073 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2076 /* Arguments are ready. Create the new vector stmt. */
2078 if (op_type == binary_op)
2079 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
2080 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2081 else
2082 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
2083 build1 (code, vectype, vec_oprnd0));
2084 new_temp = make_ssa_name (vec_dest, new_stmt);
2085 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2086 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2088 if (j == 0)
2089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2090 else
2091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2092 prev_stmt_info = vinfo_for_stmt (new_stmt);
2095 return true;
2099 /* Function vectorizable_type_demotion.
2101 Check if STMT performs a binary or unary operation that involves
2102 type demotion, and if it can be vectorized.
2103 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2104 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2105 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2107 bool
2108 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
2109 tree *vec_stmt)
2111 tree vec_dest;
2112 tree scalar_dest;
2113 tree operation;
2114 tree op0;
2115 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2118 enum tree_code code;
2119 tree new_temp;
2120 tree def, def_stmt;
2121 enum vect_def_type dt0;
2122 tree new_stmt;
2123 stmt_vec_info prev_stmt_info;
2124 int nunits_in;
2125 int nunits_out;
2126 tree vectype_out;
2127 int ncopies;
2128 int j;
2129 tree expr;
2130 tree vectype_in;
2131 tree scalar_type;
2132 optab optab;
2133 enum machine_mode vec_mode;
2135 /* Is STMT a vectorizable type-demotion operation? */
2137 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2138 return false;
2140 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2142 if (STMT_VINFO_LIVE_P (stmt_info))
2144 /* FORNOW: not yet supported. */
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "value used after loop.");
2147 return false;
2150 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2151 return false;
2153 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2154 return false;
2156 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2157 code = TREE_CODE (operation);
2158 if (code != NOP_EXPR && code != CONVERT_EXPR)
2159 return false;
2161 op0 = TREE_OPERAND (operation, 0);
2162 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2163 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2165 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2166 scalar_type = TREE_TYPE (scalar_dest);
2167 vectype_out = get_vectype_for_scalar_type (scalar_type);
2168 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2169 if (nunits_in != nunits_out / 2) /* FORNOW */
2170 return false;
2172 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2173 gcc_assert (ncopies >= 1);
2175 if (!INTEGRAL_TYPE_P (scalar_type)
2176 || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2177 return false;
2179 /* Check the operands of the operation. */
2180 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "use not simple.");
2184 return false;
2187 /* Supportable by target? */
2188 code = VEC_PACK_MOD_EXPR;
2189 optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
2190 if (!optab)
2191 return false;
2193 vec_mode = TYPE_MODE (vectype_in);
2194 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2195 return false;
2197 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2199 if (!vec_stmt) /* transformation not required. */
2201 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2202 return true;
2205 /** Transform. **/
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2209 ncopies);
2211 /* Handle def. */
2212 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2214 /* In case the vectorization factor (VF) is bigger than the number
2215 of elements that we can fit in a vectype (nunits), we have to generate
2216 more than one vector stmt - i.e., we need to "unroll" the
2217 vector stmt by a factor VF/nunits. */
2218 prev_stmt_info = NULL;
2219 for (j = 0; j < ncopies; j++)
2221 /* Handle uses. */
2222 if (j == 0)
2224 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2225 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2226 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
2228 else
2230 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2231 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2234 /* Arguments are ready. Create the new vector stmt. */
2235 expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
2236 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
2237 new_temp = make_ssa_name (vec_dest, new_stmt);
2238 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2239 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2241 if (j == 0)
2242 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2243 else
2244 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2246 prev_stmt_info = vinfo_for_stmt (new_stmt);
2249 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2250 return true;
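/* Illustration (assumed types, not from this file): demoting 'int' to
   'short' with V4SI inputs and a V8HI output consumes two V4SI defs per
   copy and packs them with VEC_PACK_MOD_EXPR, which is why each iteration
   of the loop above fetches two input defs (vec_oprnd0 and vec_oprnd1)
   for every output stmt it creates.  */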
2254 /* Function vect_gen_widened_results_half.
2256 Create a vector stmt whose code, type, number of arguments, and result
2257 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2258 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2259 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2260 needs to be created (DECL is a function-decl of a target-builtin).
2261 STMT is the original scalar stmt that we are vectorizing. */
2263 static tree
2264 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2265 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2266 tree vec_dest, block_stmt_iterator *bsi,
2267 tree stmt)
2269 tree vec_params;
2270 tree expr;
2271 tree new_stmt;
2272 tree new_temp;
2273 tree sym;
2274 ssa_op_iter iter;
2276 /* Generate half of the widened result: */
2277 if (code == CALL_EXPR)
2279 /* Target specific support */
2280 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2281 if (op_type == binary_op)
2282 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2283 expr = build_function_call_expr (decl, vec_params);
2285 else
2287 /* Generic support */
2288 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2289 if (op_type == binary_op)
2290 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2291 else
2292 expr = build1 (code, vectype, vec_oprnd0);
2294 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
2295 new_temp = make_ssa_name (vec_dest, new_stmt);
2296 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2297 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2299 if (code == CALL_EXPR)
2301 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2303 if (TREE_CODE (sym) == SSA_NAME)
2304 sym = SSA_NAME_VAR (sym);
2305 mark_sym_for_renaming (sym);
2309 return new_stmt;
2313 /* Function vectorizable_type_promotion.
2315 Check if STMT performs a binary or unary operation that involves
2316 type promotion, and if it can be vectorized.
2317 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2318 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2319 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2321 bool
2322 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2323 tree *vec_stmt)
2325 tree vec_dest;
2326 tree scalar_dest;
2327 tree operation;
2328 tree op0, op1 = NULL;
2329 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2331 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2332 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2333 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2334 int op_type;
2335 tree def, def_stmt;
2336 enum vect_def_type dt0, dt1;
2337 tree new_stmt;
2338 stmt_vec_info prev_stmt_info;
2339 int nunits_in;
2340 int nunits_out;
2341 tree vectype_out;
2342 int ncopies;
2343 int j;
2344 tree vectype_in;
2346 /* Is STMT a vectorizable type-promotion operation? */
2348 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2349 return false;
2351 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2353 if (STMT_VINFO_LIVE_P (stmt_info))
2355 /* FORNOW: not yet supported. */
2356 if (vect_print_dump_info (REPORT_DETAILS))
2357 fprintf (vect_dump, "value used after loop.");
2358 return false;
2361 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2362 return false;
2364 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2365 return false;
2367 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2368 code = TREE_CODE (operation);
2369 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
2370 return false;
2372 op0 = TREE_OPERAND (operation, 0);
2373 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2374 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2375 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2376 gcc_assert (ncopies >= 1);
2378 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2379 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2380 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2381 if (nunits_out != nunits_in / 2) /* FORNOW */
2382 return false;
2384 if (!INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2385 || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2386 return false;
2388 /* Check the operands of the operation. */
2389 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "use not simple.");
2393 return false;
2396 op_type = TREE_CODE_LENGTH (code);
2397 if (op_type == binary_op)
2399 op1 = TREE_OPERAND (operation, 1);
2400 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2402 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "use not simple.");
2404 return false;
2408 /* Supportable by target? */
2409 if (!supportable_widening_operation (code, stmt, vectype_in,
2410 &decl1, &decl2, &code1, &code2))
2411 return false;
2413 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2415 if (!vec_stmt) /* transformation not required. */
2417 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2418 return true;
2421 /** Transform. **/
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2425 ncopies);
2427 /* Handle def. */
2428 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2430 /* In case the vectorization factor (VF) is bigger than the number
2431 of elements that we can fit in a vectype (nunits), we have to generate
2432 more than one vector stmt - i.e., we need to "unroll" the
2433 vector stmt by a factor VF/nunits. */
2435 prev_stmt_info = NULL;
2436 for (j = 0; j < ncopies; j++)
2438 /* Handle uses. */
2439 if (j == 0)
2441 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2442 if (op_type == binary_op)
2443 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2445 else
2447 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2448 if (op_type == binary_op)
2449 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2452 /* Arguments are ready. Create the new vector stmt. We are creating
2453 two vector defs because the widened result does not fit in one vector.
2454 The vectorized stmt can be expressed as a call to a target builtin,
2455 or by using a tree-code. */
2456 /* Generate first half of the widened result: */
2457 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2458 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2459 if (j == 0)
2460 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2461 else
2462 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2463 prev_stmt_info = vinfo_for_stmt (new_stmt);
2465 /* Generate second half of the widened result: */
2466 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2467 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2468 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2469 prev_stmt_info = vinfo_for_stmt (new_stmt);
2473 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2474 return true;
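/* Illustration (assumed types, not from this file): promoting 'short' to
   'int' with a V8HI input and V4SI outputs yields two vector defs per
   input, one for each half of the widened result, which is why the loop
   above emits two stmts per copy: one via CODE1/DECL1 and one via
   CODE2/DECL2.  */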
2478 /* Function vect_strided_store_supported.
2480 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2481 and FALSE otherwise. */
2483 static bool
2484 vect_strided_store_supported (tree vectype)
2486 optab interleave_high_optab, interleave_low_optab;
2487 int mode;
2489 mode = (int) TYPE_MODE (vectype);
2491 /* Check that the operation is supported. */
2492 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2493 vectype);
2494 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2495 vectype);
2496 if (!interleave_high_optab || !interleave_low_optab)
2498 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "no optab for interleave.");
2500 return false;
2503 if (interleave_high_optab->handlers[(int) mode].insn_code
2504 == CODE_FOR_nothing
2505 || interleave_low_optab->handlers[(int) mode].insn_code
2506 == CODE_FOR_nothing)
2508 if (vect_print_dump_info (REPORT_DETAILS))
2509 fprintf (vect_dump, "interleave op not supported by target.");
2510 return false;
2512 return true;
2516 /* Function vect_permute_store_chain.
2518 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2519 a power of 2, generate interleave_high/low stmts to reorder the data
2520 correctly for the stores. Return the final references for stores in
2521 RESULT_CHAIN.
2523 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2524 The input is 4 vectors each containing 8 elements. We assign a number to each
2525 element; the input sequence is:
2527 1st vec: 0 1 2 3 4 5 6 7
2528 2nd vec: 8 9 10 11 12 13 14 15
2529 3rd vec: 16 17 18 19 20 21 22 23
2530 4th vec: 24 25 26 27 28 29 30 31
2532 The output sequence should be:
2534 1st vec: 0 8 16 24 1 9 17 25
2535 2nd vec: 2 10 18 26 3 11 19 27
2536 3rd vec: 4 12 20 28 5 13 21 29
2537 4th vec: 6 14 22 30 7 15 23 31
2539 i.e., we interleave the contents of the four vectors in their order.
2541 We use interleave_high/low instructions to create such output. The input of
2542 each interleave_high/low operation is two vectors:
2543 1st vec 2nd vec
2544 0 1 2 3 4 5 6 7
2545 the even elements of the result vector are obtained left-to-right from the
2546 high/low elements of the first vector. The odd elements of the result are
2547 obtained left-to-right from the high/low elements of the second vector.
2548 The output of interleave_high will be: 0 4 1 5
2549 and of interleave_low: 2 6 3 7
2552 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
2553 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2554 where the first argument is taken from the first half of DR_CHAIN and the
2555 second argument from its second half.
2556 In our example,
2558 I1: interleave_high (1st vec, 3rd vec)
2559 I2: interleave_low (1st vec, 3rd vec)
2560 I3: interleave_high (2nd vec, 4th vec)
2561 I4: interleave_low (2nd vec, 4th vec)
2563 The output for the first stage is:
2565 I1: 0 16 1 17 2 18 3 19
2566 I2: 4 20 5 21 6 22 7 23
2567 I3: 8 24 9 25 10 26 11 27
2568 I4: 12 28 13 29 14 30 15 31
2570 The output of the second stage, i.e. the final result is:
2572 I1: 0 8 16 24 1 9 17 25
2573 I2: 2 10 18 26 3 11 19 27
2574 I3: 4 12 20 28 5 13 21 29
2575 I4: 6 14 22 30 7 15 23 31. */
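/* A minimal scalar sketch (hypothetical helpers, not used anywhere) of one
   interleave step on 4-element vectors, matching the "0 4 1 5" and
   "2 6 3 7" example above:

     static void
     interleave_high_4 (int out[4], const int a[4], const int b[4])
     {
       out[0] = a[0]; out[1] = b[0]; out[2] = a[1]; out[3] = b[1];
     }

     static void
     interleave_low_4 (int out[4], const int a[4], const int b[4])
     {
       out[0] = a[2]; out[1] = b[2]; out[2] = a[3]; out[3] = b[3];
     }  */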
2577 static bool
2578 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2579 unsigned int length,
2580 tree stmt,
2581 block_stmt_iterator *bsi,
2582 VEC(tree,heap) **result_chain)
2584 tree perm_dest, perm_stmt, vect1, vect2, high, low;
2585 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2586 tree scalar_dest;
2587 int i;
2588 unsigned int j;
2589 VEC(tree,heap) *first, *second;
2591 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2592 first = VEC_alloc (tree, heap, length/2);
2593 second = VEC_alloc (tree, heap, length/2);
2595 /* Check that the operation is supported. */
2596 if (!vect_strided_store_supported (vectype))
2597 return false;
2599 *result_chain = VEC_copy (tree, heap, dr_chain);
2601 for (i = 0; i < exact_log2 (length); i++)
2603 for (j = 0; j < length/2; j++)
2605 vect1 = VEC_index (tree, dr_chain, j);
2606 vect2 = VEC_index (tree, dr_chain, j+length/2);
2608 /* Create interleaving stmt:
2609 in the case of big endian:
2610 high = interleave_high (vect1, vect2)
2611 and in the case of little endian:
2612 high = interleave_low (vect1, vect2). */
2613 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2614 DECL_GIMPLE_REG_P (perm_dest) = 1;
2615 add_referenced_var (perm_dest);
2616 if (BYTES_BIG_ENDIAN)
2617 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2618 build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
2619 vect1, vect2));
2620 else
2621 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2622 build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
2623 vect1, vect2));
2624 high = make_ssa_name (perm_dest, perm_stmt);
2625 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
2626 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2627 VEC_replace (tree, *result_chain, 2*j, high);
2629 /* Create interleaving stmt:
2630 in the case of big endian:
2631 low = interleave_low (vect1, vect2)
2632 and in the case of little endian:
2633 low = interleave_high (vect1, vect2). */
2634 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2635 DECL_GIMPLE_REG_P (perm_dest) = 1;
2636 add_referenced_var (perm_dest);
2637 if (BYTES_BIG_ENDIAN)
2638 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2639 build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
2640 vect1, vect2));
2641 else
2642 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2643 build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
2644 vect1, vect2));
2645 low = make_ssa_name (perm_dest, perm_stmt);
2646 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
2647 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2648 VEC_replace (tree, *result_chain, 2*j+1, low);
2650 dr_chain = VEC_copy (tree, heap, *result_chain);
2652 return true;
2656 /* Function vectorizable_store.
2658 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2659 can be vectorized.
2660 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2661 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2662 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2664 bool
2665 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2667 tree scalar_dest;
2668 tree data_ref;
2669 tree op;
2670 tree vec_oprnd = NULL_TREE;
2671 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2672 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2675 enum machine_mode vec_mode;
2676 tree dummy;
2677 enum dr_alignment_support alignment_support_cheme;
2678 ssa_op_iter iter;
2679 def_operand_p def_p;
2680 tree def, def_stmt;
2681 enum vect_def_type dt;
2682 stmt_vec_info prev_stmt_info = NULL;
2683 tree dataref_ptr = NULL_TREE;
2684 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2685 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2686 int j;
2687 tree next_stmt, first_stmt;
2688 bool strided_store = false;
2689 unsigned int group_size, i;
2690 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2691 gcc_assert (ncopies >= 1);
2693 /* Is this a vectorizable store? */
2695 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2696 return false;
2698 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2699 if (TREE_CODE (scalar_dest) != ARRAY_REF
2700 && TREE_CODE (scalar_dest) != INDIRECT_REF
2701 && !DR_GROUP_FIRST_DR (stmt_info))
2702 return false;
2704 op = GIMPLE_STMT_OPERAND (stmt, 1);
2705 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2707 if (vect_print_dump_info (REPORT_DETAILS))
2708 fprintf (vect_dump, "use not simple.");
2709 return false;
2712 vec_mode = TYPE_MODE (vectype);
2713 /* FORNOW. In some cases we can vectorize even if the data-type is not
2714 supported (e.g. array initialization with 0). */
2715 if (mov_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2716 return false;
2718 if (!STMT_VINFO_DATA_REF (stmt_info))
2719 return false;
2721 if (DR_GROUP_FIRST_DR (stmt_info))
2723 strided_store = true;
2724 if (!vect_strided_store_supported (vectype))
2725 return false;
2728 if (!vec_stmt) /* transformation not required. */
2730 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2731 return true;
2734 /** Transform. **/
2736 if (vect_print_dump_info (REPORT_DETAILS))
2737 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
2739 if (strided_store)
2741 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2742 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2743 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2745 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2747 /* We vectorize all the stmts of the interleaving group when we
2748 reach the last stmt in the group. */
2749 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2750 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
2752 *vec_stmt = NULL_TREE;
2753 return true;
2756 else
2758 first_stmt = stmt;
2759 first_dr = dr;
2760 group_size = 1;
2763 dr_chain = VEC_alloc (tree, heap, group_size);
2764 oprnds = VEC_alloc (tree, heap, group_size);
2766 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
2767 gcc_assert (alignment_support_cheme);
2768 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2770 /* In case the vectorization factor (VF) is bigger than the number
2771 of elements that we can fit in a vectype (nunits), we have to generate
2772 more than one vector stmt - i.e., we need to "unroll" the
2773 vector stmt by a factor VF/nunits. For more details see documentation in
2774 vect_get_vec_def_for_copy_stmt. */
2776 /* In case of interleaving (non-unit strided access):
2778 S1: &base + 2 = x2
2779 S2: &base = x0
2780 S3: &base + 1 = x1
2781 S4: &base + 3 = x3
2783 We create vectorized stores starting from the base address (the access of
2784 the first stmt in the chain, S2 in the above example) when the last store
2785 stmt of the chain (S4) is reached:
2787 VS1: &base = vx2
2788 VS2: &base + vec_size*1 = vx0
2789 VS3: &base + vec_size*2 = vx1
2790 VS4: &base + vec_size*3 = vx3
2792 Then permutation statements are generated:
2794 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2795 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2798 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2799 (the order of the data-refs in the output of vect_permute_store_chain
2800 corresponds to the order of scalar stmts in the interleaving chain - see
2801 the documentation of vect_permute_store_chain()).
2803 In case of both multiple types and interleaving, above vector stores and
2804 permutation stmts are created for every copy. The result vector stmts are
2805 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
2806 STMT_VINFO_RELATED_STMT for the next copies. */
2809 prev_stmt_info = NULL;
2810 for (j = 0; j < ncopies; j++)
2812 tree new_stmt;
2813 tree ptr_incr;
2815 if (j == 0)
2817 /* For interleaved stores we collect vectorized defs for all the
2818 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
2819 as an input to vect_permute_store_chain(), and OPRNDS as an input
2820 to vect_get_vec_def_for_stmt_copy() for the next copy.
2821 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2822 OPRNDS are of size 1. */
2823 next_stmt = first_stmt;
2824 for (i = 0; i < group_size; i++)
2826 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
2827 is the exact number of stmts in the chain. Therefore, NEXT_STMT
2828 can't be NULL_TREE. In case there is no interleaving,
2829 GROUP_SIZE is 1, and only one iteration of the loop will be
2830 executed. */
2831 gcc_assert (next_stmt);
2832 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
2833 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
2834 VEC_quick_push(tree, dr_chain, vec_oprnd);
2835 VEC_quick_push(tree, oprnds, vec_oprnd);
2836 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2838 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
2839 &dummy, &ptr_incr, false,
2840 TREE_TYPE (vec_oprnd));
2842 else
2844 /* For interleaved stores we created vectorized defs for all the
2845 defs stored in OPRNDS in the previous iteration (previous copy).
2846 DR_CHAIN is then used as an input to vect_permute_store_chain(),
2847 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
2848 next copy.
2849 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2850 OPRNDS are of size 1. */
2851 for (i = 0; i < group_size; i++)
2853 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
2854 VEC_index (tree, oprnds, i));
2855 VEC_replace(tree, dr_chain, i, vec_oprnd);
2856 VEC_replace(tree, oprnds, i, vec_oprnd);
2858 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2861 if (strided_store)
2863 result_chain = VEC_alloc (tree, heap, group_size);
2864 /* Permute. */
2865 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
2866 &result_chain))
2867 return false;
2870 next_stmt = first_stmt;
2871 for (i = 0; i < group_size; i++)
2873 /* For strided stores vectorized defs are interleaved in
2874 vect_permute_store_chain(). */
2875 if (strided_store)
2876 vec_oprnd = VEC_index(tree, result_chain, i);
2878 data_ref = build_fold_indirect_ref (dataref_ptr);
2879 /* Arguments are ready. Create the new vector stmt. */
2880 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, data_ref,
2881 vec_oprnd);
2882 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2884 /* Set the VDEFs for the vector pointer. If this virtual def
2885 has a use outside the loop and a loop peel is performed
2886 then the def may be renamed by the peel. Mark it for
2887 renaming so the later use will also be renamed. */
2888 copy_virtual_operands (new_stmt, next_stmt);
2889 if (j == 0)
2891 /* The original store is deleted so the same SSA_NAMEs
2892 can be used. */
2893 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
2895 SSA_NAME_DEF_STMT (def) = new_stmt;
2896 mark_sym_for_renaming (SSA_NAME_VAR (def));
2899 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2901 else
2903 /* Create new names for all the definitions created by COPY and
2904 add replacement mappings for each new name. */
2905 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
2907 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2908 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2911 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2914 prev_stmt_info = vinfo_for_stmt (new_stmt);
2915 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2916 if (!next_stmt)
2917 break;
2918 /* Bump the vector pointer. */
2919 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2923 return true;
2927 /* Function vect_setup_realignment.
2929 This function is called when vectorizing an unaligned load using
2930 the dr_unaligned_software_pipeline scheme.
2931 This function generates the following code at the loop prolog:
2933 p = initial_addr;
2934 msq_init = *(floor(p)); # prolog load
2935 realignment_token = call target_builtin;
2936 loop:
2937 msq = phi (msq_init, ---)
2939 The code above sets up a new (vector) pointer, pointing to the first
2940 location accessed by STMT, and a "floor-aligned" load using that pointer.
2941 It also generates code to compute the "realignment-token" (if the relevant
2942 target hook was defined), and creates a phi-node at the loop-header bb
2943 whose arguments are the result of the prolog-load (created by this
2944 function) and the result of a load that takes place in the loop (to be
2945 created by the caller to this function).
2946 The caller to this function uses the phi-result (msq) to create the
2947 realignment code inside the loop, and sets up the missing phi argument,
2948 as follows:
2950 loop:
2951 msq = phi (msq_init, lsq)
2952 lsq = *(floor(p')); # load in loop
2953 result = realign_load (msq, lsq, realignment_token);
2955 Input:
2956 STMT - (scalar) load stmt to be vectorized. This load accesses
2957 a memory location that may be unaligned.
2958 BSI - place where new code is to be inserted.
2960 Output:
2961 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2962 target hook, if defined.
2963 Return value - the result of the loop-header phi node. */
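/* Illustrative model (not from this file) of the realignment scheme above,
   for a power-of-two vector size VS and a possibly unaligned address p:

     msq = *(p & -VS);                              // floor-aligned load
     lsq = *((p & -VS) + VS);                       // next aligned vector
     res = realign (msq, lsq, realignment_token);   // the VS bytes at p

   where realign stands for the target's realign_load operation and the
   token encodes which bytes of msq/lsq to select.  */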
2965 static tree
2966 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
2967 tree *realignment_token)
2969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2971 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2973 edge pe = loop_preheader_edge (loop);
2974 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2975 tree vec_dest;
2976 tree init_addr;
2977 tree inc;
2978 tree ptr;
2979 tree data_ref;
2980 tree new_stmt;
2981 basic_block new_bb;
2982 tree msq_init;
2983 tree new_temp;
2984 tree phi_stmt;
2985 tree msq;
2987 /* 1. Create msq_init = *(floor(p1)) in the loop preheader. */
2988 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2989 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
2990 NULL_TREE);
2991 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2992 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, data_ref);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2995 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2996 gcc_assert (!new_bb);
2997 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
2998 copy_virtual_operands (new_stmt, stmt);
2999 update_vuses_to_preheader (new_stmt, loop);
3001 /* 2. Create permutation mask, if required, in loop preheader. */
3002 if (targetm.vectorize.builtin_mask_for_load)
3004 tree builtin_decl;
3005 tree params = build_tree_list (NULL_TREE, init_addr);
3007 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3008 new_stmt = build_function_call_expr (builtin_decl, params);
3009 vec_dest = vect_create_destination_var (scalar_dest,
3010 TREE_TYPE (new_stmt));
3011 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3012 new_stmt);
3013 new_temp = make_ssa_name (vec_dest, new_stmt);
3014 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3015 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
3016 gcc_assert (!new_bb);
3017 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
3019 /* The result of the CALL_EXPR to this builtin is determined from
3020 the value of the parameter and no global variables are touched
3021 which makes the builtin a "const" function. Requiring the
3022 builtin to have the "const" attribute makes it unnecessary
3023 to call mark_call_clobbered. */
3024 gcc_assert (TREE_READONLY (builtin_decl));
3027 /* 3. Create msq = phi <msq_init, lsq> in loop. */
3028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3029 msq = make_ssa_name (vec_dest, NULL_TREE);
3030 phi_stmt = create_phi_node (msq, loop->header);
3031 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3032 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
3034 return msq;
3038 /* Function vect_strided_load_supported.
3040 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3041 and FALSE otherwise. */
3043 static bool
3044 vect_strided_load_supported (tree vectype)
3046 optab perm_even_optab, perm_odd_optab;
3047 int mode;
3049 mode = (int) TYPE_MODE (vectype);
3051 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
3052 if (!perm_even_optab)
3054 if (vect_print_dump_info (REPORT_DETAILS))
3055 fprintf (vect_dump, "no optab for perm_even.");
3056 return false;
3059 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3061 if (vect_print_dump_info (REPORT_DETAILS))
3062 fprintf (vect_dump, "perm_even op not supported by target.");
3063 return false;
3066 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
3067 if (!perm_odd_optab)
3069 if (vect_print_dump_info (REPORT_DETAILS))
3070 fprintf (vect_dump, "no optab for perm_odd.");
3071 return false;
3074 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3076 if (vect_print_dump_info (REPORT_DETAILS))
3077 fprintf (vect_dump, "perm_odd op not supported by target.");
3078 return false;
3080 return true;
3084 /* Function vect_permute_load_chain.
3086 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3087 a power of 2, generate extract_even/odd stmts to reorder the input data
3088 correctly. Return the final references for loads in RESULT_CHAIN.
3090 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3091 The input is 4 vectors each containing 8 elements. We assign a number to each
3092 element; the input sequence is:
3094 1st vec: 0 1 2 3 4 5 6 7
3095 2nd vec: 8 9 10 11 12 13 14 15
3096 3rd vec: 16 17 18 19 20 21 22 23
3097 4th vec: 24 25 26 27 28 29 30 31
3099 The output sequence should be:
3101 1st vec: 0 4 8 12 16 20 24 28
3102 2nd vec: 1 5 9 13 17 21 25 29
3103 3rd vec: 2 6 10 14 18 22 26 30
3104 4th vec: 3 7 11 15 19 23 27 31
3106 i.e., the first output vector should contain the first elements of each
3107 interleaving group, etc.
3109 We use extract_even/odd instructions to create such output. The input of each
3110 extract_even/odd operation is two vectors
3111 1st vec 2nd vec
3112 0 1 2 3 4 5 6 7
3114 and the output is the vector of extracted even/odd elements. The output of
3115 extract_even will be: 0 2 4 6
3116 and of extract_odd: 1 3 5 7
3119 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
3120 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3121 order. In our example,
3123 E1: extract_even (1st vec, 2nd vec)
3124 E2: extract_odd (1st vec, 2nd vec)
3125 E3: extract_even (3rd vec, 4th vec)
3126 E4: extract_odd (3rd vec, 4th vec)
3128 The output for the first stage will be:
3130 E1: 0 2 4 6 8 10 12 14
3131 E2: 1 3 5 7 9 11 13 15
3132 E3: 16 18 20 22 24 26 28 30
3133 E4: 17 19 21 23 25 27 29 31
3135 In order to proceed and create the correct sequence for the next stage (or
3136 for the correct output, if the second stage is the last one, as in our
3137 example), we first put the output of extract_even operation and then the
3138 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3139 The input for the second stage is:
3141 1st vec (E1): 0 2 4 6 8 10 12 14
3142 2nd vec (E3): 16 18 20 22 24 26 28 30
3143 3rd vec (E2): 1 3 5 7 9 11 13 15
3144 4th vec (E4): 17 19 21 23 25 27 29 31
3146 The output of the second stage:
3148 E1: 0 4 8 12 16 20 24 28
3149 E2: 2 6 10 14 18 22 26 30
3150 E3: 1 5 9 13 17 21 25 29
3151 E4: 3 7 11 15 19 23 27 31
3153 And RESULT_CHAIN after reordering:
3155 1st vec (E1): 0 4 8 12 16 20 24 28
3156 2nd vec (E3): 1 5 9 13 17 21 25 29
3157 3rd vec (E2): 2 6 10 14 18 22 26 30
3158 4th vec (E4): 3 7 11 15 19 23 27 31. */
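/* A minimal scalar sketch (hypothetical helpers, not used anywhere) of one
   extract step on 4-element vectors, matching the "0 2 4 6" and "1 3 5 7"
   example above:

     static void
     extract_even_4 (int out[4], const int a[4], const int b[4])
     {
       out[0] = a[0]; out[1] = a[2]; out[2] = b[0]; out[3] = b[2];
     }

     static void
     extract_odd_4 (int out[4], const int a[4], const int b[4])
     {
       out[0] = a[1]; out[1] = a[3]; out[2] = b[1]; out[3] = b[3];
     }  */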
3160 static bool
3161 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3162 unsigned int length,
3163 tree stmt,
3164 block_stmt_iterator *bsi,
3165 VEC(tree,heap) **result_chain)
3167 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
3168 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3169 int i;
3170 unsigned int j;
3172 /* Check that the operation is supported. */
3173 if (!vect_strided_load_supported (vectype))
3174 return false;
3176 *result_chain = VEC_copy (tree, heap, dr_chain);
3177 for (i = 0; i < exact_log2 (length); i++)
3179 for (j = 0; j < length; j += 2)
3181 first_vect = VEC_index (tree, dr_chain, j);
3182 second_vect = VEC_index (tree, dr_chain, j+1);
3184 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3185 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3186 DECL_GIMPLE_REG_P (perm_dest) = 1;
3187 add_referenced_var (perm_dest);
3189 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
3190 build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
3191 first_vect, second_vect));
3193 data_ref = make_ssa_name (perm_dest, perm_stmt);
3194 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3195 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3196 mark_symbols_for_renaming (perm_stmt);
3198 VEC_replace (tree, *result_chain, j/2, data_ref);
3200 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3201 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3202 DECL_GIMPLE_REG_P (perm_dest) = 1;
3203 add_referenced_var (perm_dest);
3205 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
3206 build2 (VEC_EXTRACT_ODD_EXPR, vectype,
3207 first_vect, second_vect));
3208 data_ref = make_ssa_name (perm_dest, perm_stmt);
3209 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3210 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3211 mark_symbols_for_renaming (perm_stmt);
3213 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3215 dr_chain = VEC_copy (tree, heap, *result_chain);
3217 return true;
3221 /* Function vect_transform_strided_load.
3223 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3224 to perform their permutation and ascribe the resulting vectorized statements
3225 to the scalar statements. */
3228 static bool
3229 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
3230 block_stmt_iterator *bsi)
3232 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3233 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3234 tree next_stmt, new_stmt;
3235 VEC(tree,heap) *result_chain = NULL;
3236 unsigned int i, gap_count;
3237 tree tmp_data_ref;
3239 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3240 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3241 vectors that are ready for vector computation. */
3242 result_chain = VEC_alloc (tree, heap, size);
3243 /* Permute. */
3244 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
3245 return false;
3247 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3248 Since we scan the chain starting from its first node, their order
3249 corresponds to the order of data-refs in RESULT_CHAIN. */
3250 next_stmt = first_stmt;
3251 gap_count = 1;
3252 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
3254 if (!next_stmt)
3255 break;
3257 /* Skip the gaps. Loads created for the gaps will be removed by the dead
3258 code elimination pass later.
3259 DR_GROUP_GAP is the number of steps in elements from the previous
3260 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3261 correspond to the gaps. */
3263 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3265 gap_count++;
3266 continue;
3269 while (next_stmt)
3271 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3272 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3273 copies, and we put the new vector statement in the first available
3274 RELATED_STMT. */
3275 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3276 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3277 else
3279 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3280 tree rel_stmt = STMT_VINFO_RELATED_STMT (
3281 vinfo_for_stmt (prev_stmt));
3282 while (rel_stmt)
3284 prev_stmt = rel_stmt;
3285 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3287 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
3289 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3290 gap_count = 1;
3291 /* If NEXT_STMT accesses the same DR as the previous statement,
3292 put the same TMP_DATA_REF as its vectorized statement; otherwise
3293 get the next data-ref from RESULT_CHAIN. */
3294 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3295 break;
3298 return true;
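/* Note (illustrative reading of the loop above): when a stmt's
   DR_GROUP_GAP is greater than 1, the preceding permuted vectors in
   RESULT_CHAIN correspond to elements the scalar code never reads; they
   are simply skipped here and left for dead code elimination.  */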
3302 /* Function vectorizable_load.
3304 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3305 can be vectorized.
3306 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3307 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3308 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3310 bool
3311 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3313 tree scalar_dest;
3314 tree vec_dest = NULL;
3315 tree data_ref = NULL;
3316 tree op;
3317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3318 stmt_vec_info prev_stmt_info;
3319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3321 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3322 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3323 tree new_temp;
3324 int mode;
3325 tree new_stmt = NULL_TREE;
3326 tree dummy;
3327 enum dr_alignment_support alignment_support_cheme;
3328 tree dataref_ptr = NULL_TREE;
3329 tree ptr_incr;
3330 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3331 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3332 int i, j, group_size;
3333 tree msq = NULL_TREE, lsq;
3334 tree offset = NULL_TREE;
3335 tree realignment_token = NULL_TREE;
3336 tree phi_stmt = NULL_TREE;
3337 VEC(tree,heap) *dr_chain = NULL;
3338 bool strided_load = false;
3339 tree first_stmt;
3341 /* Is this a vectorizable load? */
3342 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3343 return false;
3345 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3347 if (STMT_VINFO_LIVE_P (stmt_info))
3349 /* FORNOW: not yet supported. */
3350 if (vect_print_dump_info (REPORT_DETAILS))
3351 fprintf (vect_dump, "value used after loop.");
3352 return false;
3355 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3356 return false;
3358 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3359 if (TREE_CODE (scalar_dest) != SSA_NAME)
3360 return false;
3362 op = GIMPLE_STMT_OPERAND (stmt, 1);
3363 if (TREE_CODE (op) != ARRAY_REF
3364 && TREE_CODE (op) != INDIRECT_REF
3365 && !DR_GROUP_FIRST_DR (stmt_info))
3366 return false;
3368 if (!STMT_VINFO_DATA_REF (stmt_info))
3369 return false;
3371 mode = (int) TYPE_MODE (vectype);
3373 /* FORNOW. In some cases we can vectorize even if the data-type is not
3374 supported (e.g. data copies). */
3375 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3377 if (vect_print_dump_info (REPORT_DETAILS))
3378 fprintf (vect_dump, "Aligned load, but unsupported type.");
3379 return false;
3382 /* Check if the load is a part of an interleaving chain. */
3383 if (DR_GROUP_FIRST_DR (stmt_info))
3385 strided_load = true;
3387 /* Check if interleaving is supported. */
3388 if (!vect_strided_load_supported (vectype))
3389 return false;
3392 if (!vec_stmt) /* transformation not required. */
3394 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3395 return true;
3398 /** Transform. **/
3400 if (vect_print_dump_info (REPORT_DETAILS))
3401 fprintf (vect_dump, "transform load.");
3403 if (strided_load)
3405 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3406 /* Check if the chain of loads is already vectorized. */
3407 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3409 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3410 return true;
3412 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3413 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3414 dr_chain = VEC_alloc (tree, heap, group_size);
3416 else
3418 first_stmt = stmt;
3419 first_dr = dr;
3420 group_size = 1;
3423 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3424 gcc_assert (alignment_support_scheme);
3427 /* In case the vectorization factor (VF) is bigger than the number
3428 of elements that we can fit in a vectype (nunits), we have to generate
3429 more than one vector stmt - i.e., we need to "unroll" the
3430 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3431 from one copy of the vector stmt to the next, in the field
3432 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3433 stages to find the correct vector defs to be used when vectorizing
3434 stmts that use the defs of the current stmt. The example below illustrates
3435 the vectorization process when VF=16 and nunits=4 (i.e., we need to create
3436 4 vectorized stmts):
3438 before vectorization:
3439 RELATED_STMT VEC_STMT
3440 S1: x = memref - -
3441 S2: z = x + 1 - -
3443 step 1: vectorize stmt S1:
3444 We first create the vector stmt VS1_0, and, as usual, record a
3445 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3446 Next, we create the vector stmt VS1_1, and record a pointer to
3447 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3448 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3449 stmts and pointers:
3450 RELATED_STMT VEC_STMT
3451 VS1_0: vx0 = memref0 VS1_1 -
3452 VS1_1: vx1 = memref1 VS1_2 -
3453 VS1_2: vx2 = memref2 VS1_3 -
3454 VS1_3: vx3 = memref3 - -
3455 S1: x = memref - VS1_0
3456 S2: z = x + 1 - -
3458 See the documentation of vect_get_vec_def_for_stmt_copy for how the
3459 information we recorded in the RELATED_STMT field is used to vectorize
3460 stmt S2. */
3462 /* In case of interleaving (non-unit strided access):
3464 S1: x2 = &base + 2
3465 S2: x0 = &base
3466 S3: x1 = &base + 1
3467 S4: x3 = &base + 3
3469 Vectorized loads are created in the order of memory accesses
3470 starting from the access of the first stmt of the chain:
3472 VS1: vx0 = &base
3473 VS2: vx1 = &base + vec_size*1
3474 VS3: vx2 = &base + vec_size*2
3475 VS4: vx3 = &base + vec_size*3
3477 Then permutation statements are generated:
3479 VS5: vx4 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3480 VS6: vx5 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3483 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3484 (the order of the data-refs in the output of vect_permute_load_chain
3485 corresponds to the order of scalar stmts in the interleaving chain - see
3486 the documentation of vect_permute_load_chain()).
3487 The generation of permutation stmts and recording them in
3488 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3490 In case of both multiple types and interleaving, the vector loads and
3491 permutation stmts above are created for every copy. The result vector stmts
3492 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3493 STMT_VINFO_RELATED_STMT for the next copies. */
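/* A worked instance of the above (VF, nunits and group size chosen for
   illustration): with VF == 8 and nunits == 4 (so ncopies == 2) and an
   interleaving chain of two loads, each copy j generates two vector loads
   followed by the permutation stmts:

     j == 0:  vx0 = memref0;  vx1 = memref1;
              ve0 = VEC_EXTRACT_EVEN_EXPR <vx0, vx1>;
              vo0 = VEC_EXTRACT_ODD_EXPR <vx0, vx1>;
     j == 1:  vx2 = memref2;  vx3 = memref3;
              ve1 = VEC_EXTRACT_EVEN_EXPR <vx2, vx3>;
              vo1 = VEC_EXTRACT_ODD_EXPR <vx2, vx3>;

   ve0/vo0 go into STMT_VINFO_VEC_STMT of the two scalar stmts in the chain,
   ve1/vo1 into the corresponding STMT_VINFO_RELATED_STMT.  */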
3495 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3496 on a target that supports unaligned accesses (dr_unaligned_supported)
3497 we generate the following code:
3498 p = initial_addr;
3499 indx = 0;
3500 loop {
3501 p = p + indx * vectype_size;
3502 vec_dest = *(p);
3503 indx = indx + 1;
3504 }
3506 Otherwise, the data reference is potentially unaligned on a target that
3507 does not support unaligned accesses (dr_unaligned_software_pipeline) -
3508 then generate the following code, in which the data in each iteration is
3509 obtained by two vector loads, one from the previous iteration, and one
3510 from the current iteration:
3511 p1 = initial_addr;
3512 msq_init = *(floor(p1))
3513 p2 = initial_addr + VS - 1;
3514 realignment_token = call target_builtin;
3515 indx = 0;
3516 loop {
3517 p2 = p2 + indx * vectype_size
3518 lsq = *(floor(p2))
3519 vec_dest = realign_load (msq, lsq, realignment_token)
3520 indx = indx + 1;
3521 msq = lsq;
3522 } */
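/* A numeric instance of the software-pipelined scheme (addresses chosen
   for illustration, assuming 16-byte vectors): with initial_addr == 0x1004,
   msq_init = *(floor(0x1004)) loads bytes 0x1000..0x100f, and in the first
   iteration lsq = *(floor(0x1004 + 15)) loads bytes 0x1010..0x101f;
   realign_load then extracts the desired bytes 0x1004..0x1013 out of
   <msq, lsq> as directed by realignment_token.  */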
3524 if (alignment_support_scheme == dr_unaligned_software_pipeline)
3526 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
3527 phi_stmt = SSA_NAME_DEF_STMT (msq);
3528 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3531 prev_stmt_info = NULL;
3532 for (j = 0; j < ncopies; j++)
3534 /* 1. Create the vector pointer update chain. */
3535 if (j == 0)
3536 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
3537 &ptr_incr, false, NULL_TREE);
3538 else
3539 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3541 for (i = 0; i < group_size; i++)
3543 /* 2. Create the vector-load in the loop. */
3544 switch (alignment_support_scheme)
3546 case dr_aligned:
3547 gcc_assert (aligned_access_p (first_dr));
3548 data_ref = build_fold_indirect_ref (dataref_ptr);
3549 break;
3550 case dr_unaligned_supported:
3552 int mis = DR_MISALIGNMENT (first_dr);
3553 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3555 gcc_assert (!aligned_access_p (first_dr));
3556 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3557 data_ref =
3558 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3559 break;
3561 case dr_unaligned_software_pipeline:
3562 gcc_assert (!aligned_access_p (first_dr));
3563 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3564 break;
3565 default:
3566 gcc_unreachable ();
3568 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3569 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3570 data_ref);
3571 new_temp = make_ssa_name (vec_dest, new_stmt);
3572 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3573 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3574 copy_virtual_operands (new_stmt, stmt);
3575 mark_symbols_for_renaming (new_stmt);
3577 /* 3. Handle explicit realignment if necessary/supported. */
3578 if (alignment_support_scheme == dr_unaligned_software_pipeline)
3580 /* Create in loop:
3581 <vec_dest = realign_load (msq, lsq, realignment_token)> */
3582 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
3583 if (!realignment_token)
3584 realignment_token = dataref_ptr;
3585 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3586 new_stmt =
3587 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
3588 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3589 new_stmt);
3590 new_temp = make_ssa_name (vec_dest, new_stmt);
3591 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3592 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3593 if (i == group_size - 1 && j == ncopies - 1)
3594 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
3595 msq = lsq;
3597 if (strided_load)
3598 VEC_quick_push (tree, dr_chain, new_temp);
3599 if (i < group_size - 1)
3600 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3603 if (strided_load)
3605 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
3606 return false;
3607 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3608 dr_chain = VEC_alloc (tree, heap, group_size);
3610 else
3612 if (j == 0)
3613 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3614 else
3615 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3616 prev_stmt_info = vinfo_for_stmt (new_stmt);
3620 return true;
3624 /* Function vectorizable_live_operation.
3626 STMT computes a value that is used outside the loop. Check if
3627 it can be supported. */
3629 bool
3630 vectorizable_live_operation (tree stmt,
3631 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3632 tree *vec_stmt ATTRIBUTE_UNUSED)
3634 tree operation;
3635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3636 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3637 int i;
3638 enum tree_code code;
3639 int op_type;
3640 tree op;
3641 tree def, def_stmt;
3642 enum vect_def_type dt;
3644 if (!STMT_VINFO_LIVE_P (stmt_info))
3645 return false;
3647 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3648 return false;
3650 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3651 return false;
3653 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3654 code = TREE_CODE (operation);
3656 op_type = TREE_CODE_LENGTH (code);
3658 /* FORNOW: support only if all uses are invariant. This means
3659 that the scalar operations can remain in place, unvectorized.
3660 The original last scalar value that they compute will be used. */
3662 for (i = 0; i < op_type; i++)
3664 op = TREE_OPERAND (operation, i);
3665 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3667 if (vect_print_dump_info (REPORT_DETAILS))
3668 fprintf (vect_dump, "use not simple.");
3669 return false;
3672 if (dt != vect_invariant_def && dt != vect_constant_def)
3673 return false;
3676 /* No transformation is required for the cases we currently support. */
3677 return true;
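/* For example (illustration only), in

     loop:        s_1 = x_2 + y_3;     <-- x_2, y_3 loop-invariant
                  ...
     after loop:  use (s_1);

   s_1 is live after the loop but both of its operands are invariant, so
   the stmt is left in place unvectorized and the last scalar value it
   computes is what the uses after the loop see.  */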
3681 /* Function vect_is_simple_cond.
3683 Input:
3684 LOOP - the loop that is being vectorized.
3685 COND - Condition that is checked for simple use.
3687 Returns whether a COND can be vectorized. Checks whether
3688 condition operands are supportable using vect_is_simple_use. */
3690 static bool
3691 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3693 tree lhs, rhs;
3694 tree def;
3695 enum vect_def_type dt;
3697 if (!COMPARISON_CLASS_P (cond))
3698 return false;
3700 lhs = TREE_OPERAND (cond, 0);
3701 rhs = TREE_OPERAND (cond, 1);
3703 if (TREE_CODE (lhs) == SSA_NAME)
3705 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3706 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3707 return false;
3709 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
3710 return false;
3712 if (TREE_CODE (rhs) == SSA_NAME)
3714 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3715 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3716 return false;
3718 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
3719 return false;
3721 return true;
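/* E.g. (illustration): "x_1 < n_2" is simple when x_1 and n_2 are SSA
   names accepted by vect_is_simple_use, as is "x_1 <= 10"; a condition
   such as "a.f == b_3", whose first operand is neither an SSA_NAME nor
   an INTEGER_CST/REAL_CST, is rejected.  */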
3724 /* vectorizable_condition.
3726 Check if STMT is a conditional modify expression that can be vectorized.
3727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3728 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3729 at BSI.
3731 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3733 bool
3734 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3736 tree scalar_dest = NULL_TREE;
3737 tree vec_dest = NULL_TREE;
3738 tree op = NULL_TREE;
3739 tree cond_expr, then_clause, else_clause;
3740 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3741 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3742 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3743 tree vec_compare, vec_cond_expr;
3744 tree new_temp;
3745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3746 enum machine_mode vec_mode;
3747 tree def;
3748 enum vect_def_type dt;
3749 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3750 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3752 gcc_assert (ncopies >= 1);
3753 if (ncopies > 1)
3754 return false; /* FORNOW */
3756 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3757 return false;
3759 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3761 if (STMT_VINFO_LIVE_P (stmt_info))
3763 /* FORNOW: not yet supported. */
3764 if (vect_print_dump_info (REPORT_DETAILS))
3765 fprintf (vect_dump, "value used after loop.");
3766 return false;
3769 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3770 return false;
3772 op = GIMPLE_STMT_OPERAND (stmt, 1);
3774 if (TREE_CODE (op) != COND_EXPR)
3775 return false;
3777 cond_expr = TREE_OPERAND (op, 0);
3778 then_clause = TREE_OPERAND (op, 1);
3779 else_clause = TREE_OPERAND (op, 2);
3781 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3782 return false;
3784 /* We do not handle two different vector types for the condition
3785 and the values. */
3786 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3787 return false;
3789 if (TREE_CODE (then_clause) == SSA_NAME)
3791 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3792 if (!vect_is_simple_use (then_clause, loop_vinfo,
3793 &then_def_stmt, &def, &dt))
3794 return false;
3796 else if (TREE_CODE (then_clause) != INTEGER_CST
3797 && TREE_CODE (then_clause) != REAL_CST)
3798 return false;
3800 if (TREE_CODE (else_clause) == SSA_NAME)
3802 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3803 if (!vect_is_simple_use (else_clause, loop_vinfo,
3804 &else_def_stmt, &def, &dt))
3805 return false;
3807 else if (TREE_CODE (else_clause) != INTEGER_CST
3808 && TREE_CODE (else_clause) != REAL_CST)
3809 return false;
3812 vec_mode = TYPE_MODE (vectype);
3814 if (!vec_stmt)
3816 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3817 return expand_vec_cond_expr_p (op, vec_mode);
3820 /* Transform. */
3822 /* Handle def. */
3823 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3824 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3826 /* Handle cond expr. */
3827 vec_cond_lhs =
3828 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3829 vec_cond_rhs =
3830 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3831 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3832 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3834 /* Arguments are ready. Create the new vector stmt. */
3835 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3836 vec_cond_lhs, vec_cond_rhs);
3837 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3838 vec_compare, vec_then_clause, vec_else_clause);
3840 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3841 vec_cond_expr);
3842 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3843 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3844 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3846 return true;
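/* For illustration, the code above turns the scalar stmt

     x_5 = a_1 < b_2 ? c_3 : d_4;

   into the single vector stmt (va/vb/vc/vd being the vector defs
   fetched above):

     vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;

   (one copy only, since ncopies > 1 is rejected for now).  */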
3849 /* Function vect_transform_stmt.
3851 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3853 bool
3854 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
3856 bool is_store = false;
3857 tree vec_stmt = NULL_TREE;
3858 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3859 tree orig_stmt_in_pattern;
3860 bool done;
3862 if (STMT_VINFO_RELEVANT_P (stmt_info))
3864 switch (STMT_VINFO_TYPE (stmt_info))
3866 case type_demotion_vec_info_type:
3867 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
3868 gcc_assert (done);
3869 break;
3871 case type_promotion_vec_info_type:
3872 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
3873 gcc_assert (done);
3874 break;
3876 case op_vec_info_type:
3877 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3878 gcc_assert (done);
3879 break;
3881 case assignment_vec_info_type:
3882 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3883 gcc_assert (done);
3884 break;
3886 case load_vec_info_type:
3887 done = vectorizable_load (stmt, bsi, &vec_stmt);
3888 gcc_assert (done);
3889 break;
3891 case store_vec_info_type:
3892 done = vectorizable_store (stmt, bsi, &vec_stmt);
3893 gcc_assert (done);
3894 if (DR_GROUP_FIRST_DR (stmt_info))
3896 /* In case of interleaving, the whole chain is vectorized when the
3897 last store in the chain is reached. Store stmts before the last
3898 one are skipped, and their stmt_vec_info shouldn't be freed
3899 meanwhile. */
3900 *strided_store = true;
3901 if (STMT_VINFO_VEC_STMT (stmt_info))
3902 is_store = true;
3904 else
3905 is_store = true;
3906 break;
3908 case condition_vec_info_type:
3909 done = vectorizable_condition (stmt, bsi, &vec_stmt);
3910 gcc_assert (done);
3911 break;
3913 case call_vec_info_type:
3914 done = vectorizable_call (stmt, bsi, &vec_stmt);
3915 break;
3917 default:
3918 if (vect_print_dump_info (REPORT_DETAILS))
3919 fprintf (vect_dump, "stmt not supported.");
3920 gcc_unreachable ();
3923 gcc_assert (vec_stmt || *strided_store);
3924 if (vec_stmt)
3926 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3927 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
3928 if (orig_stmt_in_pattern)
3930 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
3931 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3933 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3935 /* STMT was inserted by the vectorizer to replace a
3936 computation idiom. ORIG_STMT_IN_PATTERN is a stmt in the
3937 original sequence that computed this idiom. We need to
3938 record a pointer to VEC_STMT in the stmt_info of
3939 ORIG_STMT_IN_PATTERN. See more details in the
3940 documentation of vect_pattern_recog. */
3942 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
3948 if (STMT_VINFO_LIVE_P (stmt_info))
3950 switch (STMT_VINFO_TYPE (stmt_info))
3952 case reduc_vec_info_type:
3953 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
3954 gcc_assert (done);
3955 break;
3957 default:
3958 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
3959 gcc_assert (done);
3963 return is_store;
3967 /* This function builds ni_name = <number of iterations the loop executes>,
3968 inserting the computation on the loop preheader edge. */
3970 static tree
3971 vect_build_loop_niters (loop_vec_info loop_vinfo)
3973 tree ni_name, stmt, var;
3974 edge pe;
3975 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3976 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3978 var = create_tmp_var (TREE_TYPE (ni), "niters");
3979 add_referenced_var (var);
3980 ni_name = force_gimple_operand (ni, &stmt, false, var);
3982 pe = loop_preheader_edge (loop);
3983 if (stmt)
3985 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3986 gcc_assert (!new_bb);
3989 return ni_name;
3993 /* This function generates the following statements:
3995 ni_name = number of iterations the loop executes
3996 ratio = ni_name / vf
3997 ratio_mult_vf_name = ratio * vf
3999 and places them at the loop preheader edge. */
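/* Worked example (numbers for illustration): for ni_name == 1003 and
   vf == 4 (log2 (vf) == 2):

     ratio              = 1003 >> 2 == 250
     ratio_mult_vf_name = 250 << 2  == 1000

   so the vector loop covers the first 1000 scalar iterations and the
   remaining 3 are left to the epilog loop.  */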
4001 static void
4002 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
4003 tree *ni_name_ptr,
4004 tree *ratio_mult_vf_name_ptr,
4005 tree *ratio_name_ptr)
4008 edge pe;
4009 basic_block new_bb;
4010 tree stmt, ni_name;
4011 tree var;
4012 tree ratio_name;
4013 tree ratio_mult_vf_name;
4014 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4015 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
4016 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4017 tree log_vf;
4019 pe = loop_preheader_edge (loop);
4021 /* Generate a temporary variable that contains
4022 the number of iterations the loop executes. */
4024 ni_name = vect_build_loop_niters (loop_vinfo);
4025 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
4027 /* Create: ratio = ni >> log2 (vf). */
4029 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
4030 if (!is_gimple_val (ratio_name))
4032 var = create_tmp_var (TREE_TYPE (ni), "bnd");
4033 add_referenced_var (var);
4035 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
4036 pe = loop_preheader_edge (loop);
4037 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4038 gcc_assert (!new_bb);
4041 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4043 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
4044 ratio_name, log_vf);
4045 if (!is_gimple_val (ratio_mult_vf_name))
4047 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
4048 add_referenced_var (var);
4050 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
4051 true, var);
4052 pe = loop_preheader_edge (loop);
4053 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4054 gcc_assert (!new_bb);
4057 *ni_name_ptr = ni_name;
4058 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
4059 *ratio_name_ptr = ratio_name;
4061 return;
4065 /* Function update_vuses_to_preheader.
4067 Input:
4068 STMT - a statement with potential VUSEs.
4069 LOOP - the loop whose preheader will contain STMT.
4071 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4072 appears to be defined in a VDEF in another statement in a loop.
4073 One such case is when the VUSE is at the dereference of a __restrict__
4074 pointer in a load and the VDEF is at the dereference of a different
4075 __restrict__ pointer in a store. Vectorization may result in
4076 copy_virtual_operands being called to copy the problematic VUSE to a new
4077 statement that is being inserted in the loop preheader. This procedure
4078 is called to change the SSA_NAME in the new statement's VUSE from the
4079 SSA_NAME updated in the loop to the related SSA_NAME available on the
4080 path entering the loop.
4082 When this function is called, we have the following situation:
4084 # vuse <name1>
4085 S1: vload
4086 do {
4087 # name1 = phi < name0 , name2>
4089 # vuse <name1>
4090 S2: vload
4092 # name2 = vdef <name1>
4093 S3: vstore
4095 }while...
4097 Stmt S1 was created in the loop preheader block as part of misaligned-load
4098 handling. This function fixes the name of the vuse of S1 from 'name1' to
4099 'name0'. */
4101 static void
4102 update_vuses_to_preheader (tree stmt, struct loop *loop)
4104 basic_block header_bb = loop->header;
4105 edge preheader_e = loop_preheader_edge (loop);
4106 ssa_op_iter iter;
4107 use_operand_p use_p;
4109 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
4111 tree ssa_name = USE_FROM_PTR (use_p);
4112 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4113 tree name_var = SSA_NAME_VAR (ssa_name);
4114 basic_block bb = bb_for_stmt (def_stmt);
4116 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4117 if (!IS_EMPTY_STMT (def_stmt)
4118 && flow_bb_inside_loop_p (loop, bb))
4120 /* If the block containing the statement defining the SSA_NAME
4121 is in the loop then it's necessary to find the definition
4122 outside the loop using the PHI nodes of the header. */
4123 tree phi;
4124 bool updated = false;
4126 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
4128 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
4130 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
4131 updated = true;
4132 break;
4135 gcc_assert (updated);
4141 /* Function vect_update_ivs_after_vectorizer.
4143 "Advance" the induction variables of LOOP to the value they should take
4144 after the execution of LOOP. This is currently necessary because the
4145 vectorizer does not handle induction variables that are used after the
4146 loop. Such a situation occurs when the last iterations of LOOP are
4147 peeled, because:
4148 1. We introduced new uses after LOOP for IVs that were not originally used
4149 after LOOP: the IVs of LOOP are now used by an epilog loop.
4150 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4151 times, whereas the loop IVs should be bumped N times.
4153 Input:
4154 - LOOP - a loop that is going to be vectorized. The last few iterations
4155 of LOOP were peeled.
4156 - NITERS - the number of iterations that LOOP executes (before it is
4157 vectorized). i.e, the number of times the ivs should be bumped.
4158 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4159 coming out from LOOP on which there are uses of the LOOP ivs
4160 (this is the path from LOOP->exit to epilog_loop->preheader).
4162 The new definitions of the ivs are placed in LOOP->exit.
4163 The phi args associated with the edge UPDATE_E in the bb
4164 UPDATE_E->dest are updated accordingly.
4166 Assumption 1: Like the rest of the vectorizer, this function assumes
4167 a single loop exit that has a single predecessor.
4169 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4170 organized in the same order.
4172 Assumption 3: The access function of the ivs is simple enough (see
4173 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4175 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4176 coming out of LOOP on which the ivs of LOOP are used (this is the path
4177 that leads to the epilog loop; other paths skip the epilog loop). This
4178 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4179 needs to have its phis updated.
4180 */
4182 static void
4183 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
4184 edge update_e)
4186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4187 basic_block exit_bb = single_exit (loop)->dest;
4188 tree phi, phi1;
4189 basic_block update_bb = update_e->dest;
4191 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
4193 /* Make sure there exists a single-predecessor exit bb: */
4194 gcc_assert (single_pred_p (exit_bb));
4196 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
4197 phi && phi1;
4198 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
4200 tree access_fn = NULL;
4201 tree evolution_part;
4202 tree init_expr;
4203 tree step_expr;
4204 tree var, stmt, ni, ni_name;
4205 block_stmt_iterator last_bsi;
4207 if (vect_print_dump_info (REPORT_DETAILS))
4209 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
4210 print_generic_expr (vect_dump, phi, TDF_SLIM);
4213 /* Skip virtual phis. */
4214 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4216 if (vect_print_dump_info (REPORT_DETAILS))
4217 fprintf (vect_dump, "virtual phi. skip.");
4218 continue;
4221 /* Skip reduction phis. */
4222 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "reduc phi. skip.");
4226 continue;
4229 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
4230 gcc_assert (access_fn);
4231 evolution_part =
4232 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
4233 gcc_assert (evolution_part != NULL_TREE);
4235 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4236 of degree >= 2 or exponential. */
4237 gcc_assert (!tree_is_chrec (evolution_part));
4239 step_expr = evolution_part;
4240 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
4241 loop->num));
4243 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
4244 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
4245 fold_convert (TREE_TYPE (init_expr),
4246 niters),
4247 step_expr),
4248 init_expr);
4250 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
4251 add_referenced_var (var);
4253 ni_name = force_gimple_operand (ni, &stmt, false, var);
4255 /* Insert stmt into exit_bb. */
4256 last_bsi = bsi_last (exit_bb);
4257 if (stmt)
4258 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
4260 /* Fix phi expressions in the successor bb. */
4261 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
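/* For illustration: an IV with access_fn {0, +, 4}_loop (init_expr == 0,
   step_expr == 4) advanced over NITERS == 256 iterations gets
   ni == 0 + 256 * 4 == 1024; ni_name is computed in exit_bb and becomes
   the phi argument in update_bb, so the code after the loop sees the IV
   value the scalar loop would have produced.  */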
4266 /* Function vect_do_peeling_for_loop_bound
4268 Peel the last iterations of the loop represented by LOOP_VINFO.
4269 The peeled iterations form a new epilog loop. Given that the loop now
4270 iterates NITERS times, the new epilog loop iterates
4271 NITERS % VECTORIZATION_FACTOR times.
4273 The original loop will later be made to iterate
4274 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
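/* For example (illustrative numbers): with NITERS == 103 and a
   vectorization factor of 4, RATIO == 25, the vectorized loop covers the
   first 100 iterations, and the epilog loop created here executes the
   remaining 103 % 4 == 3 iterations.  */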
4276 static void
4277 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
4279 tree ni_name, ratio_mult_vf_name;
4280 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4281 struct loop *new_loop;
4282 edge update_e;
4283 basic_block preheader;
4284 int loop_num;
4285 unsigned int th;
4287 if (vect_print_dump_info (REPORT_DETAILS))
4288 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
4290 initialize_original_copy_tables ();
4292 /* Generate the following variables on the preheader of the original loop:
4294 ni_name = number of iterations the original loop executes
4295 ratio = ni_name / vf
4296 ratio_mult_vf_name = ratio * vf */
4297 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
4298 &ratio_mult_vf_name, ratio);
4300 loop_num = loop->num;
4301 /* Threshold for vectorized loop. */
4302 th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
4303 LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4304 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
4305 ratio_mult_vf_name, ni_name, false, th);
4306 gcc_assert (new_loop);
4307 gcc_assert (loop_num == loop->num);
4308 #ifdef ENABLE_CHECKING
4309 slpeel_verify_cfg_after_peeling (loop, new_loop);
4310 #endif
4312 /* A guard that controls whether the new_loop is to be executed or skipped
4313 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4314 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4315 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4316 is on the path where the LOOP IVs are used and need to be updated. */
4318 preheader = loop_preheader_edge (new_loop)->src;
4319 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
4320 update_e = EDGE_PRED (preheader, 0);
4321 else
4322 update_e = EDGE_PRED (preheader, 1);
4324 /* Update IVs of original loop as if they were advanced
4325 by ratio_mult_vf_name steps. */
4326 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
4328 /* After peeling we have to reset scalar evolution analyzer. */
4329 scev_reset ();
4331 free_original_copy_tables ();
4335 /* Function vect_gen_niters_for_prolog_loop
4337 Set the number of iterations for the loop represented by LOOP_VINFO
4338 to the minimum between LOOP_NITERS (the original iteration count of the loop)
4339 and the misalignment of DR - the data reference recorded in
4340 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
4341 this loop, the data reference DR will refer to an aligned location.
4343 The following computation is generated:
4345 If the misalignment of DR is known at compile time:
4346 addr_mis = DR_MISALIGNMENT (dr);
4347 Else, compute address misalignment in bytes:
4348 addr_mis = addr & (vectype_size - 1)
4350 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4352 (elem_size = element type size; an element is the scalar element
4353 whose type is the inner type of the vectype)
4355 For interleaving,
4357 prolog_niters = min ( LOOP_NITERS ,
4358 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
4359 where group_size is the size of the interleaved group.
4360 */
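/* Worked instance (numbers for illustration): with VF == 4,
   elem_size == 4 bytes, group_size == 1 and a known misalignment of
   8 bytes:

     prolog_niters = min (LOOP_NITERS, (4 - 8/4) & (4 - 1))
                   = min (LOOP_NITERS, 2)

   i.e. two scalar iterations are peeled to reach a vector-aligned
   address.  */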
4362 static tree
4363 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
4365 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
4366 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4367 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4368 tree var, stmt;
4369 tree iters, iters_name;
4370 edge pe;
4371 basic_block new_bb;
4372 tree dr_stmt = DR_STMT (dr);
4373 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
4374 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4375 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
4376 tree niters_type = TREE_TYPE (loop_niters);
4377 int group_size = 1;
4378 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
4380 if (DR_GROUP_FIRST_DR (stmt_info))
4382 /* For interleaved accesses, the element size must be multiplied by the
4383 size of the interleaved group. */
4384 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
4385 DR_GROUP_FIRST_DR (stmt_info)));
4386 element_size *= group_size;
4389 pe = loop_preheader_edge (loop);
4391 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
4393 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
4394 int elem_misalign = byte_misalign / element_size;
4396 if (vect_print_dump_info (REPORT_DETAILS))
4397 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
4398 iters = build_int_cst (niters_type,
4399 (vf - elem_misalign)&(vf/group_size-1));
4401 else
4403 tree new_stmts = NULL_TREE;
4404 tree start_addr =
4405 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
4406 tree ptr_type = TREE_TYPE (start_addr);
4407 tree size = TYPE_SIZE (ptr_type);
4408 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
4409 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
4410 tree elem_size_log =
4411 build_int_cst (type, exact_log2 (vectype_align/vf));
4412 tree vf_minus_1 = build_int_cst (type, vf - 1);
4413 tree vf_tree = build_int_cst (type, vf);
4414 tree byte_misalign;
4415 tree elem_misalign;
4417 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
4418 gcc_assert (!new_bb);
4420 /* Create: byte_misalign = addr & (vectype_size - 1) */
4421 byte_misalign =
4422 fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
4424 /* Create: elem_misalign = byte_misalign / element_size */
4425 elem_misalign =
4426 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
4428 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
4429 iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
4430 iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
4431 iters = fold_convert (niters_type, iters);
4434 /* Create: prolog_loop_niters = min (iters, loop_niters) */
4435 /* If the loop bound is known at compile time we already verified that it is
4436 greater than vf; since the misalignment ('iters') is at most vf, there's
4437 no need to generate the MIN_EXPR in this case. */
4438 if (TREE_CODE (loop_niters) != INTEGER_CST)
4439 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
4441 if (vect_print_dump_info (REPORT_DETAILS))
4443 fprintf (vect_dump, "niters for prolog loop: ");
4444 print_generic_expr (vect_dump, iters, TDF_SLIM);
4447 var = create_tmp_var (niters_type, "prolog_loop_niters");
4448 add_referenced_var (var);
4449 iters_name = force_gimple_operand (iters, &stmt, false, var);
4451 /* Insert stmt on loop preheader edge. */
4452 if (stmt)
4454 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4455 gcc_assert (!new_bb);
4458 return iters_name;
4462 /* Function vect_update_init_of_dr
4464 NITERS iterations were peeled from LOOP. DR represents a data reference
4465 in LOOP. This function updates the information recorded in DR to
4466 account for the fact that the first NITERS iterations had already been
4467 executed. Specifically, it updates the OFFSET field of DR. */
4469 static void
4470 vect_update_init_of_dr (struct data_reference *dr, tree niters)
4472 tree offset = DR_OFFSET (dr);
4474 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
4475 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
4476 DR_OFFSET (dr) = offset;
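/* E.g. (illustration): for a data-ref with DR_STEP == 4 and NITERS == 3
   peeled iterations, the OFFSET recorded in DR grows by 3 * 4 == 12, so
   subsequent address computations start 12 bytes further into the
   object.  */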
4480 /* Function vect_update_inits_of_drs
4482 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
4483 This function updates the information recorded for the data references in
4484 the loop to account for the fact that the first NITERS iterations had
4485 already been executed. Specifically, it updates the OFFSET field of each
4486 data_reference in the loop (see vect_update_init_of_dr).
4488 static void
4489 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
4491 unsigned int i;
4492 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
4493 struct data_reference *dr;
4495 if (vect_dump && (dump_flags & TDF_DETAILS))
4496 fprintf (vect_dump, "=== vect_update_inits_of_drs ===");
4498 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4499 vect_update_init_of_dr (dr, niters);
4503 /* Function vect_do_peeling_for_alignment
4505 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
4506 'niters' is set to the misalignment of one of the data references in the
4507 loop, thereby forcing it to refer to an aligned location at the beginning
4508 of the execution of this loop. The data reference for which we are
4509 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
4511 static void
4512 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
4514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4515 tree niters_of_prolog_loop, ni_name;
4516 tree n_iters;
4517 struct loop *new_loop;
4519 if (vect_print_dump_info (REPORT_DETAILS))
4520 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
4522 initialize_original_copy_tables ();
4524 ni_name = vect_build_loop_niters (loop_vinfo);
4525 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
4527 /* Peel the prolog loop and make it iterate niters_of_prolog_loop times. */
4528 new_loop =
4529 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
4530 niters_of_prolog_loop, ni_name, true, 0);
4531 gcc_assert (new_loop);
4532 #ifdef ENABLE_CHECKING
4533 slpeel_verify_cfg_after_peeling (new_loop, loop);
4534 #endif
4536 /* Update number of times loop executes. */
4537 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
4538 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
4539 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
4541 /* Update the init conditions of the access functions of all data refs. */
4542 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
4544 /* After peeling we have to reset scalar evolution analyzer. */
4545 scev_reset ();
4547 free_original_copy_tables ();
4551 /* Function vect_create_cond_for_align_checks.
4553 Create a conditional expression that represents the alignment checks for
4554 all the data references (array element references) whose alignment must be
4555 checked at runtime.
4557 Input:
4558 LOOP_VINFO - two fields of the loop information are used.
4559 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
4560 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
4562 Output:
4563 COND_EXPR_STMT_LIST - statements needed to construct the conditional
4564 expression.
4565 The returned value is the conditional expression to be used in the if
4566 statement that controls which version of the loop gets executed at runtime.
4568 The algorithm makes two assumptions:
4569 1) The number of bytes "n" in a vector is a power of 2.
4570 2) An address "a" is aligned if a%n is zero, and this
4571 test can be done as a&(n-1) == 0. For example, for 16
4572 byte vectors the test is a&0xf == 0. */
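/* A concrete instance (addresses chosen for illustration): for 16-byte
   vectors the mask is 0xf.  With two data-refs whose first-vector
   addresses are 0x1000 and 0x2004:

     or_tmp  == 0x1000 | 0x2004 == 0x3004
     and_tmp == 0x3004 & 0xf    == 0x4

   so the EQ_EXPR built below is false at runtime and the unvectorized
   version of the loop is executed.  */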
4574 static tree
4575 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
4576 tree *cond_expr_stmt_list)
4578 VEC(tree,heap) *may_misalign_stmts
4579 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
4580 tree ref_stmt;
4581 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
4582 tree mask_cst;
4583 unsigned int i;
4584 tree psize;
4585 tree int_ptrsize_type;
4586 char tmp_name[20];
4587 tree or_tmp_name = NULL_TREE;
4588 tree and_tmp, and_tmp_name, and_stmt;
4589 tree ptrsize_zero;
4591 /* Check that mask is one less than a power of 2, i.e., mask is
4592 all zeros followed by all ones. */
4593 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
4595 /* CHECKME: what is the best integer or unsigned type to use to hold a
4596 cast from a pointer value? */
4597 psize = TYPE_SIZE (ptr_type_node);
4598 int_ptrsize_type
4599 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
4601 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
4602 of the first vector of the i'th data reference. */
4604 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
4606 tree new_stmt_list = NULL_TREE;
4607 tree addr_base;
4608 tree addr_tmp, addr_tmp_name, addr_stmt;
4609 tree or_tmp, new_or_tmp_name, or_stmt;
4611 /* create: addr_tmp = (int)(address_of_first_vector) */
4612 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
4613 &new_stmt_list,
4614 NULL_TREE);
4616 if (new_stmt_list != NULL_TREE)
4617 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
4619 sprintf (tmp_name, "%s%d", "addr2int", i);
4620 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4621 add_referenced_var (addr_tmp);
4622 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
4623 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
4624 addr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4625 addr_tmp_name, addr_stmt);
4626 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
4627 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
4629 /* The addresses are ORed together. */
4631 if (or_tmp_name != NULL_TREE)
4633 /* create: or_tmp = or_tmp | addr_tmp */
4634 sprintf (tmp_name, "%s%d", "orptrs", i);
4635 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4636 add_referenced_var (or_tmp);
4637 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
4638 or_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4639 new_or_tmp_name,
4640 build2 (BIT_IOR_EXPR, int_ptrsize_type,
4641 or_tmp_name,
4642 addr_tmp_name));
4643 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
4644 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
4645 or_tmp_name = new_or_tmp_name;
4647 else
4648 or_tmp_name = addr_tmp_name;
4650 } /* end for i */
4652 mask_cst = build_int_cst (int_ptrsize_type, mask);
4654 /* create: and_tmp = or_tmp & mask */
4655 and_tmp = create_tmp_var (int_ptrsize_type, "andmask");
4656 add_referenced_var (and_tmp);
4657 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
4659 and_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4660 and_tmp_name,
4661 build2 (BIT_AND_EXPR, int_ptrsize_type,
4662 or_tmp_name, mask_cst));
4663 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
4664 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
4666 /* Make and_tmp the left operand of the conditional test against zero.
4667 If and_tmp has a nonzero bit then some address is unaligned. */
4668 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
4669 return build2 (EQ_EXPR, boolean_type_node,
4670 and_tmp_name, ptrsize_zero);
4674 /* Function vect_transform_loop.
4676 The analysis phase has determined that the loop is vectorizable.
4677 Vectorize the loop - create vectorized stmts to replace the scalar
4678 stmts in the loop, and update the loop exit condition. */
4680 void
4681 vect_transform_loop (loop_vec_info loop_vinfo)
4683 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4684 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4685 int nbbs = loop->num_nodes;
4686 block_stmt_iterator si;
4687 int i;
4688 tree ratio = NULL;
4689 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4690 bool strided_store;
4692 if (vect_print_dump_info (REPORT_DETAILS))
4693 fprintf (vect_dump, "=== vect_transform_loop ===");
4695 /* If the loop has data references that may or may not be aligned then
4696 two versions of the loop need to be generated, one which is vectorized
4697 and one which isn't. A test is then generated to control which of the
4698 loops is executed. The test checks for the alignment of all of the
4699 data references that may or may not be aligned. */
4701 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
4703 struct loop *nloop;
4704 tree cond_expr;
4705 tree cond_expr_stmt_list = NULL_TREE;
4706 basic_block condition_bb;
4707 block_stmt_iterator cond_exp_bsi;
4708 basic_block merge_bb;
4709 basic_block new_exit_bb;
4710 edge new_exit_e, e;
4711 tree orig_phi, new_phi, arg;
4712 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
4714 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
4715 &cond_expr_stmt_list);
4716 initialize_original_copy_tables ();
4717 nloop = loop_version (loop, cond_expr, &condition_bb,
4718 prob, prob, REG_BR_PROB_BASE - prob, true);
4719 free_original_copy_tables ();
4721 /** Loop versioning violates an assumption we try to maintain during
4722 vectorization - that the loop exit block has a single predecessor.
4723 After versioning, the exit block of both loop versions is the same
4724 basic block (i.e. it has two predecessors). Just in order to simplify
4725 following transformations in the vectorizer, we fix this situation
4726 here by adding a new (empty) block on the exit-edge of the loop,
4727 with the proper loop-exit phis to maintain loop-closed-form. **/
4729 merge_bb = single_exit (loop)->dest;
4730 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
4731 new_exit_bb = split_edge (single_exit (loop));
4732 new_exit_e = single_exit (loop);
4733 e = EDGE_SUCC (new_exit_bb, 0);
4735 for (orig_phi = phi_nodes (merge_bb); orig_phi;
4736 orig_phi = PHI_CHAIN (orig_phi))
4738 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
4739 new_exit_bb);
4740 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4741 add_phi_arg (new_phi, arg, new_exit_e);
4742 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
4745 /** end loop-exit-fixes after versioning **/
4747 update_ssa (TODO_update_ssa);
4748 cond_exp_bsi = bsi_last (condition_bb);
4749 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
4752 /* CHECKME: we wouldn't need this if we called update_ssa once
4753 for all loops. */
4754 bitmap_zero (vect_memsyms_to_rename);
4756 /* Peel the loop if there are data refs with unknown alignment.
4757 Only one data ref with unknown alignment is allowed. */
4759 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4760 vect_do_peeling_for_alignment (loop_vinfo);
4762 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
4763 compile time constant), or it is a constant that doesn't divide by the
4764 vectorization factor, then an epilog loop needs to be created.
4765 We therefore duplicate the loop: the original loop will be vectorized,
4766 and will compute the first (n/VF) iterations. The second copy of the loop
4767 will remain scalar and will compute the remaining (n%VF) iterations.
4768 (VF is the vectorization factor). */
4770 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4771 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4772 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
4773 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
4774 else
4775 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
4776 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
4778 /* 1) Make sure the loop header has exactly two entries
4779 2) Make sure we have a preheader basic block. */
4781 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
4783 split_edge (loop_preheader_edge (loop));
4785 /* FORNOW: the vectorizer supports only loops whose body consists
4786 of one basic block (header + empty latch). When the vectorizer
4787 supports more involved loop forms, the order in which the BBs are
4788 traversed will need to be reconsidered. */
4790 for (i = 0; i < nbbs; i++)
4792 basic_block bb = bbs[i];
4794 for (si = bsi_start (bb); !bsi_end_p (si);)
4796 tree stmt = bsi_stmt (si);
4797 stmt_vec_info stmt_info;
4798 bool is_store;
4800 if (vect_print_dump_info (REPORT_DETAILS))
4802 fprintf (vect_dump, "------>vectorizing statement: ");
4803 print_generic_expr (vect_dump, stmt, TDF_SLIM);
4805 stmt_info = vinfo_for_stmt (stmt);
4806 gcc_assert (stmt_info);
4807 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4808 && !STMT_VINFO_LIVE_P (stmt_info))
4810 bsi_next (&si);
4811 continue;
4814 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
4815 != (unsigned HOST_WIDE_INT) vectorization_factor)
4816 && vect_print_dump_info (REPORT_DETAILS))
4817 fprintf (vect_dump, "multiple-types.");
4819 /* -------- vectorize statement ------------ */
4820 if (vect_print_dump_info (REPORT_DETAILS))
4821 fprintf (vect_dump, "transform statement.");
4823 strided_store = false;
4824 is_store = vect_transform_stmt (stmt, &si, &strided_store);
4825 if (is_store)
4827 stmt_ann_t ann;
4828 if (DR_GROUP_FIRST_DR (stmt_info))
4830 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
4831 interleaving chain was completed - free all the stores in
4832 the chain. */
4833 tree next = DR_GROUP_FIRST_DR (stmt_info);
4834 tree tmp;
4835 stmt_vec_info next_stmt_info;
4837 while (next)
4839 next_stmt_info = vinfo_for_stmt (next);
4840 /* Free the attached stmt_vec_info and remove the stmt. */
4841 ann = stmt_ann (next);
4842 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
4843 free (next_stmt_info);
4844 set_stmt_info (ann, NULL);
4845 next = tmp;
4847 bsi_remove (&si, true);
4848 continue;
4850 else
4852 /* Free the attached stmt_vec_info and remove the stmt. */
4853 ann = stmt_ann (stmt);
4854 free (stmt_info);
4855 set_stmt_info (ann, NULL);
4856 bsi_remove (&si, true);
4857 continue;
4860 else
4862 if (strided_store)
4864 /* This is the case of a skipped interleaved store. We don't free
4865 its stmt_vec_info. */
4866 bsi_remove (&si, true);
4867 continue;
4870 bsi_next (&si);
4871 } /* stmts in BB */
4872 } /* BBs in loop */
4874 slpeel_make_loop_iterate_ntimes (loop, ratio);
4876 mark_set_for_renaming (vect_memsyms_to_rename);
4878 /* The memory tags and pointers in vectorized statements need to
4879 have their SSA forms updated. FIXME, why can't this be delayed
4880 until all the loops have been transformed? */
4881 update_ssa (TODO_update_ssa);
4883 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
4884 fprintf (vect_dump, "LOOP VECTORIZED.");