gcc/tree-vect-transform.c
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
45 #include "real.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
 64 /* Utility functions dealing with loop peeling (not the peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
77 /* Function vect_get_new_vect_var.
 79    Returns a name for a new variable. The current naming scheme prepends the
 80    prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
 81    to NAME if NAME is provided; otherwise just the prefix is used.  */
84 static tree
85 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87 const char *prefix;
88 tree new_vect_var;
90 switch (var_kind)
92 case vect_simple_var:
93 prefix = "vect_";
94 break;
95 case vect_scalar_var:
96 prefix = "stmp_";
97 break;
98 case vect_pointer_var:
99 prefix = "vect_p";
100 break;
101 default:
102 gcc_unreachable ();
105 if (name)
106 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
107 else
108 new_vect_var = create_tmp_var (type, prefix);
110 return new_vect_var;
114 /* Function vect_create_index_for_vector_ref.
116    Create (and return) an index variable, along with its update chain in the
117 loop. This variable will be used to access a memory location in a vector
118 operation.
120 Input:
121 LOOP: The loop being vectorized.
122 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
123 function can be added here, or in the loop pre-header.
125 Output:
126 Return an index that will be used to index a vector array. It is expected
127 that a pointer to the first vector will be used as the base address for the
128 indexed reference.
130 FORNOW: we are not trying to be efficient, just creating a new index each
131 time from scratch. At this time all vector references could use the same
132 index.
134 TODO: create only one index to be used by all vector references. Record
135 the index in the LOOP_VINFO the first time this procedure is called and
136 return it on subsequent calls. The increment of this index must be placed
137 just before the conditional expression that ends the single block loop. */
139 static tree
140 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
142 tree init, step;
143 block_stmt_iterator incr_bsi;
144 bool insert_after;
145 tree indx_before_incr, indx_after_incr;
146 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
147 tree incr;
149 /* It is assumed that the base pointer used for vectorized access contains
150 the address of the first vector. Therefore the index used for vectorized
151 access must be initialized to zero and incremented by 1. */
153 init = integer_zero_node;
154 step = integer_one_node;
156 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
157 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
158 &indx_before_incr, &indx_after_incr);
159 incr = bsi_stmt (incr_bsi);
160 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
162 return indx_before_incr;
166 /* Function vect_create_addr_base_for_vector_ref.
168 Create an expression that computes the address of the first memory location
169 that will be accessed for a data reference.
171 Input:
172 STMT: The statement containing the data reference.
173 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
174    OFFSET: Optional. If supplied, it is added to the initial address.
176 Output:
177 1. Return an SSA_NAME whose value is the address of the memory location of
178 the first vector of the data reference.
179    2. If new_stmt_list is not NULL_TREE after return, the caller must insert
180       the statement(s) that define the returned SSA_NAME.
182 FORNOW: We are only handling array accesses with step 1. */
184 static tree
185 vect_create_addr_base_for_vector_ref (tree stmt,
186 tree *new_stmt_list,
187 tree offset)
189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
190 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
191 tree data_ref_base =
192 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
193 tree base_name = build_fold_indirect_ref (data_ref_base);
194 tree ref = DR_REF (dr);
195 tree scalar_type = TREE_TYPE (ref);
196 tree scalar_ptr_type = build_pointer_type (scalar_type);
197 tree vec_stmt;
198 tree new_temp;
199 tree addr_base, addr_expr;
200 tree dest, new_stmt;
201 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
203 /* Create base_offset */
204 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
205 add_referenced_tmp_var (dest);
206 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
207 append_to_statement_list_force (new_stmt, new_stmt_list);
209 if (offset)
211 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
212 add_referenced_tmp_var (tmp);
213 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
214 STMT_VINFO_VECT_STEP (stmt_info)));
215 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
216 base_offset, offset));
217 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
218 append_to_statement_list_force (new_stmt, new_stmt_list);
221 /* base + base_offset */
222 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
223 base_offset));
225 /* addr_expr = addr_base */
226 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
227 get_name (base_name));
228 add_referenced_tmp_var (addr_expr);
229 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
230 new_temp = make_ssa_name (addr_expr, vec_stmt);
231 TREE_OPERAND (vec_stmt, 0) = new_temp;
232 append_to_statement_list_force (vec_stmt, new_stmt_list);
234 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
236 fprintf (vect_dump, "created ");
237 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
239 return new_temp;
243 /* Function vect_align_data_ref.
245    Handle misalignment of memory accesses.
247 FORNOW: Can't handle misaligned accesses.
248 Make sure that the dataref is aligned. */
250 static void
251 vect_align_data_ref (tree stmt)
253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
256 /* FORNOW: can't handle misaligned accesses;
257 all accesses expected to be aligned. */
258 gcc_assert (aligned_access_p (dr));
262 /* Function vect_create_data_ref_ptr.
264 Create a memory reference expression for vector access, to be used in a
265 vector load/store stmt. The reference is based on a new pointer to vector
266 type (vp).
268 Input:
269 1. STMT: a stmt that references memory. Expected to be of the form
270 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
271 2. BSI: block_stmt_iterator where new stmts can be added.
272 3. OFFSET (optional): an offset to be added to the initial address accessed
273 by the data-ref in STMT.
274    4. ONLY_INIT: indicates whether vp is to be updated in the loop, or remain
275 pointing to the initial address.
277 Output:
278 1. Declare a new ptr to vector_type, and have it point to the base of the
279       data reference (initial address accessed by the data reference).
280 For example, for vector of type V8HI, the following code is generated:
282 v8hi *vp;
283 vp = (v8hi *)initial_address;
285 if OFFSET is not supplied:
286 initial_address = &a[init];
287 if OFFSET is supplied:
288 initial_address = &a[init + OFFSET];
290 Return the initial_address in INITIAL_ADDRESS.
292 2. Create a data-reference in the loop based on the new vector pointer vp,
293 and using a new index variable 'idx' as follows:
295 vp' = vp + update
297 where if ONLY_INIT is true:
298 update = zero
299 and otherwise
300 update = idx + vector_type_size
302 Return the pointer vp'.
305 FORNOW: handle only aligned and consecutive accesses. */
307 static tree
308 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
309 tree *initial_address, bool only_init)
311 tree base_name;
312 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
314 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
315 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
316 tree vect_ptr_type;
317 tree vect_ptr;
318 tree tag;
319 tree new_temp;
320 tree vec_stmt;
321 tree new_stmt_list = NULL_TREE;
322 tree idx;
323 edge pe = loop_preheader_edge (loop);
324 basic_block new_bb;
325 tree vect_ptr_init;
326 tree vectype_size;
327 tree ptr_update;
328 tree data_ref_ptr;
329 tree type, tmp, size;
331 base_name = build_fold_indirect_ref (unshare_expr (
332 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
334 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
336 tree data_ref_base = base_name;
337 fprintf (vect_dump, "create array_ref of type: ");
338 print_generic_expr (vect_dump, vectype, TDF_SLIM);
339 if (TREE_CODE (data_ref_base) == VAR_DECL)
340 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
341 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
342 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
343 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
344 fprintf (vect_dump, " vectorizing a record based array ref: ");
345 else if (TREE_CODE (data_ref_base) == SSA_NAME)
346 fprintf (vect_dump, " vectorizing a pointer ref: ");
347 print_generic_expr (vect_dump, base_name, TDF_SLIM);
350 /** (1) Create the new vector-pointer variable: **/
352 vect_ptr_type = build_pointer_type (vectype);
353 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
354 get_name (base_name));
355 add_referenced_tmp_var (vect_ptr);
358 /** (2) Add aliasing information to the new vector-pointer:
359 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
361 tag = STMT_VINFO_MEMTAG (stmt_info);
362 gcc_assert (tag);
364   /* If tag is a variable (and NOT_A_TAG) then a new type alias
365      tag must be created, with tag added to its may-alias list.  */
366 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
367 new_type_alias (vect_ptr, tag);
368 else
369 var_ann (vect_ptr)->type_mem_tag = tag;
371 var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
373   /** (3) Calculate the initial address of the vector-pointer, and set
374 the vector-pointer to point to it before the loop: **/
376   /* Create: &(base[init_val+offset]) in the loop preheader.  */
377 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
378 offset);
379 pe = loop_preheader_edge (loop);
380 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
381 gcc_assert (!new_bb);
382 *initial_address = new_temp;
384 /* Create: p = (vectype *) initial_base */
385 vec_stmt = fold_convert (vect_ptr_type, new_temp);
386 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
387 new_temp = make_ssa_name (vect_ptr, vec_stmt);
388 TREE_OPERAND (vec_stmt, 0) = new_temp;
389 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
390 gcc_assert (!new_bb);
391 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
394 /** (4) Handle the updating of the vector-pointer inside the loop: **/
396 if (only_init) /* No update in loop is required. */
398 /* Copy the points-to information if it exists. */
399 if (STMT_VINFO_PTR_INFO (stmt_info))
400 duplicate_ssa_name_ptr_info (vect_ptr_init,
401 STMT_VINFO_PTR_INFO (stmt_info));
402 return vect_ptr_init;
405 idx = vect_create_index_for_vector_ref (loop_vinfo);
407 /* Create: update = idx * vectype_size */
408 tmp = create_tmp_var (integer_type_node, "update");
409 add_referenced_tmp_var (tmp);
410 size = TYPE_SIZE (vect_ptr_type);
411 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
412 ptr_update = create_tmp_var (type, "update");
413 add_referenced_tmp_var (ptr_update);
414 vectype_size = TYPE_SIZE_UNIT (vectype);
415 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
416 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
417 new_temp = make_ssa_name (tmp, vec_stmt);
418 TREE_OPERAND (vec_stmt, 0) = new_temp;
419 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
420 vec_stmt = fold_convert (type, new_temp);
421 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
422 new_temp = make_ssa_name (ptr_update, vec_stmt);
423 TREE_OPERAND (vec_stmt, 0) = new_temp;
424 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
426 /* Create: data_ref_ptr = vect_ptr_init + update */
427 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
428 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
429 new_temp = make_ssa_name (vect_ptr, vec_stmt);
430 TREE_OPERAND (vec_stmt, 0) = new_temp;
431 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
432 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
434 /* Copy the points-to information if it exists. */
435 if (STMT_VINFO_PTR_INFO (stmt_info))
436 duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
437 return data_ref_ptr;
441 /* Function vect_create_destination_var.
443 Create a new temporary of type VECTYPE. */
445 static tree
446 vect_create_destination_var (tree scalar_dest, tree vectype)
448 tree vec_dest;
449 const char *new_name;
450 tree type;
451 enum vect_var_kind kind;
453 kind = vectype ? vect_simple_var : vect_scalar_var;
454 type = vectype ? vectype : TREE_TYPE (scalar_dest);
456 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
458 new_name = get_name (scalar_dest);
459 if (!new_name)
460 new_name = "var_";
461 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
462 add_referenced_tmp_var (vec_dest);
464 return vec_dest;
468 /* Function vect_init_vector.
470 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
471 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
472 used in the vectorization of STMT. */
474 static tree
475 vect_init_vector (tree stmt, tree vector_var)
477 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
478 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
479 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
480 tree new_var;
481 tree init_stmt;
482 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
483 tree vec_oprnd;
484 edge pe;
485 tree new_temp;
486 basic_block new_bb;
488 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
489 add_referenced_tmp_var (new_var);
491 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
492 new_temp = make_ssa_name (new_var, init_stmt);
493 TREE_OPERAND (init_stmt, 0) = new_temp;
495 pe = loop_preheader_edge (loop);
496 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
497 gcc_assert (!new_bb);
499 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
501 fprintf (vect_dump, "created new init_stmt: ");
502 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
505 vec_oprnd = TREE_OPERAND (init_stmt, 0);
506 return vec_oprnd;
510 /* Function vect_get_vec_def_for_operand.
512 OP is an operand in STMT. This function returns a (vector) def that will be
513 used in the vectorized stmt for STMT.
515 In the case that OP is an SSA_NAME which is defined in the loop, then
516 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
518 In case OP is an invariant or constant, a new stmt that creates a vector def
519 needs to be introduced. */
521 static tree
522 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
524 tree vec_oprnd;
525 tree vec_stmt;
526 tree def_stmt;
527 stmt_vec_info def_stmt_info = NULL;
528 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
529 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
530 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
531 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
532 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
533 tree vec_inv;
534 tree vec_cst;
535 tree t = NULL_TREE;
536 tree def;
537 int i;
538 enum vect_def_type dt;
539 bool is_simple_use;
541 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
543 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
544 print_generic_expr (vect_dump, op, TDF_SLIM);
547 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
548 gcc_assert (is_simple_use);
549 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
551 if (def)
553 fprintf (vect_dump, "def = ");
554 print_generic_expr (vect_dump, def, TDF_SLIM);
556 if (def_stmt)
558 fprintf (vect_dump, " def_stmt = ");
559 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
563 switch (dt)
565 /* Case 1: operand is a constant. */
566 case vect_constant_def:
568 if (scalar_def)
569 *scalar_def = op;
571 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
572 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
573 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
575 for (i = nunits - 1; i >= 0; --i)
577 t = tree_cons (NULL_TREE, op, t);
579 vec_cst = build_vector (vectype, t);
580 return vect_init_vector (stmt, vec_cst);
583 /* Case 2: operand is defined outside the loop - loop invariant. */
584 case vect_invariant_def:
586 if (scalar_def)
587 *scalar_def = def;
589 /* Create 'vec_inv = {inv,inv,..,inv}' */
590 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
591 fprintf (vect_dump, "Create vector_inv.");
593 for (i = nunits - 1; i >= 0; --i)
595 t = tree_cons (NULL_TREE, def, t);
598 vec_inv = build_constructor (vectype, t);
599 return vect_init_vector (stmt, vec_inv);
602 /* Case 3: operand is defined inside the loop. */
603 case vect_loop_def:
605 if (scalar_def)
606 *scalar_def = def_stmt;
608 /* Get the def from the vectorized stmt. */
609 def_stmt_info = vinfo_for_stmt (def_stmt);
610 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
611 gcc_assert (vec_stmt);
612 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
613 return vec_oprnd;
616 /* Case 4: operand is defined by a loop header phi - reduction */
617 case vect_reduction_def:
619 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
621 /* Get the def before the loop */
622 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
623 return get_initial_def_for_reduction (stmt, op, scalar_def);
626 /* Case 5: operand is defined by loop-header phi - induction. */
627 case vect_induction_def:
629 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
630 fprintf (vect_dump, "induction - unsupported.");
631 internal_error ("no support for induction"); /* FORNOW */
634 default:
635 gcc_unreachable ();
640 /* Function vect_finish_stmt_generation.
642 Insert a new stmt. */
644 static void
645 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
647 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
649 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
651 fprintf (vect_dump, "add new stmt: ");
652 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
655 /* Make sure bsi points to the stmt that is being vectorized. */
656 gcc_assert (stmt == bsi_stmt (*bsi));
658 #ifdef USE_MAPPED_LOCATION
659 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
660 #else
661 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
662 #endif
666 #define ADJUST_IN_EPILOG 1
668 /* Function get_initial_def_for_reduction
670 Input:
671 STMT - a stmt that performs a reduction operation in the loop.
672 INIT_VAL - the initial value of the reduction variable
674 Output:
675 SCALAR_DEF - a tree that holds a value to be added to the final result
676 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
677 Return a vector variable, initialized according to the operation that STMT
678 performs. This vector will be used as the initial value of the
679 vector of partial results.
681 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
682 add: [0,0,...,0,0]
683 mult: [1,1,...,1,1]
684 min/max: [init_val,init_val,..,init_val,init_val]
685 bit and/or: [init_val,init_val,..,init_val,init_val]
686 and when necessary (e.g. add/mult case) let the caller know
687 that it needs to adjust the result by init_val.
689 Option2: Initialize the vector as follows:
690 add: [0,0,...,0,init_val]
691 mult: [1,1,...,1,init_val]
692 min/max: [init_val,init_val,...,init_val]
693 bit and/or: [init_val,init_val,...,init_val]
694 and no adjustments are needed.
696 For example, for the following code:
698 s = init_val;
699 for (i=0;i<n;i++)
700 s = s + a[i];
702 STMT is 's = s + a[i]', and the reduction variable is 's'.
703 For a vector of 4 units, we want to return either [0,0,0,init_val],
704 or [0,0,0,0] and let the caller know that it needs to adjust
705 the result at the end by 'init_val'.
707 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
708 TODO: Use some cost-model to estimate which scheme is more profitable.
711 static tree
712 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
715 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
716 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
717 int nelements;
718 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
719 tree type = TREE_TYPE (init_val);
720 tree def;
721 tree vec, t = NULL_TREE;
722 bool need_epilog_adjust;
723 int i;
725 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
727 switch (code)
729 case PLUS_EXPR:
730 def = INTEGRAL_TYPE_P (type) ? integer_zero_node :
731 build_real (type, dconst0);
732 #ifdef ADJUST_IN_EPILOG
733 /* All the 'nunits' elements are set to 0. The final result will be
734 adjusted by 'init_val' at the loop epilog. */
735 nelements = nunits;
736 need_epilog_adjust = true;
737 #else
738 /* 'nunits - 1' elements are set to 0; The last element is set to
739 'init_val'. No further adjustments at the epilog are needed. */
740 nelements = nunits - 1;
741 need_epilog_adjust = false;
742 #endif
743 break;
745 case MIN_EXPR:
746 case MAX_EXPR:
747 def = init_val;
748 nelements = nunits;
749 need_epilog_adjust = false;
750 break;
752 default:
753 gcc_unreachable ();
756 for (i = nelements - 1; i >= 0; --i)
758 t = tree_cons (NULL_TREE, def, t);
761 if (nelements == nunits - 1)
763 /* Set the last element of the vector. */
764 t = tree_cons (NULL_TREE, init_val, t);
765 nelements += 1;
767 gcc_assert (nelements == nunits);
769 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
770 vec = build_vector (vectype, t);
771 else
772 vec = build_constructor (vectype, t);
774 if (need_epilog_adjust)
775 *scalar_def = init_val;
776 else
777 *scalar_def = INTEGRAL_TYPE_P (type) ? integer_zero_node
778 : build_real (type, dconst0);
779 return vect_init_vector (stmt, vec);
783 /* Function vect_create_epilog_for_reduction:
785 Create code at the loop-epilog to finalize the result of a reduction
786 computation.
788 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
789 into a single result, by applying the operation REDUC_CODE on the
790 partial-results-vector. For this, we need to create a new phi node at the
791 loop exit to preserve loop-closed form, as illustrated below.
793 STMT is the original scalar reduction stmt that is being vectorized.
794 REDUCTION_OP is the scalar reduction-variable.
795 REDUCTION_PHI is the phi-node that carries the reduction computation.
796 This function also sets the arguments for the REDUCTION_PHI:
797 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
798 The loop-latch argument is VECT_DEF - the vector of partial sums.
800 This function transforms this:
802 loop:
803 vec_def = phi <null, null> # REDUCTION_PHI
804 ....
805 VECT_DEF = ...
807 loop_exit:
808 s_out0 = phi <s_loop> # EXIT_PHI
810 use <s_out0>
811 use <s_out0>
813 Into:
815 loop:
816 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
817 ....
818 VECT_DEF = ...
820 loop_exit:
821 s_out0 = phi <s_loop> # EXIT_PHI
822 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
824 v_out2 = reduc_expr <v_out1>
825 s_out3 = extract_field <v_out2, 0>
827 use <s_out3>
828 use <s_out3>
831 static void
832 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
833 enum tree_code reduc_code, tree reduction_phi)
835 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
836 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
838 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
839 basic_block exit_bb;
840 tree scalar_dest = TREE_OPERAND (stmt, 0);
841 tree scalar_type = TREE_TYPE (scalar_dest);
842 tree new_phi;
843 block_stmt_iterator exit_bsi;
844 tree vec_dest;
845 tree new_temp;
846 tree epilog_stmt;
847 tree new_scalar_dest, exit_phi;
848 tree bitsize, bitpos;
849 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
850 tree scalar_initial_def;
851 tree vec_initial_def;
852 tree orig_name;
853 imm_use_iterator imm_iter;
854 use_operand_p use_p;
856 /*** 1. Create the reduction def-use cycle ***/
858 /* 1.1 set the loop-entry arg of the reduction-phi: */
859 /* For the case of reduction, vect_get_vec_def_for_operand returns
860 the scalar def before the loop, that defines the initial value
861 of the reduction variable. */
862 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
863 &scalar_initial_def);
864 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
867 /* 1.2 set the loop-latch arg for the reduction-phi: */
868 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
870 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
872 fprintf (vect_dump, "transform reduction: created def-use cycle:");
873 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
874 fprintf (vect_dump, "\n");
875 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
879 /*** 2. Create epilog code ***/
881 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
882 v_out1 = phi <v_loop> */
884 exit_bb = loop->single_exit->dest;
885 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
886 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
888 exit_bsi = bsi_start (exit_bb);
891 /* 2.2 Create:
892 v_out2 = reduc_expr <v_out1>
893 s_out3 = extract_field <v_out2, 0> */
895 vec_dest = vect_create_destination_var (scalar_dest, vectype);
896 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
897 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
898 new_temp = make_ssa_name (vec_dest, epilog_stmt);
899 TREE_OPERAND (epilog_stmt, 0) = new_temp;
900 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
902 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
904 fprintf (vect_dump, "transform reduction: created epilog code:");
905 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
908 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
909 bitsize = TYPE_SIZE (scalar_type);
911 /* The result is in the low order bits. */
912 if (BITS_BIG_ENDIAN)
913 bitpos = size_binop (MULT_EXPR,
914 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
915 TYPE_SIZE (scalar_type));
916 else
917 bitpos = bitsize_zero_node;
919 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
920 build3 (BIT_FIELD_REF, scalar_type,
921 new_temp, bitsize, bitpos));
922 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
923 TREE_OPERAND (epilog_stmt, 0) = new_temp;
924 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
926 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
927 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
930 /* 2.3 Adjust the final result by the initial value of the reduction
931 variable. (when such adjustment is not needed, then
932 'scalar_initial_def' is zero).
934 Create:
935 s_out = scalar_expr <s_out, scalar_initial_def> */
937 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
938 build2 (code, scalar_type, new_temp, scalar_initial_def));
939 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
940 TREE_OPERAND (epilog_stmt, 0) = new_temp;
941 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
943 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
944 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
947 /* 2.4 Replace uses of s_out0 with uses of s_out3 */
949 /* Find the loop-closed-use at the loop exit of the original
950 scalar result. (The reduction result is expected to have
951 two immediate uses - one at the latch block, and one at the
952 loop exit). */
953 exit_phi = NULL;
954 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
956 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
958 exit_phi = USE_STMT (use_p);
959 break;
963 orig_name = PHI_RESULT (exit_phi);
965 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
966 SET_USE (use_p, new_temp);
970 /* Function vectorizable_reduction.
972 Check if STMT performs a reduction operation that can be vectorized.
973 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
974 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
975 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
977 bool
978 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
980 tree vec_dest;
981 tree scalar_dest;
982 tree op0, op1;
983 tree loop_vec_def;
984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
985 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
986 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
987 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
988 tree operation;
989 enum tree_code code, reduc_code = 0;
990 enum machine_mode vec_mode;
991 int op_type;
992 optab optab, reduc_optab;
993 tree new_temp;
994 tree def0, def1, def_stmt0, def_stmt1;
995 enum vect_def_type dt0, dt1;
996 tree new_phi;
997 tree scalar_type;
998 bool is_simple_use0;
999 bool is_simple_use1;
1001 /* Is vectorizable reduction? */
1003 /* Not supportable if the reduction variable is used in the loop. */
1004 if (STMT_VINFO_RELEVANT_P (stmt_info))
1005 return false;
1007 if (!STMT_VINFO_LIVE_P (stmt_info))
1008 return false;
1010 /* Make sure it was already recognized as a reduction pattern. */
1011 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1012 return false;
1014 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1016 operation = TREE_OPERAND (stmt, 1);
1017 code = TREE_CODE (operation);
1018 op_type = TREE_CODE_LENGTH (code);
1020 if (op_type != binary_op)
1021 return false;
1023 op0 = TREE_OPERAND (operation, 0);
1024 op1 = TREE_OPERAND (operation, 1);
1025 scalar_dest = TREE_OPERAND (stmt, 0);
1026 scalar_type = TREE_TYPE (scalar_dest);
1028 /* Check the first operand. It is expected to be defined inside the loop. */
1029 is_simple_use0 =
1030 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1031 is_simple_use1 =
1032 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1034 gcc_assert (is_simple_use0);
1035 gcc_assert (is_simple_use1);
1036 gcc_assert (dt0 == vect_loop_def);
1037 gcc_assert (dt1 == vect_reduction_def);
1038 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1039 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1041 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1042 return false;
1044 /* Supportable by target? */
1046 /* check support for the operation in the loop */
1047 optab = optab_for_tree_code (code, vectype);
1048 if (!optab)
1050 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1051 fprintf (vect_dump, "no optab.");
1052 return false;
1054 vec_mode = TYPE_MODE (vectype);
1055 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1057 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1058 fprintf (vect_dump, "op not supported by target.");
1059 return false;
1062 /* check support for the epilog operation */
1063 if (!reduction_code_for_scalar_code (code, &reduc_code))
1064 return false;
1065 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1066 if (!reduc_optab)
1068 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1069 fprintf (vect_dump, "no optab for reduction.");
1070 return false;
1072 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1074 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1075 fprintf (vect_dump, "op not supported by target.");
1076 return false;
1079 if (!vec_stmt) /* transformation not required. */
1081 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1082 return true;
1085 /** Transform. **/
1087 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1088 fprintf (vect_dump, "transform reduction.");
1090 /* Create the destination vector */
1091 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1094 /* Create the reduction-phi that defines the reduction-operand. */
1095 new_phi = create_phi_node (vec_dest, loop->header);
1098 /* Prepare the operand that is defined inside the loop body */
1099 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1100 gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
1103 /* Create the vectorized operation that computes the partial results */
1104 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1105 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1106 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1107 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1108 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1111   /* Finalize the reduction-phi (set its arguments) and create the
1112 epilog reduction code. */
1113 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1114 return true;
1118 /* Function vectorizable_assignment.
1120 Check if STMT performs an assignment (copy) that can be vectorized.
1121 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1122 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1123 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1125 bool
1126 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1128 tree vec_dest;
1129 tree scalar_dest;
1130 tree op;
1131 tree vec_oprnd;
1132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1133 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1134 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1135 tree new_temp;
1136 tree def, def_stmt;
1137 enum vect_def_type dt;
1139 /* Is vectorizable assignment? */
1140 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1141 return false;
1143 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1145 if (TREE_CODE (stmt) != MODIFY_EXPR)
1146 return false;
1148 scalar_dest = TREE_OPERAND (stmt, 0);
1149 if (TREE_CODE (scalar_dest) != SSA_NAME)
1150 return false;
1152 op = TREE_OPERAND (stmt, 1);
1153 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1155 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1156 fprintf (vect_dump, "use not simple.");
1157 return false;
1160 if (!vec_stmt) /* transformation not required. */
1162 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1163 return true;
1166 /** Transform. **/
1167 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1168 fprintf (vect_dump, "transform assignment.");
1170 /* Handle def. */
1171 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1173 /* Handle use. */
1174 op = TREE_OPERAND (stmt, 1);
1175 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1177   /* Arguments are ready. Create the new vector stmt.  */
1178 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1179 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1180 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1181 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1183 return true;
1187 /* Function vect_min_worthwhile_factor.
1189 For a loop where we could vectorize the operation indicated by CODE,
1190 return the minimum vectorization factor that makes it worthwhile
1191 to use generic vectors. */
1192 static int
1193 vect_min_worthwhile_factor (enum tree_code code)
1195 switch (code)
1197 case PLUS_EXPR:
1198 case MINUS_EXPR:
1199 case NEGATE_EXPR:
1200 return 4;
1202 case BIT_AND_EXPR:
1203 case BIT_IOR_EXPR:
1204 case BIT_XOR_EXPR:
1205 case BIT_NOT_EXPR:
1206 return 2;
1208 default:
1209 return INT_MAX;
1214 /* Function vectorizable_operation.
1216 Check if STMT performs a binary or unary operation that can be vectorized.
1217 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1218 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1219 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1221 bool
1222 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1224 tree vec_dest;
1225 tree scalar_dest;
1226 tree operation;
1227 tree op0, op1 = NULL;
1228 tree vec_oprnd0, vec_oprnd1=NULL;
1229 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1230 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1231 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1232 int i;
1233 enum tree_code code;
1234 enum machine_mode vec_mode;
1235 tree new_temp;
1236 int op_type;
1237 tree op;
1238 optab optab;
1239 tree def, def_stmt;
1240 enum vect_def_type dt;
1242 /* Is STMT a vectorizable binary/unary operation? */
1243 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1244 return false;
1246 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1248 if (STMT_VINFO_LIVE_P (stmt_info))
1250 /* FORNOW: not yet supported. */
1251 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1252 fprintf (vect_dump, "value used after loop.");
1253 return false;
1256 if (TREE_CODE (stmt) != MODIFY_EXPR)
1257 return false;
1259 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1260 return false;
1262 operation = TREE_OPERAND (stmt, 1);
1263 code = TREE_CODE (operation);
1264 optab = optab_for_tree_code (code, vectype);
1266 /* Support only unary or binary operations. */
1267 op_type = TREE_CODE_LENGTH (code);
1268 if (op_type != unary_op && op_type != binary_op)
1270 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1271 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1272 return false;
1275 for (i = 0; i < op_type; i++)
1277 op = TREE_OPERAND (operation, i);
1278 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1280 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1281 fprintf (vect_dump, "use not simple.");
1282 return false;
1286 /* Supportable by target? */
1287 if (!optab)
1289 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1290 fprintf (vect_dump, "no optab.");
1291 return false;
1293 vec_mode = TYPE_MODE (vectype);
1294 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1296 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1297 fprintf (vect_dump, "op not supported by target.");
1298 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1299 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1300 < vect_min_worthwhile_factor (code))
1301 return false;
1302 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1303 fprintf (vect_dump, "proceeding using word mode.");
1306 /* Worthwhile without SIMD support? */
1307 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1308 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1309 < vect_min_worthwhile_factor (code))
1311 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1312 fprintf (vect_dump, "not worthwhile without SIMD support.");
1313 return false;
1316 if (!vec_stmt) /* transformation not required. */
1318 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1319 return true;
1322 /** Transform. **/
1324 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1325 fprintf (vect_dump, "transform binary/unary operation.");
1327 /* Handle def. */
1328 scalar_dest = TREE_OPERAND (stmt, 0);
1329 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1331 /* Handle uses. */
1332 op0 = TREE_OPERAND (operation, 0);
1333 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1335 if (op_type == binary_op)
1337 op1 = TREE_OPERAND (operation, 1);
1338 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1341   /* Arguments are ready. Create the new vector stmt.  */
1343 if (op_type == binary_op)
1344 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1345 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1346 else
1347 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1348 build1 (code, vectype, vec_oprnd0));
1349 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1350 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1351 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1353 return true;
1357 /* Function vectorizable_store.
1359    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
1360 can be vectorized.
1361 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1362 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1363 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1365 bool
1366 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1368 tree scalar_dest;
1369 tree data_ref;
1370 tree op;
1371 tree vec_oprnd1;
1372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1373 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1374 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1375 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1376 enum machine_mode vec_mode;
1377 tree dummy;
1378 enum dr_alignment_support alignment_support_cheme;
1379 ssa_op_iter iter;
1380 tree def, def_stmt;
1381 enum vect_def_type dt;
1383 /* Is vectorizable store? */
1385 if (TREE_CODE (stmt) != MODIFY_EXPR)
1386 return false;
1388 scalar_dest = TREE_OPERAND (stmt, 0);
1389 if (TREE_CODE (scalar_dest) != ARRAY_REF
1390 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1391 return false;
1393 op = TREE_OPERAND (stmt, 1);
1394 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1396 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1397 fprintf (vect_dump, "use not simple.");
1398 return false;
1401 vec_mode = TYPE_MODE (vectype);
1402 /* FORNOW. In some cases can vectorize even if data-type not supported
1403 (e.g. - array initialization with 0). */
1404 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1405 return false;
1407 if (!STMT_VINFO_DATA_REF (stmt_info))
1408 return false;
1411 if (!vec_stmt) /* transformation not required. */
1413 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1414 return true;
1417 /** Transform. **/
1419 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1420 fprintf (vect_dump, "transform store");
1422 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1423 gcc_assert (alignment_support_cheme);
1424 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1426 /* Handle use - get the vectorized def from the defining stmt. */
1427 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1429 /* Handle def. */
1430 /* FORNOW: make sure the data reference is aligned. */
1431 vect_align_data_ref (stmt);
1432 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1433 data_ref = build_fold_indirect_ref (data_ref);
1435   /* Arguments are ready. Create the new vector stmt.  */
1436 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1437 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1439 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1440      element's definition to the vector's definition, then update the
1441 defining statement. The original is being deleted so the same
1442 SSA_NAMEs can be used. */
1443 copy_virtual_operands (*vec_stmt, stmt);
1445 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1447 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1449 /* If this virtual def has a use outside the loop and a loop peel is
1450 performed then the def may be renamed by the peel. Mark it for
1451 renaming so the later use will also be renamed. */
1452 mark_sym_for_renaming (SSA_NAME_VAR (def));
1455 return true;
1459 /* vectorizable_load.
1461    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
1462 can be vectorized.
1463 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1464 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1465 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1467 bool
1468 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1470 tree scalar_dest;
1471 tree vec_dest = NULL;
1472 tree data_ref = NULL;
1473 tree op;
1474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1475 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1476 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1477 tree new_temp;
1478 int mode;
1479 tree init_addr;
1480 tree new_stmt;
1481 tree dummy;
1482 basic_block new_bb;
1483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1484 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1485 edge pe = loop_preheader_edge (loop);
1486 enum dr_alignment_support alignment_support_cheme;
1488 /* Is vectorizable load? */
1489 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1490 return false;
1492 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1494 if (STMT_VINFO_LIVE_P (stmt_info))
1496 /* FORNOW: not yet supported. */
1497 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1498 fprintf (vect_dump, "value used after loop.");
1499 return false;
1502 if (TREE_CODE (stmt) != MODIFY_EXPR)
1503 return false;
1505 scalar_dest = TREE_OPERAND (stmt, 0);
1506 if (TREE_CODE (scalar_dest) != SSA_NAME)
1507 return false;
1509 op = TREE_OPERAND (stmt, 1);
1510 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1511 return false;
1513 if (!STMT_VINFO_DATA_REF (stmt_info))
1514 return false;
1516 mode = (int) TYPE_MODE (vectype);
1518 /* FORNOW. In some cases can vectorize even if data-type not supported
1519 (e.g. - data copies). */
1520 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1522 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1523 fprintf (vect_dump, "Aligned load, but unsupported type.");
1524 return false;
1527 if (!vec_stmt) /* transformation not required. */
1529 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1530 return true;
1533 /** Transform. **/
1535 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1536 fprintf (vect_dump, "transform load.");
1538 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1539 gcc_assert (alignment_support_cheme);
1541 if (alignment_support_cheme == dr_aligned
1542 || alignment_support_cheme == dr_unaligned_supported)
1544 /* Create:
1545 p = initial_addr;
1546 indx = 0;
1547 loop {
1548 vec_dest = *(p);
1549 indx = indx + 1;
1553 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1554 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1555 if (aligned_access_p (dr))
1556 data_ref = build_fold_indirect_ref (data_ref);
1557 else
1559 int mis = DR_MISALIGNMENT (dr);
1560 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1561 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1562 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1564 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1565 new_temp = make_ssa_name (vec_dest, new_stmt);
1566 TREE_OPERAND (new_stmt, 0) = new_temp;
1567 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1568 copy_virtual_operands (new_stmt, stmt);
1570 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1572 /* Create:
1573 p1 = initial_addr;
1574 msq_init = *(floor(p1))
1575 p2 = initial_addr + VS - 1;
1576 magic = have_builtin ? builtin_result : initial_address;
1577 indx = 0;
1578 loop {
1579 p2' = p2 + indx * vectype_size
1580 lsq = *(floor(p2'))
1581 vec_dest = realign_load (msq, lsq, magic)
1582 indx = indx + 1;
1583 msq = lsq;
1587 tree offset;
1588 tree magic;
1589 tree phi_stmt;
1590 tree msq_init;
1591 tree msq, lsq;
1592 tree dataref_ptr;
1593 tree params;
1595 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1596 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1597 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1598 &init_addr, true);
1599 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1600 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1601 new_temp = make_ssa_name (vec_dest, new_stmt);
1602 TREE_OPERAND (new_stmt, 0) = new_temp;
1603 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1604 gcc_assert (!new_bb);
1605 msq_init = TREE_OPERAND (new_stmt, 0);
1606 copy_virtual_operands (new_stmt, stmt);
1607 update_vuses_to_preheader (new_stmt, loop);
1610 /* <2> Create lsq = *(floor(p2')) in the loop */
1611 offset = build_int_cst (integer_type_node,
1612 TYPE_VECTOR_SUBPARTS (vectype));
1613 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1614 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1615 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1616 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1617 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1618 new_temp = make_ssa_name (vec_dest, new_stmt);
1619 TREE_OPERAND (new_stmt, 0) = new_temp;
1620 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1621 lsq = TREE_OPERAND (new_stmt, 0);
1622 copy_virtual_operands (new_stmt, stmt);
1625 /* <3> */
1626 if (targetm.vectorize.builtin_mask_for_load)
1628 /* Create permutation mask, if required, in loop preheader. */
1629 tree builtin_decl;
1630 params = build_tree_list (NULL_TREE, init_addr);
1631 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1632 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1633 new_stmt = build_function_call_expr (builtin_decl, params);
1634 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1635 new_temp = make_ssa_name (vec_dest, new_stmt);
1636 TREE_OPERAND (new_stmt, 0) = new_temp;
1637 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1638 gcc_assert (!new_bb);
1639 magic = TREE_OPERAND (new_stmt, 0);
1641 /* The result of the CALL_EXPR to this builtin is determined from
1642 the value of the parameter and no global variables are touched
1643 which makes the builtin a "const" function. Requiring the
1644 builtin to have the "const" attribute makes it unnecessary
1645 to call mark_call_clobbered_vars_to_rename. */
1646 gcc_assert (TREE_READONLY (builtin_decl));
1648 else
1650       /* Use current address instead of init_addr for reduced reg pressure.  */
1652 magic = dataref_ptr;
1656 /* <4> Create msq = phi <msq_init, lsq> in loop */
1657 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1658 msq = make_ssa_name (vec_dest, NULL_TREE);
1659 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1660 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1661 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1662 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1665 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1666 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1667 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1668 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1669 new_temp = make_ssa_name (vec_dest, new_stmt);
1670 TREE_OPERAND (new_stmt, 0) = new_temp;
1671 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1673 else
1674 gcc_unreachable ();
1676 *vec_stmt = new_stmt;
1677 return true;
1681 /* Function vectorizable_live_operation.
1683 STMT computes a value that is used outside the loop. Check if
1684 it can be supported. */
1686 bool
1687 vectorizable_live_operation (tree stmt,
1688 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1689 tree *vec_stmt ATTRIBUTE_UNUSED)
1691 tree operation;
1692 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1693 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1694 int i;
1695 enum tree_code code;
1696 int op_type;
1697 tree op;
1698 tree def, def_stmt;
1699 enum vect_def_type dt;
1701 if (!STMT_VINFO_LIVE_P (stmt_info))
1702 return false;
1704 if (TREE_CODE (stmt) != MODIFY_EXPR)
1705 return false;
1707 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1708 return false;
1710 operation = TREE_OPERAND (stmt, 1);
1711 code = TREE_CODE (operation);
1713 op_type = TREE_CODE_LENGTH (code);
1715 /* FORNOW: support only if all uses are invariant. This means
1716 that the scalar operations can remain in place, unvectorized.
1717 The original last scalar value that they compute will be used. */
1719 for (i = 0; i < op_type; i++)
1721 op = TREE_OPERAND (operation, i);
1722 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1724 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1725 fprintf (vect_dump, "use not simple.");
1726 return false;
1729 if (dt != vect_invariant_def && dt != vect_constant_def)
1730 return false;
1733 /* No transformation is required for the cases we currently support. */
1734 return true;
1738 /* Function vect_is_simple_cond.
1740 Input:
1741 LOOP - the loop that is being vectorized.
1742 COND - Condition that is checked for simple use.
1744 Returns whether a COND can be vectorized. Checks whether
1745    condition operands are supportable using vect_is_simple_use.  */
1747 static bool
1748 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1750 tree lhs, rhs;
1751 tree def;
1752 enum vect_def_type dt;
1754 if (!COMPARISON_CLASS_P (cond))
1755 return false;
1757 lhs = TREE_OPERAND (cond, 0);
1758 rhs = TREE_OPERAND (cond, 1);
1760 if (TREE_CODE (lhs) == SSA_NAME)
1762 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1763 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1764 return false;
1766 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1767 return false;
1769 if (TREE_CODE (rhs) == SSA_NAME)
1771 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1772 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1773 return false;
1775 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1776 return false;
1778 return true;
1781 /* vectorizable_condition.
1783    Check if STMT is a conditional modify expression that can be vectorized.
1784 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1785 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1786 at BSI.
1788 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
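/* For example (an illustrative sketch), a scalar statement of the form
     x = a < b ? c : d
   is replaced by a vector statement of the form
     vx = VEC_COND_EXPR <va < vb, vc, vd>
   where va, vb, vc and vd are the vector defs obtained for the
   corresponding scalar operands.  */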
1790 bool
1791 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1793 tree scalar_dest = NULL_TREE;
1794 tree vec_dest = NULL_TREE;
1795 tree op = NULL_TREE;
1796 tree cond_expr, then_clause, else_clause;
1797 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1798 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1799 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1800 tree vec_compare, vec_cond_expr;
1801 tree new_temp;
1802 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1803 enum machine_mode vec_mode;
1804 tree def;
1805 enum vect_def_type dt;
1807 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1808 return false;
1810 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1812 if (STMT_VINFO_LIVE_P (stmt_info))
1814 /* FORNOW: not yet supported. */
1815 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1816 fprintf (vect_dump, "value used after loop.");
1817 return false;
1820 if (TREE_CODE (stmt) != MODIFY_EXPR)
1821 return false;
1823 op = TREE_OPERAND (stmt, 1);
1825 if (TREE_CODE (op) != COND_EXPR)
1826 return false;
1828 cond_expr = TREE_OPERAND (op, 0);
1829 then_clause = TREE_OPERAND (op, 1);
1830 else_clause = TREE_OPERAND (op, 2);
1832 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1833 return false;
1835 if (TREE_CODE (then_clause) == SSA_NAME)
1837 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1838 if (!vect_is_simple_use (then_clause, loop_vinfo,
1839 &then_def_stmt, &def, &dt))
1840 return false;
1842 else if (TREE_CODE (then_clause) != INTEGER_CST
1843 && TREE_CODE (then_clause) != REAL_CST)
1844 return false;
1846 if (TREE_CODE (else_clause) == SSA_NAME)
1848 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1849 if (!vect_is_simple_use (else_clause, loop_vinfo,
1850 &else_def_stmt, &def, &dt))
1851 return false;
1853 else if (TREE_CODE (else_clause) != INTEGER_CST
1854 && TREE_CODE (else_clause) != REAL_CST)
1855 return false;
1858 vec_mode = TYPE_MODE (vectype);
1860 if (!vec_stmt)
1862 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
1863 return expand_vec_cond_expr_p (op, vec_mode);
1866 /* Transform */
1868 /* Handle def. */
1869 scalar_dest = TREE_OPERAND (stmt, 0);
1870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1872 /* Handle cond expr. */
1873 vec_cond_lhs =
1874 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
1875 vec_cond_rhs =
1876 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
1877 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
1878 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
1880 /* Arguments are ready. Create the new vector stmt. */
1881 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
1882 vec_cond_lhs, vec_cond_rhs);
1883 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
1884 vec_compare, vec_then_clause, vec_else_clause);
1886 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
1887 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1888 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1889 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1891 return true;
1894 /* Function vect_transform_stmt.
1896 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1898 bool
1899 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1901 bool is_store = false;
1902 tree vec_stmt = NULL_TREE;
1903 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1904 bool done;
1906 if (STMT_VINFO_RELEVANT_P (stmt_info))
1908 switch (STMT_VINFO_TYPE (stmt_info))
1910 case op_vec_info_type:
1911 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1912 gcc_assert (done);
1913 break;
1915 case assignment_vec_info_type:
1916 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1917 gcc_assert (done);
1918 break;
1920 case load_vec_info_type:
1921 done = vectorizable_load (stmt, bsi, &vec_stmt);
1922 gcc_assert (done);
1923 break;
1925 case store_vec_info_type:
1926 done = vectorizable_store (stmt, bsi, &vec_stmt);
1927 gcc_assert (done);
1928 is_store = true;
1929 break;
1931 case condition_vec_info_type:
1932 done = vectorizable_condition (stmt, bsi, &vec_stmt);
1933 gcc_assert (done);
1934 break;
1936 default:
1937 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1938 fprintf (vect_dump, "stmt not supported.");
1939 gcc_unreachable ();
1942 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1945 if (STMT_VINFO_LIVE_P (stmt_info))
1947 switch (STMT_VINFO_TYPE (stmt_info))
1949 case reduc_vec_info_type:
1950 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
1951 gcc_assert (done);
1952 break;
1954 default:
1955 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
1956 gcc_assert (done);
1959 if (vec_stmt)
1961 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
1962 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1966 return is_store;
1970 /* This function builds ni_name = number of iterations the loop executes,
1971 and emits the computation on the loop preheader edge. */
1973 static tree
1974 vect_build_loop_niters (loop_vec_info loop_vinfo)
1976 tree ni_name, stmt, var;
1977 edge pe;
1978 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1979 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1981 var = create_tmp_var (TREE_TYPE (ni), "niters");
1982 add_referenced_tmp_var (var);
1983 ni_name = force_gimple_operand (ni, &stmt, false, var);
1985 pe = loop_preheader_edge (loop);
1986 if (stmt)
1988 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1989 gcc_assert (!new_bb);
1992 return ni_name;
1996 /* This function generates the following statements:
1998 ni_name = number of iterations the loop executes
1999 ratio = ni_name / vf
2000 ratio_mult_vf_name = ratio * vf
2002 and places them at the loop preheader edge. */
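/* A worked example (illustrative; assumes VF = 4 and a runtime trip
   count of 103).  The statements emitted on the preheader edge compute:
     ni_name            = 103
     ratio              = ni_name >> 2   =  25
     ratio_mult_vf_name = ratio << 2     = 100
   so the vectorized loop executes 25 iterations covering 100 scalar
   iterations, leaving 3 iterations for the epilog loop.  */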
2004 static void
2005 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2006 tree *ni_name_ptr,
2007 tree *ratio_mult_vf_name_ptr,
2008 tree *ratio_name_ptr)
2011 edge pe;
2012 basic_block new_bb;
2013 tree stmt, ni_name;
2014 tree var;
2015 tree ratio_name;
2016 tree ratio_mult_vf_name;
2017 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2018 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2019 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2020 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2022 pe = loop_preheader_edge (loop);
2024 /* Generate a temporary variable that contains the
2025 number of iterations the loop executes. */
2027 ni_name = vect_build_loop_niters (loop_vinfo);
2029 /* Create: ratio = ni >> log2(vf) */
2031 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2032 add_referenced_tmp_var (var);
2033 ratio_name = make_ssa_name (var, NULL_TREE);
2034 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2035 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2036 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2038 pe = loop_preheader_edge (loop);
2039 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2040 gcc_assert (!new_bb);
2042 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2044 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2045 add_referenced_tmp_var (var);
2046 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2047 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2048 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2049 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2051 pe = loop_preheader_edge (loop);
2052 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2053 gcc_assert (!new_bb);
2055 *ni_name_ptr = ni_name;
2056 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2057 *ratio_name_ptr = ratio_name;
2059 return;
2063 /* Function update_vuses_to_preheader.
2065 Input:
2066 STMT - a statement with potential VUSEs.
2067 LOOP - the loop whose preheader will contain STMT.
2069 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2070 appears to be defined in a V_MAY_DEF in another statement in a loop.
2071 One such case is when the VUSE is at the dereference of a __restricted__
2072 pointer in a load and the V_MAY_DEF is at the dereference of a different
2073 __restricted__ pointer in a store. Vectorization may result in
2074 copy_virtual_uses being called to copy the problematic VUSE to a new
2075 statement that is being inserted in the loop preheader. This procedure
2076 is called to change the SSA_NAME in the new statement's VUSE from the
2077 SSA_NAME updated in the loop to the related SSA_NAME available on the
2078 path entering the loop.
2080 When this function is called, we have the following situation:
2082 # vuse <name1>
2083 S1: vload
2084 do {
2085 # name1 = phi < name0 , name2>
2087 # vuse <name1>
2088 S2: vload
2090 # name2 = vdef <name1>
2091 S3: vstore
2093 }while...
2095 Stmt S1 was created in the loop preheader block as part of misaligned-load
2096 handling. This function fixes the name of the vuse of S1 from 'name1' to
2097 'name0'. */
2099 static void
2100 update_vuses_to_preheader (tree stmt, struct loop *loop)
2102 basic_block header_bb = loop->header;
2103 edge preheader_e = loop_preheader_edge (loop);
2104 ssa_op_iter iter;
2105 use_operand_p use_p;
2107 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2109 tree ssa_name = USE_FROM_PTR (use_p);
2110 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2111 tree name_var = SSA_NAME_VAR (ssa_name);
2112 basic_block bb = bb_for_stmt (def_stmt);
2114 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2115 if (!IS_EMPTY_STMT (def_stmt)
2116 && flow_bb_inside_loop_p (loop, bb))
2118 /* If the block containing the statement defining the SSA_NAME
2119 is in the loop then it's necessary to find the definition
2120 outside the loop using the PHI nodes of the header. */
2121 tree phi;
2122 bool updated = false;
2124 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2126 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2128 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2129 updated = true;
2130 break;
2133 gcc_assert (updated);
2139 /* Function vect_update_ivs_after_vectorizer.
2141 "Advance" the induction variables of LOOP to the value they should take
2142 after the execution of LOOP. This is currently necessary because the
2143 vectorizer does not handle induction variables that are used after the
2144 loop. Such a situation occurs when the last iterations of LOOP are
2145 peeled, because:
2146 1. We introduced new uses after LOOP for IVs that were not originally used
2147 after LOOP: the IVs of LOOP are now used by an epilog loop.
2148 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2149 times, whereas the loop IVs should be bumped N times.
2151 Input:
2152 - LOOP - a loop that is going to be vectorized. The last few iterations
2153 of LOOP were peeled.
2154 - NITERS - the number of iterations that LOOP executes (before it is
2155 vectorized), i.e., the number of times the ivs should be bumped.
2156 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2157 coming out from LOOP on which there are uses of the LOOP ivs
2158 (this is the path from LOOP->exit to epilog_loop->preheader).
2160 The new definitions of the ivs are placed in LOOP->exit.
2161 The phi args associated with the edge UPDATE_E in the bb
2162 UPDATE_E->dest are updated accordingly.
2164 Assumption 1: Like the rest of the vectorizer, this function assumes
2165 a single loop exit that has a single predecessor.
2167 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2168 organized in the same order.
2170 Assumption 3: The access function of the ivs is simple enough (see
2171 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2173 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2174 coming out of LOOP on which the ivs of LOOP are used (this is the path
2175 that leads to the epilog loop; other paths skip the epilog loop). This
2176 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2177 needs to have its phis updated.  */
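/* For example (an illustrative sketch): for an IV with access function
   {0, +, 1}_loop and NITERS = n, the statement
     ni_name = 0 + n * 1
   is inserted in the exit bb, and the phi argument on edge UPDATE_E in
   update_bb is replaced by ni_name, so the epilog loop continues from
   the value the IV would have after n iterations.  */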
2180 static void
2181 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2182 edge update_e)
2184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2185 basic_block exit_bb = loop->single_exit->dest;
2186 tree phi, phi1;
2187 basic_block update_bb = update_e->dest;
2189 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2191 /* Make sure there exists a single-predecessor exit bb: */
2192 gcc_assert (single_pred_p (exit_bb));
2194 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2195 phi && phi1;
2196 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2198 tree access_fn = NULL;
2199 tree evolution_part;
2200 tree init_expr;
2201 tree step_expr;
2202 tree var, stmt, ni, ni_name;
2203 block_stmt_iterator last_bsi;
2205 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2207 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2208 print_generic_expr (vect_dump, phi, TDF_SLIM);
2212 /* Skip virtual phis. */
2212 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2214 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2215 fprintf (vect_dump, "virtual phi. skip.");
2216 continue;
2219 /* Skip reduction phis. */
2220 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2222 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2223 fprintf (vect_dump, "reduc phi. skip.");
2224 continue;
2227 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2228 gcc_assert (access_fn);
2229 evolution_part =
2230 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2231 gcc_assert (evolution_part != NULL_TREE);
2233 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2234 of degree >= 2 or exponential. */
2235 gcc_assert (!tree_is_chrec (evolution_part));
2237 step_expr = evolution_part;
2238 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2239 loop->num));
2241 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2242 build2 (MULT_EXPR, TREE_TYPE (niters),
2243 niters, step_expr), init_expr);
2245 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2246 add_referenced_tmp_var (var);
2248 ni_name = force_gimple_operand (ni, &stmt, false, var);
2250 /* Insert stmt into exit_bb. */
2251 last_bsi = bsi_last (exit_bb);
2252 if (stmt)
2253 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2255 /* Fix phi expressions in the successor bb. */
2256 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2261 /* Function vect_do_peeling_for_loop_bound
2263 Peel the last iterations of the loop represented by LOOP_VINFO.
2264 The peeled iterations form a new epilog loop. Given that the loop now
2265 iterates NITERS times, the new epilog loop iterates
2266 NITERS % VECTORIZATION_FACTOR times.
2268 The original loop will later be made to iterate
2269 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2271 static void
2272 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2273 struct loops *loops)
2275 tree ni_name, ratio_mult_vf_name;
2276 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2277 struct loop *new_loop;
2278 edge update_e;
2279 basic_block preheader;
2280 int loop_num;
2282 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2283 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2285 initialize_original_copy_tables ();
2287 /* Generate the following variables on the preheader of the original loop:
2289 ni_name = number of iterations the original loop executes
2290 ratio = ni_name / vf
2291 ratio_mult_vf_name = ratio * vf */
2292 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2293 &ratio_mult_vf_name, ratio);
2295 loop_num = loop->num;
2296 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2297 ratio_mult_vf_name, ni_name, false);
2298 gcc_assert (new_loop);
2299 gcc_assert (loop_num == loop->num);
2300 #ifdef ENABLE_CHECKING
2301 slpeel_verify_cfg_after_peeling (loop, new_loop);
2302 #endif
2304 /* A guard that controls whether the new_loop is to be executed or skipped
2305 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2306 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2307 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2308 is on the path where the LOOP IVs are used and need to be updated. */
2310 preheader = loop_preheader_edge (new_loop)->src;
2311 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2312 update_e = EDGE_PRED (preheader, 0);
2313 else
2314 update_e = EDGE_PRED (preheader, 1);
2316 /* Update IVs of original loop as if they were advanced
2317 by ratio_mult_vf_name steps. */
2318 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2320 /* After peeling we have to reset scalar evolution analyzer. */
2321 scev_reset ();
2323 free_original_copy_tables ();
2327 /* Function vect_gen_niters_for_prolog_loop
2329 Set the number of iterations for the loop represented by LOOP_VINFO
2330 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2331 and the misalignment of DR - the data reference recorded in
2332 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2333 this loop, the data reference DR will refer to an aligned location.
2335 The following computation is generated:
2337 If the misalignment of DR is known at compile time:
2338 addr_mis = DR_MISALIGNMENT (dr);
2339 Else, compute address misalignment in bytes:
2340 addr_mis = addr & (vectype_size - 1)
2342 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2344 (elem_size = element type size; an element is the scalar element
2345 whose type is the inner type of the vectype) */
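/* A worked example (illustrative; assumes VF = 4 and 4-byte elements,
   i.e. a 16-byte vectype).  If the address of DR is misaligned by 8
   bytes, then
     elem_misalign = 8 / 4             = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2
   so two scalar iterations are peeled, after which DR refers to a
   16-byte aligned location.  */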
2347 static tree
2348 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2350 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2351 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2352 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2353 tree var, stmt;
2354 tree iters, iters_name;
2355 edge pe;
2356 basic_block new_bb;
2357 tree dr_stmt = DR_STMT (dr);
2358 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2359 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2360 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2361 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2362 tree niters_type = TREE_TYPE (loop_niters);
2364 pe = loop_preheader_edge (loop);
2366 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2368 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2369 int element_size = vectype_align/vf;
2370 int elem_misalign = byte_misalign / element_size;
2372 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2373 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2374 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2376 else
2378 tree new_stmts = NULL_TREE;
2379 tree start_addr =
2380 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2381 tree ptr_type = TREE_TYPE (start_addr);
2382 tree size = TYPE_SIZE (ptr_type);
2383 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2384 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2385 tree elem_size_log =
2386 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2387 tree vf_tree = build_int_cst (unsigned_type_node, vf);
2388 tree byte_misalign;
2389 tree elem_misalign;
2391 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2392 gcc_assert (!new_bb);
2394 /* Create: byte_misalign = addr & (vectype_size - 1) */
2395 byte_misalign =
2396 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2398 /* Create: elem_misalign = byte_misalign / element_size */
2399 elem_misalign =
2400 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2402 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2403 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2404 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2405 iters = fold_convert (niters_type, iters);
2408 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2409 /* If the loop bound is known at compile time we already verified that it is
2410 greater than vf; since the misalignment ('iters') is at most vf, there's
2411 no need to generate the MIN_EXPR in this case. */
2412 if (TREE_CODE (loop_niters) != INTEGER_CST)
2413 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2415 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2417 fprintf (vect_dump, "niters for prolog loop: ");
2418 print_generic_expr (vect_dump, iters, TDF_SLIM);
2421 var = create_tmp_var (niters_type, "prolog_loop_niters");
2422 add_referenced_tmp_var (var);
2423 iters_name = force_gimple_operand (iters, &stmt, false, var);
2425 /* Insert stmt on loop preheader edge. */
2426 if (stmt)
2428 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2429 gcc_assert (!new_bb);
2432 return iters_name;
2436 /* Function vect_update_init_of_dr
2438 NITERS iterations were peeled from LOOP. DR represents a data reference
2439 in LOOP. This function updates the information recorded in DR to
2440 account for the fact that the first NITERS iterations had already been
2441 executed. Specifically, it updates the STMT_VINFO_VECT_INIT_OFFSET of the stmt_info. */
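/* For example (illustrative): if the recorded step of the data reference
   is 4 bytes and NITERS = 3 iterations were peeled, the init offset
   recorded in the stmt_info grows by 3 * 4 = 12 bytes.  */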
2443 static void
2444 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2446 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2447 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
2449 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
2450 STMT_VINFO_VECT_STEP (stmt_info)));
2451 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
2452 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2456 /* Function vect_update_inits_of_drs
2458 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2459 This function updates the information recorded for the data references in
2460 the loop to account for the fact that the first NITERS iterations had
2461 already been executed. Specifically, it updates the init offset recorded
2462 for each of the data_references in the loop. */
2464 static void
2465 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2467 unsigned int i;
2468 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2469 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2471 if (vect_dump && (dump_flags & TDF_DETAILS))
2472 fprintf (vect_dump, "=== vect_update_inits_of_drs ===");
2474 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2476 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2477 vect_update_init_of_dr (dr, niters);
2480 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2482 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2483 vect_update_init_of_dr (dr, niters);
2488 /* Function vect_do_peeling_for_alignment
2490 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2491 'niters' is computed from the misalignment of one of the data references
2492 in the loop, so that after the peeled iterations that data reference refers
2493 to an aligned location. The data reference for which we are
2494 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2496 static void
2497 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2499 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2500 tree niters_of_prolog_loop, ni_name;
2501 tree n_iters;
2502 struct loop *new_loop;
2504 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2505 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2507 initialize_original_copy_tables ();
2509 ni_name = vect_build_loop_niters (loop_vinfo);
2510 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2512 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
2513 new_loop =
2514 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2515 niters_of_prolog_loop, ni_name, true);
2516 gcc_assert (new_loop);
2517 #ifdef ENABLE_CHECKING
2518 slpeel_verify_cfg_after_peeling (new_loop, loop);
2519 #endif
2521 /* Update number of times loop executes. */
2522 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2523 LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
2524 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
2526 /* Update the init conditions of the access functions of all data refs. */
2527 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2529 /* After peeling we have to reset scalar evolution analyzer. */
2530 scev_reset ();
2532 free_original_copy_tables ();
2536 /* Function vect_transform_loop.
2538 The analysis phase has determined that the loop is vectorizable.
2539 Vectorize the loop - create vectorized stmts to replace the scalar
2540 stmts in the loop, and update the loop exit condition. */
2542 void
2543 vect_transform_loop (loop_vec_info loop_vinfo,
2544 struct loops *loops ATTRIBUTE_UNUSED)
2546 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2547 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2548 int nbbs = loop->num_nodes;
2549 block_stmt_iterator si;
2550 int i;
2551 tree ratio = NULL;
2552 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2554 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2555 fprintf (vect_dump, "=== vect_transform_loop ===");
2557 /* Peel the loop if there are data refs with unknown alignment.
2558 Only one data ref with unknown alignment is allowed. */
2560 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2561 vect_do_peeling_for_alignment (loop_vinfo, loops);
2563 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2564 compile time constant), or it is a constant not evenly divisible by the
2565 vectorization factor, then an epilog loop needs to be created.
2566 We therefore duplicate the loop: the original loop will be vectorized,
2567 and will compute the first (n/VF) iterations. The second copy of the loop
2568 will remain scalar and will compute the remaining (n%VF) iterations.
2569 (VF is the vectorization factor). */
2571 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2572 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2573 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2574 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2575 else
2576 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2577 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2579 /* 1) Make sure the loop header has exactly two entries
2580 2) Make sure we have a preheader basic block. */
2582 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2584 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2587 /* FORNOW: the vectorizer supports only loops whose body consists
2588 of one basic block (header + empty latch). When the vectorizer
2589 supports more involved loop forms, the order in which the BBs are
2590 traversed will need to be reconsidered. */
2592 for (i = 0; i < nbbs; i++)
2594 basic_block bb = bbs[i];
2596 for (si = bsi_start (bb); !bsi_end_p (si);)
2598 tree stmt = bsi_stmt (si);
2599 stmt_vec_info stmt_info;
2600 bool is_store;
2602 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2604 fprintf (vect_dump, "------>vectorizing statement: ");
2605 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2607 stmt_info = vinfo_for_stmt (stmt);
2608 gcc_assert (stmt_info);
2609 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2610 && !STMT_VINFO_LIVE_P (stmt_info))
2612 bsi_next (&si);
2613 continue;
2615 /* FORNOW: Verify that all stmts operate on the same number of
2616 units and no inner unrolling is necessary. */
2617 gcc_assert
2618 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2619 == vectorization_factor);
2621 /* -------- vectorize statement ------------ */
2622 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2623 fprintf (vect_dump, "transform statement.");
2625 is_store = vect_transform_stmt (stmt, &si);
2626 if (is_store)
2628 /* Free the attached stmt_vec_info and remove the stmt. */
2629 stmt_ann_t ann = stmt_ann (stmt);
2630 free (stmt_info);
2631 set_stmt_info ((tree_ann_t)ann, NULL);
2632 bsi_remove (&si);
2633 continue;
2636 bsi_next (&si);
2637 } /* stmts in BB */
2638 } /* BBs in loop */
2640 slpeel_make_loop_iterate_ntimes (loop, ratio);
2642 /* The memory tags and pointers in vectorized statements need to
2643 have their SSA forms updated. FIXME, why can't this be delayed
2644 until all the loops have been transformed? */
2645 update_ssa (TODO_update_ssa);
2647 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
2648 fprintf (vect_dump, "LOOP VECTORIZED.");