PR testsuite/21969
[official-gcc.git] / gcc / tree-vect-transform.c
blob7d153099c955bfb8bbd2adbb134bb8ede6c584b2
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
45 #include "real.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
77 /* Function vect_get_new_vect_var.
79 Returns a name for a new variable. The current naming scheme appends the
80 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
81 the name of vectorizer generated variables, and appends that to NAME if
82 provided. */
84 static tree
85 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87 const char *prefix;
88 tree new_vect_var;
90 switch (var_kind)
92 case vect_simple_var:
93 prefix = "vect_";
94 break;
95 case vect_scalar_var:
96 prefix = "stmp_";
97 break;
98 case vect_pointer_var:
99 prefix = "vect_p";
100 break;
101 default:
102 gcc_unreachable ();
105 if (name)
106 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
107 else
108 new_vect_var = create_tmp_var (type, prefix);
110 return new_vect_var;
114 /* Function vect_create_index_for_vector_ref.
116 Create (and return) an index variable, along with it's update chain in the
117 loop. This variable will be used to access a memory location in a vector
118 operation.
120 Input:
121 LOOP: The loop being vectorized.
122 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
123 function can be added here, or in the loop pre-header.
125 Output:
126 Return an index that will be used to index a vector array. It is expected
127 that a pointer to the first vector will be used as the base address for the
128 indexed reference.
130 FORNOW: we are not trying to be efficient, just creating a new index each
131 time from scratch. At this time all vector references could use the same
132 index.
134 TODO: create only one index to be used by all vector references. Record
135 the index in the LOOP_VINFO the first time this procedure is called and
136 return it on subsequent calls. The increment of this index must be placed
137 just before the conditional expression that ends the single block loop. */
139 static tree
140 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
142 tree init, step;
143 block_stmt_iterator incr_bsi;
144 bool insert_after;
145 tree indx_before_incr, indx_after_incr;
146 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
147 tree incr;
149 /* It is assumed that the base pointer used for vectorized access contains
150 the address of the first vector. Therefore the index used for vectorized
151 access must be initialized to zero and incremented by 1. */
153 init = integer_zero_node;
154 step = integer_one_node;
156 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
157 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
158 &indx_before_incr, &indx_after_incr);
159 incr = bsi_stmt (incr_bsi);
160 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
162 return indx_before_incr;
166 /* Function vect_create_addr_base_for_vector_ref.
168 Create an expression that computes the address of the first memory location
169 that will be accessed for a data reference.
171 Input:
172 STMT: The statement containing the data reference.
173 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
174 OFFSET: Optional. If supplied, it is be added to the initial address.
176 Output:
177 1. Return an SSA_NAME whose value is the address of the memory location of
178 the first vector of the data reference.
179 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
180 these statement(s) which define the returned SSA_NAME.
182 FORNOW: We are only handling array accesses with step 1. */
184 static tree
185 vect_create_addr_base_for_vector_ref (tree stmt,
186 tree *new_stmt_list,
187 tree offset)
189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
190 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
191 tree data_ref_base =
192 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
193 tree base_name = build_fold_indirect_ref (data_ref_base);
194 tree ref = DR_REF (dr);
195 tree scalar_type = TREE_TYPE (ref);
196 tree scalar_ptr_type = build_pointer_type (scalar_type);
197 tree vec_stmt;
198 tree new_temp;
199 tree addr_base, addr_expr;
200 tree dest, new_stmt;
201 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
203 /* Create base_offset */
204 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
205 add_referenced_tmp_var (dest);
206 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
207 append_to_statement_list_force (new_stmt, new_stmt_list);
209 if (offset)
211 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
212 add_referenced_tmp_var (tmp);
213 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
214 STMT_VINFO_VECT_STEP (stmt_info));
215 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
216 base_offset, offset);
217 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
218 append_to_statement_list_force (new_stmt, new_stmt_list);
221 /* base + base_offset */
222 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
223 base_offset);
225 /* addr_expr = addr_base */
226 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
227 get_name (base_name));
228 add_referenced_tmp_var (addr_expr);
229 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
230 new_temp = make_ssa_name (addr_expr, vec_stmt);
231 TREE_OPERAND (vec_stmt, 0) = new_temp;
232 append_to_statement_list_force (vec_stmt, new_stmt_list);
234 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
236 fprintf (vect_dump, "created ");
237 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
239 return new_temp;
243 /* Function vect_align_data_ref.
245 Handle misalignment of a memory accesses.
247 FORNOW: Can't handle misaligned accesses.
248 Make sure that the dataref is aligned. */
250 static void
251 vect_align_data_ref (tree stmt)
253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
256 /* FORNOW: can't handle misaligned accesses;
257 all accesses expected to be aligned. */
258 gcc_assert (aligned_access_p (dr));
262 /* Function vect_create_data_ref_ptr.
264 Create a memory reference expression for vector access, to be used in a
265 vector load/store stmt. The reference is based on a new pointer to vector
266 type (vp).
268 Input:
269 1. STMT: a stmt that references memory. Expected to be of the form
270 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
271 2. BSI: block_stmt_iterator where new stmts can be added.
272 3. OFFSET (optional): an offset to be added to the initial address accessed
273 by the data-ref in STMT.
274 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
275 pointing to the initial address.
277 Output:
278 1. Declare a new ptr to vector_type, and have it point to the base of the
279 data reference (initial addressed accessed by the data reference).
280 For example, for vector of type V8HI, the following code is generated:
282 v8hi *vp;
283 vp = (v8hi *)initial_address;
285 if OFFSET is not supplied:
286 initial_address = &a[init];
287 if OFFSET is supplied:
288 initial_address = &a[init + OFFSET];
290 Return the initial_address in INITIAL_ADDRESS.
292 2. Create a data-reference in the loop based on the new vector pointer vp,
293 and using a new index variable 'idx' as follows:
295 vp' = vp + update
297 where if ONLY_INIT is true:
298 update = zero
299 and otherwise
300 update = idx + vector_type_size
302 Return the pointer vp'.
305 FORNOW: handle only aligned and consecutive accesses. */
307 static tree
308 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
309 tree *initial_address, bool only_init)
311 tree base_name;
312 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
314 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
315 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
316 tree vect_ptr_type;
317 tree vect_ptr;
318 tree tag;
319 tree new_temp;
320 tree vec_stmt;
321 tree new_stmt_list = NULL_TREE;
322 tree idx;
323 edge pe = loop_preheader_edge (loop);
324 basic_block new_bb;
325 tree vect_ptr_init;
326 tree vectype_size;
327 tree ptr_update;
328 tree data_ref_ptr;
329 tree type, tmp, size;
331 base_name = build_fold_indirect_ref (unshare_expr (
332 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
334 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
336 tree data_ref_base = base_name;
337 fprintf (vect_dump, "create array_ref of type: ");
338 print_generic_expr (vect_dump, vectype, TDF_SLIM);
339 if (TREE_CODE (data_ref_base) == VAR_DECL)
340 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
341 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
342 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
343 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
344 fprintf (vect_dump, " vectorizing a record based array ref: ");
345 else if (TREE_CODE (data_ref_base) == SSA_NAME)
346 fprintf (vect_dump, " vectorizing a pointer ref: ");
347 print_generic_expr (vect_dump, base_name, TDF_SLIM);
350 /** (1) Create the new vector-pointer variable: **/
352 vect_ptr_type = build_pointer_type (vectype);
353 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
354 get_name (base_name));
355 add_referenced_tmp_var (vect_ptr);
358 /** (2) Add aliasing information to the new vector-pointer:
359 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
361 tag = STMT_VINFO_MEMTAG (stmt_info);
362 gcc_assert (tag);
364 /* If tag is a variable (and NOT_A_TAG) than a new type alias
365 tag must be created with tag added to its may alias list. */
366 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
367 new_type_alias (vect_ptr, tag);
368 else
369 var_ann (vect_ptr)->type_mem_tag = tag;
371 var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
373 /** (3) Calculate the initial address the vector-pointer, and set
374 the vector-pointer to point to it before the loop: **/
376 /* Create: (&(base[init_val+offset]) in the loop preheader. */
377 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
378 offset);
379 pe = loop_preheader_edge (loop);
380 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
381 gcc_assert (!new_bb);
382 *initial_address = new_temp;
384 /* Create: p = (vectype *) initial_base */
385 vec_stmt = fold_convert (vect_ptr_type, new_temp);
386 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
387 new_temp = make_ssa_name (vect_ptr, vec_stmt);
388 TREE_OPERAND (vec_stmt, 0) = new_temp;
389 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
390 gcc_assert (!new_bb);
391 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
394 /** (4) Handle the updating of the vector-pointer inside the loop: **/
396 if (only_init) /* No update in loop is required. */
398 /* Copy the points-to information if it exists. */
399 if (STMT_VINFO_PTR_INFO (stmt_info))
400 duplicate_ssa_name_ptr_info (vect_ptr_init,
401 STMT_VINFO_PTR_INFO (stmt_info));
402 return vect_ptr_init;
405 idx = vect_create_index_for_vector_ref (loop_vinfo);
407 /* Create: update = idx * vectype_size */
408 tmp = create_tmp_var (integer_type_node, "update");
409 add_referenced_tmp_var (tmp);
410 size = TYPE_SIZE (vect_ptr_type);
411 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
412 ptr_update = create_tmp_var (type, "update");
413 add_referenced_tmp_var (ptr_update);
414 vectype_size = TYPE_SIZE_UNIT (vectype);
415 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
416 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
417 new_temp = make_ssa_name (tmp, vec_stmt);
418 TREE_OPERAND (vec_stmt, 0) = new_temp;
419 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
420 vec_stmt = fold_convert (type, new_temp);
421 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
422 new_temp = make_ssa_name (ptr_update, vec_stmt);
423 TREE_OPERAND (vec_stmt, 0) = new_temp;
424 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
426 /* Create: data_ref_ptr = vect_ptr_init + update */
427 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
428 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
429 new_temp = make_ssa_name (vect_ptr, vec_stmt);
430 TREE_OPERAND (vec_stmt, 0) = new_temp;
431 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
432 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
434 /* Copy the points-to information if it exists. */
435 if (STMT_VINFO_PTR_INFO (stmt_info))
436 duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
437 return data_ref_ptr;
441 /* Function vect_create_destination_var.
443 Create a new temporary of type VECTYPE. */
445 static tree
446 vect_create_destination_var (tree scalar_dest, tree vectype)
448 tree vec_dest;
449 const char *new_name;
450 tree type;
451 enum vect_var_kind kind;
453 kind = vectype ? vect_simple_var : vect_scalar_var;
454 type = vectype ? vectype : TREE_TYPE (scalar_dest);
456 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
458 new_name = get_name (scalar_dest);
459 if (!new_name)
460 new_name = "var_";
461 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
462 add_referenced_tmp_var (vec_dest);
464 return vec_dest;
468 /* Function vect_init_vector.
470 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
471 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
472 used in the vectorization of STMT. */
474 static tree
475 vect_init_vector (tree stmt, tree vector_var)
477 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
478 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
479 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
480 tree new_var;
481 tree init_stmt;
482 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
483 tree vec_oprnd;
484 edge pe;
485 tree new_temp;
486 basic_block new_bb;
488 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
489 add_referenced_tmp_var (new_var);
491 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
492 new_temp = make_ssa_name (new_var, init_stmt);
493 TREE_OPERAND (init_stmt, 0) = new_temp;
495 pe = loop_preheader_edge (loop);
496 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
497 gcc_assert (!new_bb);
499 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
501 fprintf (vect_dump, "created new init_stmt: ");
502 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
505 vec_oprnd = TREE_OPERAND (init_stmt, 0);
506 return vec_oprnd;
510 /* Function vect_get_vec_def_for_operand.
512 OP is an operand in STMT. This function returns a (vector) def that will be
513 used in the vectorized stmt for STMT.
515 In the case that OP is an SSA_NAME which is defined in the loop, then
516 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
518 In case OP is an invariant or constant, a new stmt that creates a vector def
519 needs to be introduced. */
521 static tree
522 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
524 tree vec_oprnd;
525 tree vec_stmt;
526 tree def_stmt;
527 stmt_vec_info def_stmt_info = NULL;
528 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
529 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
530 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
531 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
532 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
533 tree vec_inv;
534 tree vec_cst;
535 tree t = NULL_TREE;
536 tree def;
537 int i;
538 enum vect_def_type dt;
539 bool is_simple_use;
541 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
543 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
544 print_generic_expr (vect_dump, op, TDF_SLIM);
547 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
548 gcc_assert (is_simple_use);
549 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
551 if (def)
553 fprintf (vect_dump, "def = ");
554 print_generic_expr (vect_dump, def, TDF_SLIM);
556 if (def_stmt)
558 fprintf (vect_dump, " def_stmt = ");
559 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
563 switch (dt)
565 /* Case 1: operand is a constant. */
566 case vect_constant_def:
568 if (scalar_def)
569 *scalar_def = op;
571 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
572 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
573 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
575 for (i = nunits - 1; i >= 0; --i)
577 t = tree_cons (NULL_TREE, op, t);
579 vec_cst = build_vector (vectype, t);
580 return vect_init_vector (stmt, vec_cst);
583 /* Case 2: operand is defined outside the loop - loop invariant. */
584 case vect_invariant_def:
586 if (scalar_def)
587 *scalar_def = def;
589 /* Create 'vec_inv = {inv,inv,..,inv}' */
590 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
591 fprintf (vect_dump, "Create vector_inv.");
593 for (i = nunits - 1; i >= 0; --i)
595 t = tree_cons (NULL_TREE, def, t);
598 vec_inv = build_constructor (vectype, t);
599 return vect_init_vector (stmt, vec_inv);
602 /* Case 3: operand is defined inside the loop. */
603 case vect_loop_def:
605 if (scalar_def)
606 *scalar_def = def_stmt;
608 /* Get the def from the vectorized stmt. */
609 def_stmt_info = vinfo_for_stmt (def_stmt);
610 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
611 gcc_assert (vec_stmt);
612 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
613 return vec_oprnd;
616 /* Case 4: operand is defined by a loop header phi - reduction */
617 case vect_reduction_def:
619 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
621 /* Get the def before the loop */
622 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
623 return get_initial_def_for_reduction (stmt, op, scalar_def);
626 /* Case 5: operand is defined by loop-header phi - induction. */
627 case vect_induction_def:
629 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
630 fprintf (vect_dump, "induction - unsupported.");
631 internal_error ("no support for induction"); /* FORNOW */
634 default:
635 gcc_unreachable ();
640 /* Function vect_finish_stmt_generation.
642 Insert a new stmt. */
644 static void
645 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
647 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
649 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
651 fprintf (vect_dump, "add new stmt: ");
652 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
655 /* Make sure bsi points to the stmt that is being vectorized. */
656 gcc_assert (stmt == bsi_stmt (*bsi));
658 #ifdef USE_MAPPED_LOCATION
659 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
660 #else
661 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
662 #endif
666 #define ADJUST_IN_EPILOG 1
668 /* Function get_initial_def_for_reduction
670 Input:
671 STMT - a stmt that performs a reduction operation in the loop.
672 INIT_VAL - the initial value of the reduction variable
674 Output:
675 SCALAR_DEF - a tree that holds a value to be added to the final result
676 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
677 Return a vector variable, initialized according to the operation that STMT
678 performs. This vector will be used as the initial value of the
679 vector of partial results.
681 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
682 add: [0,0,...,0,0]
683 mult: [1,1,...,1,1]
684 min/max: [init_val,init_val,..,init_val,init_val]
685 bit and/or: [init_val,init_val,..,init_val,init_val]
686 and when necessary (e.g. add/mult case) let the caller know
687 that it needs to adjust the result by init_val.
689 Option2: Initialize the vector as follows:
690 add: [0,0,...,0,init_val]
691 mult: [1,1,...,1,init_val]
692 min/max: [init_val,init_val,...,init_val]
693 bit and/or: [init_val,init_val,...,init_val]
694 and no adjustments are needed.
696 For example, for the following code:
698 s = init_val;
699 for (i=0;i<n;i++)
700 s = s + a[i];
702 STMT is 's = s + a[i]', and the reduction variable is 's'.
703 For a vector of 4 units, we want to return either [0,0,0,init_val],
704 or [0,0,0,0] and let the caller know that it needs to adjust
705 the result at the end by 'init_val'.
707 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
708 TODO: Use some cost-model to estimate which scheme is more profitable.
711 static tree
712 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
715 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
716 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
717 int nelements;
718 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
719 tree type = TREE_TYPE (init_val);
720 tree def;
721 tree vec, t = NULL_TREE;
722 bool need_epilog_adjust;
723 int i;
725 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
727 switch (code)
729 case PLUS_EXPR:
730 if (INTEGRAL_TYPE_P (type))
731 def = build_int_cst (type, 0);
732 else
733 def = build_real (type, dconst0);
735 #ifdef ADJUST_IN_EPILOG
736 /* All the 'nunits' elements are set to 0. The final result will be
737 adjusted by 'init_val' at the loop epilog. */
738 nelements = nunits;
739 need_epilog_adjust = true;
740 #else
741 /* 'nunits - 1' elements are set to 0; The last element is set to
742 'init_val'. No further adjustments at the epilog are needed. */
743 nelements = nunits - 1;
744 need_epilog_adjust = false;
745 #endif
746 break;
748 case MIN_EXPR:
749 case MAX_EXPR:
750 def = init_val;
751 nelements = nunits;
752 need_epilog_adjust = true;
753 break;
755 default:
756 gcc_unreachable ();
759 for (i = nelements - 1; i >= 0; --i)
760 t = tree_cons (NULL_TREE, def, t);
762 if (nelements == nunits - 1)
764 /* Set the last element of the vector. */
765 t = tree_cons (NULL_TREE, init_val, t);
766 nelements += 1;
768 gcc_assert (nelements == nunits);
770 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
771 vec = build_vector (vectype, t);
772 else
773 vec = build_constructor (vectype, t);
775 if (!need_epilog_adjust)
777 if (INTEGRAL_TYPE_P (type))
778 init_val = build_int_cst (type, 0);
779 else
780 init_val = build_real (type, dconst0);
782 *scalar_def = init_val;
784 return vect_init_vector (stmt, vec);
788 /* Function vect_create_epilog_for_reduction:
790 Create code at the loop-epilog to finalize the result of a reduction
791 computation.
793 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
794 into a single result, by applying the operation REDUC_CODE on the
795 partial-results-vector. For this, we need to create a new phi node at the
796 loop exit to preserve loop-closed form, as illustrated below.
798 STMT is the original scalar reduction stmt that is being vectorized.
799 REDUCTION_OP is the scalar reduction-variable.
800 REDUCTION_PHI is the phi-node that carries the reduction computation.
801 This function also sets the arguments for the REDUCTION_PHI:
802 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
803 The loop-latch argument is VECT_DEF - the vector of partial sums.
805 This function transforms this:
807 loop:
808 vec_def = phi <null, null> # REDUCTION_PHI
809 ....
810 VECT_DEF = ...
812 loop_exit:
813 s_out0 = phi <s_loop> # EXIT_PHI
815 use <s_out0>
816 use <s_out0>
818 Into:
820 loop:
821 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
822 ....
823 VECT_DEF = ...
825 loop_exit:
826 s_out0 = phi <s_loop> # EXIT_PHI
827 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
829 v_out2 = reduc_expr <v_out1>
830 s_out3 = extract_field <v_out2, 0>
832 use <s_out3>
833 use <s_out3>
836 static void
837 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
838 enum tree_code reduc_code, tree reduction_phi)
840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
841 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
842 enum machine_mode mode = TYPE_MODE (vectype);
843 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
844 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
845 basic_block exit_bb;
846 tree scalar_dest = TREE_OPERAND (stmt, 0);
847 tree scalar_type = TREE_TYPE (scalar_dest);
848 tree new_phi;
849 block_stmt_iterator exit_bsi;
850 tree vec_dest;
851 tree new_temp;
852 tree new_name;
853 tree epilog_stmt;
854 tree new_scalar_dest, exit_phi;
855 tree bitsize, bitpos, bytesize;
856 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
857 tree scalar_initial_def;
858 tree vec_initial_def;
859 tree orig_name;
860 imm_use_iterator imm_iter;
861 use_operand_p use_p;
862 bool extract_scalar_result;
863 bool adjust_in_epilog;
865 /*** 1. Create the reduction def-use cycle ***/
867 /* 1.1 set the loop-entry arg of the reduction-phi: */
868 /* For the case of reduction, vect_get_vec_def_for_operand returns
869 the scalar def before the loop, that defines the initial value
870 of the reduction variable. */
871 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
872 &scalar_initial_def);
873 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
876 /* 1.2 set the loop-latch arg for the reduction-phi: */
877 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
879 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
881 fprintf (vect_dump, "transform reduction: created def-use cycle:");
882 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
883 fprintf (vect_dump, "\n");
884 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
888 /*** 2. Create epilog code ***/
890 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
891 v_out1 = phi <v_loop> */
893 exit_bb = loop->single_exit->dest;
894 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
895 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
897 exit_bsi = bsi_start (exit_bb);
900 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
901 bitsize = TYPE_SIZE (scalar_type);
902 bytesize = TYPE_SIZE_UNIT (scalar_type);
904 /* 2.2 Create the reduction code. */
906 if (reduc_code < NUM_TREE_CODES)
908 /*** Case 1: Create:
909 v_out2 = reduc_expr <v_out1> */
911 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
912 fprintf (vect_dump, "Reduce using direct vector reduction.");
914 vec_dest = vect_create_destination_var (scalar_dest, vectype);
915 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
916 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
917 new_temp = make_ssa_name (vec_dest, epilog_stmt);
918 TREE_OPERAND (epilog_stmt, 0) = new_temp;
919 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
921 extract_scalar_result = true;
922 adjust_in_epilog = true;
924 else
926 enum tree_code shift_code;
927 bool have_whole_vector_shift = true;
928 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
929 int bit_offset;
930 int element_bitsize = tree_low_cst (bitsize, 1);
931 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
932 tree vec_temp;
934 /* The result of the reduction is expected to be at the least
935 significant bits of the vector. This is merely convention,
936 as it's the extraction later that really matters, and that
937 is also under our control. */
938 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
939 shift_code = VEC_RSHIFT_EXPR;
940 else
941 have_whole_vector_shift = false;
943 if (have_whole_vector_shift)
945 /*** Case 2:
946 for (offset = VS/2; offset >= element_size; offset/=2)
948 Create: va' = vec_shift <va, offset>
949 Create: va = vop <va, va'>
950 } */
952 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
953 fprintf (vect_dump, "Reduce using vector shifts");
955 vec_dest = vect_create_destination_var (scalar_dest, vectype);
956 new_temp = PHI_RESULT (new_phi);
958 for (bit_offset = vec_size_in_bits/2;
959 bit_offset >= element_bitsize;
960 bit_offset /= 2)
962 tree bitpos = size_int (bit_offset);
964 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
965 build2 (shift_code, vectype, new_temp, bitpos));
966 new_name = make_ssa_name (vec_dest, epilog_stmt);
967 TREE_OPERAND (epilog_stmt, 0) = new_name;
968 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
969 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
970 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
973 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
974 build2 (code, vectype, new_name, new_temp));
975 new_temp = make_ssa_name (vec_dest, epilog_stmt);
976 TREE_OPERAND (epilog_stmt, 0) = new_temp;
977 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
978 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
979 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
982 extract_scalar_result = true;
983 adjust_in_epilog = true;
985 else
987 /*** Case 3:
988 Create: s = init;
989 for (offset=0; offset<vector_size; offset+=element_size;)
991 Create: s' = extract_field <v_out2, offset>
992 Create: s = op <s, s'>
993 } */
995 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
996 fprintf (vect_dump, "Reduce using scalar code. ");
998 vec_temp = PHI_RESULT (new_phi);
999 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1001 /* first iteration is peeled out when possible to minimize
1002 the number of operations we generate: */
1003 if (code == PLUS_EXPR
1004 && (integer_zerop (scalar_initial_def)
1005 || real_zerop (scalar_initial_def)))
1007 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1008 build3 (BIT_FIELD_REF, scalar_type,
1009 vec_temp, bitsize, bitsize_zero_node));
1010 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1011 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1012 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1013 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1014 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1016 bit_offset = element_bitsize;
1018 else
1020 new_temp = scalar_initial_def;
1021 bit_offset = 0;
1024 for (;
1025 bit_offset < vec_size_in_bits;
1026 bit_offset += element_bitsize)
1028 tree bitpos = bitsize_int (bit_offset);
1030 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1031 build3 (BIT_FIELD_REF, scalar_type,
1032 vec_temp, bitsize, bitpos));
1033 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1034 TREE_OPERAND (epilog_stmt, 0) = new_name;
1035 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1036 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1037 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1040 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1041 build2 (code, scalar_type, new_name, new_temp));
1042 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1043 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1044 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1045 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1046 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1049 extract_scalar_result = false;
1050 adjust_in_epilog = false;
1055 /* 2.3 Extract the final scalar result. Create:
1056 s_out3 = extract_field <v_out2, bitpos> */
1058 if (extract_scalar_result)
1060 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1061 fprintf (vect_dump, "extract scalar result");
1063 /* The result is in the low order bits. */
1064 if (BITS_BIG_ENDIAN)
1065 bitpos = size_binop (MULT_EXPR,
1066 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1067 TYPE_SIZE (scalar_type));
1068 else
1069 bitpos = bitsize_zero_node;
1071 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1072 build3 (BIT_FIELD_REF, scalar_type,
1073 new_temp, bitsize, bitpos));
1074 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1075 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1076 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1077 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1078 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1082 /* 2.4 Adjust the final result by the initial value of the reduction
1083 variable. (when such adjustment is not needed, then
1084 'scalar_initial_def' is zero).
1086 Create:
1087 s_out = scalar_expr <s_out, scalar_initial_def> */
1089 if (adjust_in_epilog)
1091 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1092 build2 (code, scalar_type, new_temp, scalar_initial_def));
1093 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1094 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1095 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1097 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1098 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1102 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1104 /* Find the loop-closed-use at the loop exit of the original
1105 scalar result. (The reduction result is expected to have
1106 two immediate uses - one at the latch block, and one at the
1107 loop exit). */
1108 exit_phi = NULL;
1109 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1111 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1113 exit_phi = USE_STMT (use_p);
1114 break;
1118 orig_name = PHI_RESULT (exit_phi);
1120 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1121 SET_USE (use_p, new_temp);
1125 /* Function vectorizable_reduction.
1127 Check if STMT performs a reduction operation that can be vectorized.
1128 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1129 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1130 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1132 bool
1133 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1135 tree vec_dest;
1136 tree scalar_dest;
1137 tree op0, op1;
1138 tree loop_vec_def;
1139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1141 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1142 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1143 tree operation;
1144 enum tree_code code, reduc_code = 0;
1145 enum machine_mode vec_mode;
1146 int op_type;
1147 optab optab, reduc_optab;
1148 tree new_temp;
1149 tree def0, def1, def_stmt0, def_stmt1;
1150 enum vect_def_type dt0, dt1;
1151 tree new_phi;
1152 tree scalar_type;
1153 bool is_simple_use0;
1154 bool is_simple_use1;
1156 /* Is vectorizable reduction? */
1158 /* Not supportable if the reduction variable is used in the loop. */
1159 if (STMT_VINFO_RELEVANT_P (stmt_info))
1160 return false;
1162 if (!STMT_VINFO_LIVE_P (stmt_info))
1163 return false;
1165 /* Make sure it was already recognized as a reduction pattern. */
1166 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1167 return false;
1169 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1171 operation = TREE_OPERAND (stmt, 1);
1172 code = TREE_CODE (operation);
1173 op_type = TREE_CODE_LENGTH (code);
1175 if (op_type != binary_op)
1176 return false;
1178 op0 = TREE_OPERAND (operation, 0);
1179 op1 = TREE_OPERAND (operation, 1);
1180 scalar_dest = TREE_OPERAND (stmt, 0);
1181 scalar_type = TREE_TYPE (scalar_dest);
1183 /* Check the first operand. It is expected to be defined inside the loop. */
1184 is_simple_use0 =
1185 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1186 is_simple_use1 =
1187 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1189 gcc_assert (is_simple_use0);
1190 gcc_assert (is_simple_use1);
1191 gcc_assert (dt0 == vect_loop_def);
1192 gcc_assert (dt1 == vect_reduction_def);
1193 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1194 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1196 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1197 return false;
1199 /* Supportable by target? */
1201 /* check support for the operation in the loop */
1202 optab = optab_for_tree_code (code, vectype);
1203 if (!optab)
1205 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1206 fprintf (vect_dump, "no optab.");
1207 return false;
1209 vec_mode = TYPE_MODE (vectype);
1210 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1212 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1213 fprintf (vect_dump, "op not supported by target.");
1214 return false;
1217 /* check support for the epilog operation */
1218 if (!reduction_code_for_scalar_code (code, &reduc_code))
1219 return false;
1220 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1221 if (!reduc_optab)
1223 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1224 fprintf (vect_dump, "no optab for reduction.");
1225 reduc_code = NUM_TREE_CODES;
1227 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1229 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1230 fprintf (vect_dump, "reduc op not supported by target.");
1231 reduc_code = NUM_TREE_CODES;
1234 if (!vec_stmt) /* transformation not required. */
1236 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1237 return true;
1240 /** Transform. **/
1242 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1243 fprintf (vect_dump, "transform reduction.");
1245 /* Create the destination vector */
1246 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1249 /* Create the reduction-phi that defines the reduction-operand. */
1250 new_phi = create_phi_node (vec_dest, loop->header);
1253 /* Prepare the operand that is defined inside the loop body */
1254 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1255 gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
1258 /* Create the vectorized operation that computes the partial results */
1259 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1260 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1261 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1262 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1263 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1266 /* Finalize the reduction-phi (set it's arguments) and create the
1267 epilog reduction code. */
1268 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1269 return true;
1273 /* Function vectorizable_assignment.
1275 Check if STMT performs an assignment (copy) that can be vectorized.
1276 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1277 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1278 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1280 bool
1281 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1283 tree vec_dest;
1284 tree scalar_dest;
1285 tree op;
1286 tree vec_oprnd;
1287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1290 tree new_temp;
1291 tree def, def_stmt;
1292 enum vect_def_type dt;
1294 /* Is vectorizable assignment? */
1295 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1296 return false;
1298 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1300 if (TREE_CODE (stmt) != MODIFY_EXPR)
1301 return false;
1303 scalar_dest = TREE_OPERAND (stmt, 0);
1304 if (TREE_CODE (scalar_dest) != SSA_NAME)
1305 return false;
1307 op = TREE_OPERAND (stmt, 1);
1308 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1310 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1311 fprintf (vect_dump, "use not simple.");
1312 return false;
1315 if (!vec_stmt) /* transformation not required. */
1317 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1318 return true;
1321 /** Transform. **/
1322 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1323 fprintf (vect_dump, "transform assignment.");
1325 /* Handle def. */
1326 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1328 /* Handle use. */
1329 op = TREE_OPERAND (stmt, 1);
1330 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1332 /* Arguments are ready. create the new vector stmt. */
1333 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1334 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1335 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1336 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1338 return true;
1342 /* Function vect_min_worthwhile_factor.
1344 For a loop where we could vectorize the operation indicated by CODE,
1345 return the minimum vectorization factor that makes it worthwhile
1346 to use generic vectors. */
1347 static int
1348 vect_min_worthwhile_factor (enum tree_code code)
1350 switch (code)
1352 case PLUS_EXPR:
1353 case MINUS_EXPR:
1354 case NEGATE_EXPR:
1355 return 4;
1357 case BIT_AND_EXPR:
1358 case BIT_IOR_EXPR:
1359 case BIT_XOR_EXPR:
1360 case BIT_NOT_EXPR:
1361 return 2;
1363 default:
1364 return INT_MAX;
1369 /* Function vectorizable_operation.
1371 Check if STMT performs a binary or unary operation that can be vectorized.
1372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1373 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1376 bool
1377 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1379 tree vec_dest;
1380 tree scalar_dest;
1381 tree operation;
1382 tree op0, op1 = NULL;
1383 tree vec_oprnd0, vec_oprnd1=NULL;
1384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1385 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1386 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1387 int i;
1388 enum tree_code code;
1389 enum machine_mode vec_mode;
1390 tree new_temp;
1391 int op_type;
1392 tree op;
1393 optab optab;
1394 tree def, def_stmt;
1395 enum vect_def_type dt;
1397 /* Is STMT a vectorizable binary/unary operation? */
1398 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1399 return false;
1401 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1403 if (STMT_VINFO_LIVE_P (stmt_info))
1405 /* FORNOW: not yet supported. */
1406 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1407 fprintf (vect_dump, "value used after loop.");
1408 return false;
1411 if (TREE_CODE (stmt) != MODIFY_EXPR)
1412 return false;
1414 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1415 return false;
1417 operation = TREE_OPERAND (stmt, 1);
1418 code = TREE_CODE (operation);
1419 optab = optab_for_tree_code (code, vectype);
1421 /* Support only unary or binary operations. */
1422 op_type = TREE_CODE_LENGTH (code);
1423 if (op_type != unary_op && op_type != binary_op)
1425 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1426 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1427 return false;
1430 for (i = 0; i < op_type; i++)
1432 op = TREE_OPERAND (operation, i);
1433 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1435 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1436 fprintf (vect_dump, "use not simple.");
1437 return false;
1441 /* Supportable by target? */
1442 if (!optab)
1444 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1445 fprintf (vect_dump, "no optab.");
1446 return false;
1448 vec_mode = TYPE_MODE (vectype);
1449 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1451 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1452 fprintf (vect_dump, "op not supported by target.");
1453 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1454 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1455 < vect_min_worthwhile_factor (code))
1456 return false;
1457 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1458 fprintf (vect_dump, "proceeding using word mode.");
1461 /* Worthwhile without SIMD support? */
1462 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1463 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1464 < vect_min_worthwhile_factor (code))
1466 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1467 fprintf (vect_dump, "not worthwhile without SIMD support.");
1468 return false;
1471 if (!vec_stmt) /* transformation not required. */
1473 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1474 return true;
1477 /** Transform. **/
1479 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1480 fprintf (vect_dump, "transform binary/unary operation.");
1482 /* Handle def. */
1483 scalar_dest = TREE_OPERAND (stmt, 0);
1484 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1486 /* Handle uses. */
1487 op0 = TREE_OPERAND (operation, 0);
1488 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1490 if (op_type == binary_op)
1492 op1 = TREE_OPERAND (operation, 1);
1493 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1496 /* Arguments are ready. create the new vector stmt. */
1498 if (op_type == binary_op)
1499 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1500 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1501 else
1502 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1503 build1 (code, vectype, vec_oprnd0));
1504 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1505 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1506 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1508 return true;
1512 /* Function vectorizable_store.
1514 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1515 can be vectorized.
1516 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1517 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1518 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1520 bool
1521 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1523 tree scalar_dest;
1524 tree data_ref;
1525 tree op;
1526 tree vec_oprnd1;
1527 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1528 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1529 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1530 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1531 enum machine_mode vec_mode;
1532 tree dummy;
1533 enum dr_alignment_support alignment_support_cheme;
1534 ssa_op_iter iter;
1535 tree def, def_stmt;
1536 enum vect_def_type dt;
1538 /* Is vectorizable store? */
1540 if (TREE_CODE (stmt) != MODIFY_EXPR)
1541 return false;
1543 scalar_dest = TREE_OPERAND (stmt, 0);
1544 if (TREE_CODE (scalar_dest) != ARRAY_REF
1545 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1546 return false;
1548 op = TREE_OPERAND (stmt, 1);
1549 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1551 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1552 fprintf (vect_dump, "use not simple.");
1553 return false;
1556 vec_mode = TYPE_MODE (vectype);
1557 /* FORNOW. In some cases can vectorize even if data-type not supported
1558 (e.g. - array initialization with 0). */
1559 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1560 return false;
1562 if (!STMT_VINFO_DATA_REF (stmt_info))
1563 return false;
1566 if (!vec_stmt) /* transformation not required. */
1568 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1569 return true;
1572 /** Transform. **/
1574 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1575 fprintf (vect_dump, "transform store");
1577 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1578 gcc_assert (alignment_support_cheme);
1579 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1581 /* Handle use - get the vectorized def from the defining stmt. */
1582 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1584 /* Handle def. */
1585 /* FORNOW: make sure the data reference is aligned. */
1586 vect_align_data_ref (stmt);
1587 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1588 data_ref = build_fold_indirect_ref (data_ref);
1590 /* Arguments are ready. create the new vector stmt. */
1591 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1592 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1594 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1595 element's definition to the vector's definition then update the
1596 defining statement. The original is being deleted so the same
1597 SSA_NAMEs can be used. */
1598 copy_virtual_operands (*vec_stmt, stmt);
1600 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1602 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1604 /* If this virtual def has a use outside the loop and a loop peel is
1605 performed then the def may be renamed by the peel. Mark it for
1606 renaming so the later use will also be renamed. */
1607 mark_sym_for_renaming (SSA_NAME_VAR (def));
1610 return true;
1614 /* vectorizable_load.
1616 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1617 can be vectorized.
1618 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1619 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1622 bool
1623 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1625 tree scalar_dest;
1626 tree vec_dest = NULL;
1627 tree data_ref = NULL;
1628 tree op;
1629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1630 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1631 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1632 tree new_temp;
1633 int mode;
1634 tree init_addr;
1635 tree new_stmt;
1636 tree dummy;
1637 basic_block new_bb;
1638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1639 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1640 edge pe = loop_preheader_edge (loop);
1641 enum dr_alignment_support alignment_support_cheme;
1643 /* Is vectorizable load? */
1644 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1645 return false;
1647 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1649 if (STMT_VINFO_LIVE_P (stmt_info))
1651 /* FORNOW: not yet supported. */
1652 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1653 fprintf (vect_dump, "value used after loop.");
1654 return false;
1657 if (TREE_CODE (stmt) != MODIFY_EXPR)
1658 return false;
1660 scalar_dest = TREE_OPERAND (stmt, 0);
1661 if (TREE_CODE (scalar_dest) != SSA_NAME)
1662 return false;
1664 op = TREE_OPERAND (stmt, 1);
1665 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1666 return false;
1668 if (!STMT_VINFO_DATA_REF (stmt_info))
1669 return false;
1671 mode = (int) TYPE_MODE (vectype);
1673 /* FORNOW. In some cases can vectorize even if data-type not supported
1674 (e.g. - data copies). */
1675 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1677 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1678 fprintf (vect_dump, "Aligned load, but unsupported type.");
1679 return false;
1682 if (!vec_stmt) /* transformation not required. */
1684 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1685 return true;
1688 /** Transform. **/
1690 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1691 fprintf (vect_dump, "transform load.");
1693 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1694 gcc_assert (alignment_support_cheme);
1696 if (alignment_support_cheme == dr_aligned
1697 || alignment_support_cheme == dr_unaligned_supported)
1699 /* Create:
1700 p = initial_addr;
1701 indx = 0;
1702 loop {
1703 vec_dest = *(p);
1704 indx = indx + 1;
1708 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1709 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1710 if (aligned_access_p (dr))
1711 data_ref = build_fold_indirect_ref (data_ref);
1712 else
1714 int mis = DR_MISALIGNMENT (dr);
1715 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1716 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1717 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1719 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1720 new_temp = make_ssa_name (vec_dest, new_stmt);
1721 TREE_OPERAND (new_stmt, 0) = new_temp;
1722 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1723 copy_virtual_operands (new_stmt, stmt);
1725 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1727 /* Create:
1728 p1 = initial_addr;
1729 msq_init = *(floor(p1))
1730 p2 = initial_addr + VS - 1;
1731 magic = have_builtin ? builtin_result : initial_address;
1732 indx = 0;
1733 loop {
1734 p2' = p2 + indx * vectype_size
1735 lsq = *(floor(p2'))
1736 vec_dest = realign_load (msq, lsq, magic)
1737 indx = indx + 1;
1738 msq = lsq;
1742 tree offset;
1743 tree magic;
1744 tree phi_stmt;
1745 tree msq_init;
1746 tree msq, lsq;
1747 tree dataref_ptr;
1748 tree params;
1750 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1751 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1752 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1753 &init_addr, true);
1754 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1755 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1756 new_temp = make_ssa_name (vec_dest, new_stmt);
1757 TREE_OPERAND (new_stmt, 0) = new_temp;
1758 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1759 gcc_assert (!new_bb);
1760 msq_init = TREE_OPERAND (new_stmt, 0);
1761 copy_virtual_operands (new_stmt, stmt);
1762 update_vuses_to_preheader (new_stmt, loop);
1765 /* <2> Create lsq = *(floor(p2')) in the loop */
1766 offset = build_int_cst (integer_type_node,
1767 TYPE_VECTOR_SUBPARTS (vectype));
1768 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1769 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1770 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1771 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1772 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1773 new_temp = make_ssa_name (vec_dest, new_stmt);
1774 TREE_OPERAND (new_stmt, 0) = new_temp;
1775 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1776 lsq = TREE_OPERAND (new_stmt, 0);
1777 copy_virtual_operands (new_stmt, stmt);
1780 /* <3> */
1781 if (targetm.vectorize.builtin_mask_for_load)
1783 /* Create permutation mask, if required, in loop preheader. */
1784 tree builtin_decl;
1785 params = build_tree_list (NULL_TREE, init_addr);
1786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1787 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1788 new_stmt = build_function_call_expr (builtin_decl, params);
1789 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1790 new_temp = make_ssa_name (vec_dest, new_stmt);
1791 TREE_OPERAND (new_stmt, 0) = new_temp;
1792 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1793 gcc_assert (!new_bb);
1794 magic = TREE_OPERAND (new_stmt, 0);
1796 /* The result of the CALL_EXPR to this builtin is determined from
1797 the value of the parameter and no global variables are touched
1798 which makes the builtin a "const" function. Requiring the
1799 builtin to have the "const" attribute makes it unnecessary
1800 to call mark_call_clobbered_vars_to_rename. */
1801 gcc_assert (TREE_READONLY (builtin_decl));
1803 else
1805 /* Use current address instead of init_addr for reduced reg pressure.
1807 magic = dataref_ptr;
1811 /* <4> Create msq = phi <msq_init, lsq> in loop */
1812 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1813 msq = make_ssa_name (vec_dest, NULL_TREE);
1814 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1815 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1816 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1817 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1820 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1821 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1822 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1823 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1824 new_temp = make_ssa_name (vec_dest, new_stmt);
1825 TREE_OPERAND (new_stmt, 0) = new_temp;
1826 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1828 else
1829 gcc_unreachable ();
1831 *vec_stmt = new_stmt;
1832 return true;
1836 /* Function vectorizable_live_operation.
1838 STMT computes a value that is used outside the loop. Check if
1839 it can be supported. */
1841 bool
1842 vectorizable_live_operation (tree stmt,
1843 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1844 tree *vec_stmt ATTRIBUTE_UNUSED)
1846 tree operation;
1847 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1848 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1849 int i;
1850 enum tree_code code;
1851 int op_type;
1852 tree op;
1853 tree def, def_stmt;
1854 enum vect_def_type dt;
1856 if (!STMT_VINFO_LIVE_P (stmt_info))
1857 return false;
1859 if (TREE_CODE (stmt) != MODIFY_EXPR)
1860 return false;
1862 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1863 return false;
1865 operation = TREE_OPERAND (stmt, 1);
1866 code = TREE_CODE (operation);
1868 op_type = TREE_CODE_LENGTH (code);
1870 /* FORNOW: support only if all uses are invariant. This means
1871 that the scalar operations can remain in place, unvectorized.
1872 The original last scalar value that they compute will be used. */
1874 for (i = 0; i < op_type; i++)
1876 op = TREE_OPERAND (operation, i);
1877 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1879 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1880 fprintf (vect_dump, "use not simple.");
1881 return false;
1884 if (dt != vect_invariant_def && dt != vect_constant_def)
1885 return false;
1888 /* No transformation is required for the cases we currently support. */
1889 return true;
1893 /* Function vect_is_simple_cond.
1895 Input:
1896 LOOP - the loop that is being vectorized.
1897 COND - Condition that is checked for simple use.
1899 Returns whether a COND can be vectorized. Checks whether
1900 condition operands are supportable using vec_is_simple_use. */
1902 static bool
1903 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1905 tree lhs, rhs;
1906 tree def;
1907 enum vect_def_type dt;
1909 if (!COMPARISON_CLASS_P (cond))
1910 return false;
1912 lhs = TREE_OPERAND (cond, 0);
1913 rhs = TREE_OPERAND (cond, 1);
1915 if (TREE_CODE (lhs) == SSA_NAME)
1917 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1918 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1919 return false;
1921 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1922 return false;
1924 if (TREE_CODE (rhs) == SSA_NAME)
1926 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1927 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1928 return false;
1930 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1931 return false;
1933 return true;
1936 /* vectorizable_condition.
1938 Check if STMT is conditional modify expression that can be vectorized.
1939 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1940 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1941 at BSI.
1943 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1945 bool
1946 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1948 tree scalar_dest = NULL_TREE;
1949 tree vec_dest = NULL_TREE;
1950 tree op = NULL_TREE;
1951 tree cond_expr, then_clause, else_clause;
1952 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1953 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1954 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1955 tree vec_compare, vec_cond_expr;
1956 tree new_temp;
1957 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1958 enum machine_mode vec_mode;
1959 tree def;
1960 enum vect_def_type dt;
1962 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1963 return false;
1965 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1967 if (STMT_VINFO_LIVE_P (stmt_info))
1969 /* FORNOW: not yet supported. */
1970 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1971 fprintf (vect_dump, "value used after loop.");
1972 return false;
1975 if (TREE_CODE (stmt) != MODIFY_EXPR)
1976 return false;
1978 op = TREE_OPERAND (stmt, 1);
1980 if (TREE_CODE (op) != COND_EXPR)
1981 return false;
1983 cond_expr = TREE_OPERAND (op, 0);
1984 then_clause = TREE_OPERAND (op, 1);
1985 else_clause = TREE_OPERAND (op, 2);
1987 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1988 return false;
1990 if (TREE_CODE (then_clause) == SSA_NAME)
1992 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1993 if (!vect_is_simple_use (then_clause, loop_vinfo,
1994 &then_def_stmt, &def, &dt))
1995 return false;
1997 else if (TREE_CODE (then_clause) != INTEGER_CST
1998 && TREE_CODE (then_clause) != REAL_CST)
1999 return false;
2001 if (TREE_CODE (else_clause) == SSA_NAME)
2003 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2004 if (!vect_is_simple_use (else_clause, loop_vinfo,
2005 &else_def_stmt, &def, &dt))
2006 return false;
2008 else if (TREE_CODE (else_clause) != INTEGER_CST
2009 && TREE_CODE (else_clause) != REAL_CST)
2010 return false;
2013 vec_mode = TYPE_MODE (vectype);
2015 if (!vec_stmt)
2017 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2018 return expand_vec_cond_expr_p (op, vec_mode);
2021 /* Transform */
2023 /* Handle def. */
2024 scalar_dest = TREE_OPERAND (stmt, 0);
2025 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2027 /* Handle cond expr. */
2028 vec_cond_lhs =
2029 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2030 vec_cond_rhs =
2031 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2032 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2033 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2035 /* Arguments are ready. create the new vector stmt. */
2036 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2037 vec_cond_lhs, vec_cond_rhs);
2038 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2039 vec_compare, vec_then_clause, vec_else_clause);
2041 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2042 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2043 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2044 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2046 return true;
2049 /* Function vect_transform_stmt.
2051 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2053 bool
2054 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2056 bool is_store = false;
2057 tree vec_stmt = NULL_TREE;
2058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2059 bool done;
2061 if (STMT_VINFO_RELEVANT_P (stmt_info))
2063 switch (STMT_VINFO_TYPE (stmt_info))
2065 case op_vec_info_type:
2066 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2067 gcc_assert (done);
2068 break;
2070 case assignment_vec_info_type:
2071 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2072 gcc_assert (done);
2073 break;
2075 case load_vec_info_type:
2076 done = vectorizable_load (stmt, bsi, &vec_stmt);
2077 gcc_assert (done);
2078 break;
2080 case store_vec_info_type:
2081 done = vectorizable_store (stmt, bsi, &vec_stmt);
2082 gcc_assert (done);
2083 is_store = true;
2084 break;
2086 case condition_vec_info_type:
2087 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2088 gcc_assert (done);
2089 break;
2091 default:
2092 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2093 fprintf (vect_dump, "stmt not supported.");
2094 gcc_unreachable ();
2097 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2100 if (STMT_VINFO_LIVE_P (stmt_info))
2102 switch (STMT_VINFO_TYPE (stmt_info))
2104 case reduc_vec_info_type:
2105 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2106 gcc_assert (done);
2107 break;
2109 default:
2110 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2111 gcc_assert (done);
2114 if (vec_stmt)
2116 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2117 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2121 return is_store;
2125 /* This function builds ni_name = number of iterations loop executes
2126 on the loop preheader. */
2128 static tree
2129 vect_build_loop_niters (loop_vec_info loop_vinfo)
2131 tree ni_name, stmt, var;
2132 edge pe;
2133 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2134 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2136 var = create_tmp_var (TREE_TYPE (ni), "niters");
2137 add_referenced_tmp_var (var);
2138 ni_name = force_gimple_operand (ni, &stmt, false, var);
2140 pe = loop_preheader_edge (loop);
2141 if (stmt)
2143 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2144 gcc_assert (!new_bb);
2147 return ni_name;
2151 /* This function generates the following statements:
2153 ni_name = number of iterations loop executes
2154 ratio = ni_name / vf
2155 ratio_mult_vf_name = ratio * vf
2157 and places them at the loop preheader edge. */
2159 static void
2160 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2161 tree *ni_name_ptr,
2162 tree *ratio_mult_vf_name_ptr,
2163 tree *ratio_name_ptr)
2166 edge pe;
2167 basic_block new_bb;
2168 tree stmt, ni_name;
2169 tree var;
2170 tree ratio_name;
2171 tree ratio_mult_vf_name;
2172 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2173 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2174 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2175 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2177 pe = loop_preheader_edge (loop);
2179 /* Generate temporary variable that contains
2180 number of iterations loop executes. */
2182 ni_name = vect_build_loop_niters (loop_vinfo);
2184 /* Create: ratio = ni >> log2(vf) */
2186 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2187 add_referenced_tmp_var (var);
2188 ratio_name = make_ssa_name (var, NULL_TREE);
2189 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2190 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2191 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2193 pe = loop_preheader_edge (loop);
2194 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2195 gcc_assert (!new_bb);
2197 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2199 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2200 add_referenced_tmp_var (var);
2201 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2202 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2203 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2204 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2206 pe = loop_preheader_edge (loop);
2207 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2208 gcc_assert (!new_bb);
2210 *ni_name_ptr = ni_name;
2211 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2212 *ratio_name_ptr = ratio_name;
2214 return;
2218 /* Function update_vuses_to_preheader.
2220 Input:
2221 STMT - a statement with potential VUSEs.
2222 LOOP - the loop whose preheader will contain STMT.
2224 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2225 appears to be defined in a V_MAY_DEF in another statement in a loop.
2226 One such case is when the VUSE is at the dereference of a __restricted__
2227 pointer in a load and the V_MAY_DEF is at the dereference of a different
2228 __restricted__ pointer in a store. Vectorization may result in
2229 copy_virtual_uses being called to copy the problematic VUSE to a new
2230 statement that is being inserted in the loop preheader. This procedure
2231 is called to change the SSA_NAME in the new statement's VUSE from the
2232 SSA_NAME updated in the loop to the related SSA_NAME available on the
2233 path entering the loop.
2235 When this function is called, we have the following situation:
2237 # vuse <name1>
2238 S1: vload
2239 do {
2240 # name1 = phi < name0 , name2>
2242 # vuse <name1>
2243 S2: vload
2245 # name2 = vdef <name1>
2246 S3: vstore
2248 }while...
2250 Stmt S1 was created in the loop preheader block as part of misaligned-load
2251 handling. This function fixes the name of the vuse of S1 from 'name1' to
2252 'name0'. */
2254 static void
2255 update_vuses_to_preheader (tree stmt, struct loop *loop)
2257 basic_block header_bb = loop->header;
2258 edge preheader_e = loop_preheader_edge (loop);
2259 ssa_op_iter iter;
2260 use_operand_p use_p;
2262 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2264 tree ssa_name = USE_FROM_PTR (use_p);
2265 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2266 tree name_var = SSA_NAME_VAR (ssa_name);
2267 basic_block bb = bb_for_stmt (def_stmt);
2269 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2270 if (!IS_EMPTY_STMT (def_stmt)
2271 && flow_bb_inside_loop_p (loop, bb))
2273 /* If the block containing the statement defining the SSA_NAME
2274 is in the loop then it's necessary to find the definition
2275 outside the loop using the PHI nodes of the header. */
2276 tree phi;
2277 bool updated = false;
2279 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2281 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2283 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2284 updated = true;
2285 break;
2288 gcc_assert (updated);
2294 /* Function vect_update_ivs_after_vectorizer.
2296 "Advance" the induction variables of LOOP to the value they should take
2297 after the execution of LOOP. This is currently necessary because the
2298 vectorizer does not handle induction variables that are used after the
2299 loop. Such a situation occurs when the last iterations of LOOP are
2300 peeled, because:
2301 1. We introduced new uses after LOOP for IVs that were not originally used
2302 after LOOP: the IVs of LOOP are now used by an epilog loop.
2303 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2304 times, whereas the loop IVs should be bumped N times.
2306 Input:
2307 - LOOP - a loop that is going to be vectorized. The last few iterations
2308 of LOOP were peeled.
2309 - NITERS - the number of iterations that LOOP executes (before it is
2310 vectorized). i.e, the number of times the ivs should be bumped.
2311 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2312 coming out from LOOP on which there are uses of the LOOP ivs
2313 (this is the path from LOOP->exit to epilog_loop->preheader).
2315 The new definitions of the ivs are placed in LOOP->exit.
2316 The phi args associated with the edge UPDATE_E in the bb
2317 UPDATE_E->dest are updated accordingly.
2319 Assumption 1: Like the rest of the vectorizer, this function assumes
2320 a single loop exit that has a single predecessor.
2322 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2323 organized in the same order.
2325 Assumption 3: The access function of the ivs is simple enough (see
2326 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2328 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2329 coming out of LOOP on which the ivs of LOOP are used (this is the path
2330 that leads to the epilog loop; other paths skip the epilog loop). This
2331 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2332 needs to have its phis updated.
2335 static void
2336 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2337 edge update_e)
2339 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2340 basic_block exit_bb = loop->single_exit->dest;
2341 tree phi, phi1;
2342 basic_block update_bb = update_e->dest;
2344 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2346 /* Make sure there exists a single-predecessor exit bb: */
2347 gcc_assert (single_pred_p (exit_bb));
2349 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2350 phi && phi1;
2351 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2353 tree access_fn = NULL;
2354 tree evolution_part;
2355 tree init_expr;
2356 tree step_expr;
2357 tree var, stmt, ni, ni_name;
2358 block_stmt_iterator last_bsi;
2360 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2362 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2363 print_generic_expr (vect_dump, phi, TDF_SLIM);
2366 /* Skip virtual phi's. */
2367 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2369 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2370 fprintf (vect_dump, "virtual phi. skip.");
2371 continue;
2374 /* Skip reduction phis. */
2375 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2377 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2378 fprintf (vect_dump, "reduc phi. skip.");
2379 continue;
2382 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2383 gcc_assert (access_fn);
2384 evolution_part =
2385 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2386 gcc_assert (evolution_part != NULL_TREE);
2388 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2389 of degree >= 2 or exponential. */
2390 gcc_assert (!tree_is_chrec (evolution_part));
2392 step_expr = evolution_part;
2393 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2394 loop->num));
2396 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2397 build2 (MULT_EXPR, TREE_TYPE (niters),
2398 niters, step_expr), init_expr);
2400 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2401 add_referenced_tmp_var (var);
2403 ni_name = force_gimple_operand (ni, &stmt, false, var);
2405 /* Insert stmt into exit_bb. */
2406 last_bsi = bsi_last (exit_bb);
2407 if (stmt)
2408 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2410 /* Fix phi expressions in the successor bb. */
2411 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2416 /* Function vect_do_peeling_for_loop_bound
2418 Peel the last iterations of the loop represented by LOOP_VINFO.
2419 The peeled iterations form a new epilog loop. Given that the loop now
2420 iterates NITERS times, the new epilog loop iterates
2421 NITERS % VECTORIZATION_FACTOR times.
2423 The original loop will later be made to iterate
2424 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2426 static void
2427 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2428 struct loops *loops)
2430 tree ni_name, ratio_mult_vf_name;
2431 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2432 struct loop *new_loop;
2433 edge update_e;
2434 basic_block preheader;
2435 int loop_num;
2437 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2438 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2440 initialize_original_copy_tables ();
2442 /* Generate the following variables on the preheader of original loop:
2444 ni_name = number of iteration the original loop executes
2445 ratio = ni_name / vf
2446 ratio_mult_vf_name = ratio * vf */
2447 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2448 &ratio_mult_vf_name, ratio);
2450 loop_num = loop->num;
2451 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2452 ratio_mult_vf_name, ni_name, false);
2453 gcc_assert (new_loop);
2454 gcc_assert (loop_num == loop->num);
2455 #ifdef ENABLE_CHECKING
2456 slpeel_verify_cfg_after_peeling (loop, new_loop);
2457 #endif
2459 /* A guard that controls whether the new_loop is to be executed or skipped
2460 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2461 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2462 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2463 is on the path where the LOOP IVs are used and need to be updated. */
2465 preheader = loop_preheader_edge (new_loop)->src;
2466 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2467 update_e = EDGE_PRED (preheader, 0);
2468 else
2469 update_e = EDGE_PRED (preheader, 1);
2471 /* Update IVs of original loop as if they were advanced
2472 by ratio_mult_vf_name steps. */
2473 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2475 /* After peeling we have to reset scalar evolution analyzer. */
2476 scev_reset ();
2478 free_original_copy_tables ();
2482 /* Function vect_gen_niters_for_prolog_loop
2484 Set the number of iterations for the loop represented by LOOP_VINFO
2485 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2486 and the misalignment of DR - the data reference recorded in
2487 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2488 this loop, the data reference DR will refer to an aligned location.
2490 The following computation is generated:
2492 If the misalignment of DR is known at compile time:
2493 addr_mis = int mis = DR_MISALIGNMENT (dr);
2494 Else, compute address misalignment in bytes:
2495 addr_mis = addr & (vectype_size - 1)
2497 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2499 (elem_size = element type size; an element is the scalar element
2500 whose type is the inner type of the vectype) */
2502 static tree
2503 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2505 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2506 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2507 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2508 tree var, stmt;
2509 tree iters, iters_name;
2510 edge pe;
2511 basic_block new_bb;
2512 tree dr_stmt = DR_STMT (dr);
2513 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2514 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2515 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2516 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2517 tree niters_type = TREE_TYPE (loop_niters);
2519 pe = loop_preheader_edge (loop);
2521 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2523 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2524 int element_size = vectype_align/vf;
2525 int elem_misalign = byte_misalign / element_size;
2527 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2528 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2529 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2531 else
2533 tree new_stmts = NULL_TREE;
2534 tree start_addr =
2535 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2536 tree ptr_type = TREE_TYPE (start_addr);
2537 tree size = TYPE_SIZE (ptr_type);
2538 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2539 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2540 tree elem_size_log =
2541 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2542 tree vf_tree = build_int_cst (unsigned_type_node, vf);
2543 tree byte_misalign;
2544 tree elem_misalign;
2546 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2547 gcc_assert (!new_bb);
2549 /* Create: byte_misalign = addr & (vectype_size - 1) */
2550 byte_misalign =
2551 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2553 /* Create: elem_misalign = byte_misalign / element_size */
2554 elem_misalign =
2555 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2557 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2558 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2559 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2560 iters = fold_convert (niters_type, iters);
2563 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2564 /* If the loop bound is known at compile time we already verified that it is
2565 greater than vf; since the misalignment ('iters') is at most vf, there's
2566 no need to generate the MIN_EXPR in this case. */
2567 if (TREE_CODE (loop_niters) != INTEGER_CST)
2568 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2570 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2572 fprintf (vect_dump, "niters for prolog loop: ");
2573 print_generic_expr (vect_dump, iters, TDF_SLIM);
2576 var = create_tmp_var (niters_type, "prolog_loop_niters");
2577 add_referenced_tmp_var (var);
2578 iters_name = force_gimple_operand (iters, &stmt, false, var);
2580 /* Insert stmt on loop preheader edge. */
2581 if (stmt)
2583 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2584 gcc_assert (!new_bb);
2587 return iters_name;
2591 /* Function vect_update_init_of_dr
2593 NITERS iterations were peeled from LOOP. DR represents a data reference
2594 in LOOP. This function updates the information recorded in DR to
2595 account for the fact that the first NITERS iterations had already been
2596 executed. Specifically, it updates the OFFSET field of stmt_info. */
2598 static void
2599 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2601 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2602 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
2604 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters,
2605 STMT_VINFO_VECT_STEP (stmt_info));
2606 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2607 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2611 /* Function vect_update_inits_of_drs
2613 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2614 This function updates the information recorded for the data references in
2615 the loop to account for the fact that the first NITERS iterations had
2616 already been executed. Specifically, it updates the initial_condition of the
2617 access_function of all the data_references in the loop. */
2619 static void
2620 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2622 unsigned int i;
2623 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2624 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2626 if (vect_dump && (dump_flags & TDF_DETAILS))
2627 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2629 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2631 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2632 vect_update_init_of_dr (dr, niters);
2635 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2637 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2638 vect_update_init_of_dr (dr, niters);
2643 /* Function vect_do_peeling_for_alignment
2645 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2646 'niters' is set to the misalignment of one of the data references in the
2647 loop, thereby forcing it to refer to an aligned location at the beginning
2648 of the execution of this loop. The data reference for which we are
2649 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2651 static void
2652 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2655 tree niters_of_prolog_loop, ni_name;
2656 tree n_iters;
2657 struct loop *new_loop;
2659 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2660 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2662 initialize_original_copy_tables ();
2664 ni_name = vect_build_loop_niters (loop_vinfo);
2665 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2667 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2668 new_loop =
2669 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2670 niters_of_prolog_loop, ni_name, true);
2671 gcc_assert (new_loop);
2672 #ifdef ENABLE_CHECKING
2673 slpeel_verify_cfg_after_peeling (new_loop, loop);
2674 #endif
2676 /* Update number of times loop executes. */
2677 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2678 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2679 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2681 /* Update the init conditions of the access functions of all data refs. */
2682 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2684 /* After peeling we have to reset scalar evolution analyzer. */
2685 scev_reset ();
2687 free_original_copy_tables ();
2691 /* Function vect_transform_loop.
2693 The analysis phase has determined that the loop is vectorizable.
2694 Vectorize the loop - created vectorized stmts to replace the scalar
2695 stmts in the loop, and update the loop exit condition. */
2697 void
2698 vect_transform_loop (loop_vec_info loop_vinfo,
2699 struct loops *loops ATTRIBUTE_UNUSED)
2701 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2702 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2703 int nbbs = loop->num_nodes;
2704 block_stmt_iterator si;
2705 int i;
2706 tree ratio = NULL;
2707 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2709 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2710 fprintf (vect_dump, "=== vec_transform_loop ===");
2712 /* Peel the loop if there are data refs with unknown alignment.
2713 Only one data ref with unknown store is allowed. */
2715 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2716 vect_do_peeling_for_alignment (loop_vinfo, loops);
2718 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2719 compile time constant), or it is a constant that doesn't divide by the
2720 vectorization factor, then an epilog loop needs to be created.
2721 We therefore duplicate the loop: the original loop will be vectorized,
2722 and will compute the first (n/VF) iterations. The second copy of the loop
2723 will remain scalar and will compute the remaining (n%VF) iterations.
2724 (VF is the vectorization factor). */
2726 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2727 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2728 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2729 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2730 else
2731 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2732 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2734 /* 1) Make sure the loop header has exactly two entries
2735 2) Make sure we have a preheader basic block. */
2737 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2739 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2742 /* FORNOW: the vectorizer supports only loops which body consist
2743 of one basic block (header + empty latch). When the vectorizer will
2744 support more involved loop forms, the order by which the BBs are
2745 traversed need to be reconsidered. */
2747 for (i = 0; i < nbbs; i++)
2749 basic_block bb = bbs[i];
2751 for (si = bsi_start (bb); !bsi_end_p (si);)
2753 tree stmt = bsi_stmt (si);
2754 stmt_vec_info stmt_info;
2755 bool is_store;
2757 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2759 fprintf (vect_dump, "------>vectorizing statement: ");
2760 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2762 stmt_info = vinfo_for_stmt (stmt);
2763 gcc_assert (stmt_info);
2764 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2765 && !STMT_VINFO_LIVE_P (stmt_info))
2767 bsi_next (&si);
2768 continue;
2770 /* FORNOW: Verify that all stmts operate on the same number of
2771 units and no inner unrolling is necessary. */
2772 gcc_assert
2773 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2774 == (unsigned HOST_WIDE_INT) vectorization_factor);
2776 /* -------- vectorize statement ------------ */
2777 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2778 fprintf (vect_dump, "transform statement.");
2780 is_store = vect_transform_stmt (stmt, &si);
2781 if (is_store)
2783 /* Free the attached stmt_vec_info and remove the stmt. */
2784 stmt_ann_t ann = stmt_ann (stmt);
2785 free (stmt_info);
2786 set_stmt_info ((tree_ann_t)ann, NULL);
2787 bsi_remove (&si);
2788 continue;
2791 bsi_next (&si);
2792 } /* stmts in BB */
2793 } /* BBs in loop */
2795 slpeel_make_loop_iterate_ntimes (loop, ratio);
2797 /* The memory tags and pointers in vectorized statements need to
2798 have their SSA forms updated. FIXME, why can't this be delayed
2799 until all the loops have been transformed? */
2800 update_ssa (TODO_update_ssa);
2802 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
2803 fprintf (vect_dump, "LOOP VECTORIZED.");