Add files that I missed when importing NaCl changes earlier
[gcc/nacl-gcc.git] / gcc / tree-vect-transform.c
bloba5e4dcc5babd38f43c8a0a521bd979e0fe86a1bd
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
45 #include "real.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
55 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
56 static tree vect_init_vector (tree, tree);
57 static void vect_finish_stmt_generation
58 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
59 static bool vect_is_simple_cond (tree, loop_vec_info);
60 static void update_vuses_to_preheader (tree, struct loop*);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
83 provided. */
85 static tree
86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88 const char *prefix;
89 tree new_vect_var;
91 switch (var_kind)
93 case vect_simple_var:
94 prefix = "vect_";
95 break;
96 case vect_scalar_var:
97 prefix = "stmp_";
98 break;
99 case vect_pointer_var:
100 prefix = "vect_p";
101 break;
102 default:
103 gcc_unreachable ();
106 if (name)
107 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108 else
109 new_vect_var = create_tmp_var (type, prefix);
111 return new_vect_var;
115 /* Function vect_create_addr_base_for_vector_ref.
117 Create an expression that computes the address of the first memory location
118 that will be accessed for a data reference.
120 Input:
121 STMT: The statement containing the data reference.
122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
123 OFFSET: Optional. If supplied, it is be added to the initial address.
125 Output:
126 1. Return an SSA_NAME whose value is the address of the memory location of
127 the first vector of the data reference.
128 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
129 these statement(s) which define the returned SSA_NAME.
131 FORNOW: We are only handling array accesses with step 1. */
133 static tree
134 vect_create_addr_base_for_vector_ref (tree stmt,
135 tree *new_stmt_list,
136 tree offset)
/* Gather the components of the data reference recorded by the analysis
   phase: base address, constant init displacement and variable offset.  */
138 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
139 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
140 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
141 tree base_name = build_fold_indirect_ref (data_ref_base);
142 tree ref = DR_REF (dr);
143 tree scalar_type = TREE_TYPE (ref);
144 tree scalar_ptr_type = build_pointer_type (scalar_type);
145 tree vec_stmt;
146 tree new_temp;
147 tree addr_base, addr_expr;
148 tree dest, new_stmt;
149 tree base_offset = unshare_expr (DR_OFFSET (dr));
150 tree init = unshare_expr (DR_INIT (dr));
152 /* Create base_offset */
/* Fold DR_INIT into the offset, then gimplify the result; any statements
   produced are appended to NEW_STMT_LIST for the caller to insert.  */
153 base_offset = size_binop (PLUS_EXPR, base_offset, init);
154 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
155 add_referenced_var (dest);
156 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
157 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Optional extra OFFSET is scaled by DR_STEP before being added.
   NOTE(review): this implies OFFSET is expressed in units of the access
   step (elements), not bytes — confirm against callers.  */
159 if (offset)
161 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
162 add_referenced_var (tmp);
163 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
164 DR_STEP (dr));
165 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
166 base_offset, offset);
167 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
168 append_to_statement_list_force (new_stmt, new_stmt_list);
171 /* base + base_offset */
172 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
173 base_offset);
175 /* addr_expr = addr_base */
/* Materialize the address in a fresh SSA name; the defining statement is
   appended to NEW_STMT_LIST, so it is NOT inserted here.  */
176 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
177 get_name (base_name));
178 add_referenced_var (addr_expr);
179 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
180 new_temp = make_ssa_name (addr_expr, vec_stmt);
181 TREE_OPERAND (vec_stmt, 0) = new_temp;
182 append_to_statement_list_force (vec_stmt, new_stmt_list)
184 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "created ");
187 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
/* Return the SSA name holding the first accessed address.  */
189 return new_temp;
193 /* Function vect_align_data_ref.
195 Handle misalignment of a memory accesses.
197 FORNOW: Can't handle misaligned accesses.
198 Make sure that the dataref is aligned. */
200 static void
201 vect_align_data_ref (tree stmt)
203 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
204 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
206 /* FORNOW: can't handle misaligned accesses;
207 all accesses expected to be aligned. */
208 gcc_assert (aligned_access_p (dr));
212 /* Function vect_create_data_ref_ptr.
214 Create a memory reference expression for vector access, to be used in a
215 vector load/store stmt. The reference is based on a new pointer to vector
216 type (vp).
218 Input:
219 1. STMT: a stmt that references memory. Expected to be of the form
220 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
221 2. BSI: block_stmt_iterator where new stmts can be added.
222 3. OFFSET (optional): an offset to be added to the initial address accessed
223 by the data-ref in STMT.
224 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
225 pointing to the initial address.
227 Output:
228 1. Declare a new ptr to vector_type, and have it point to the base of the
229 data reference (initial addressed accessed by the data reference).
230 For example, for vector of type V8HI, the following code is generated:
232 v8hi *vp;
233 vp = (v8hi *)initial_address;
235 if OFFSET is not supplied:
236 initial_address = &a[init];
237 if OFFSET is supplied:
238 initial_address = &a[init + OFFSET];
240 Return the initial_address in INITIAL_ADDRESS.
242 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create
243 a data-reference in the loop based on the new vector pointer vp. This
244 new data reference will by some means be updated each iteration of
245 the loop. Return the pointer vp'.
247 FORNOW: handle only aligned and consecutive accesses. */
249 static tree
250 vect_create_data_ref_ptr (tree stmt,
251 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
252 tree offset, tree *initial_address, bool only_init)
/* BSI is unused: all new statements are inserted either on the loop
   preheader edge or at the standard IV increment position.  */
254 tree base_name;
255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
257 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
258 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
259 tree vect_ptr_type;
260 tree vect_ptr;
261 tree tag;
262 tree new_temp;
263 tree vec_stmt;
264 tree new_stmt_list = NULL_TREE;
265 edge pe = loop_preheader_edge (loop);
266 basic_block new_bb;
267 tree vect_ptr_init;
268 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
270 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
/* Dump-only diagnostics describing the kind of base being vectorized.  */
272 if (vect_print_dump_info (REPORT_DETAILS))
274 tree data_ref_base = base_name;
275 fprintf (vect_dump, "create vector-pointer variable to type: ");
276 print_generic_expr (vect_dump, vectype, TDF_SLIM);
277 if (TREE_CODE (data_ref_base) == VAR_DECL)
278 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
279 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
280 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
281 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
282 fprintf (vect_dump, " vectorizing a record based array ref: ");
283 else if (TREE_CODE (data_ref_base) == SSA_NAME)
284 fprintf (vect_dump, " vectorizing a pointer ref: ");
285 print_generic_expr (vect_dump, base_name, TDF_SLIM);
288 /** (1) Create the new vector-pointer variable: **/
290 vect_ptr_type = build_pointer_type (vectype);
291 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
292 get_name (base_name));
293 add_referenced_var (vect_ptr);
296 /** (2) Add aliasing information to the new vector-pointer:
297 (The points-to info (DR_PTR_INFO) may be defined later.) **/
299 tag = DR_MEMTAG (dr);
300 gcc_assert (tag);
302 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
303 tag must be created with tag added to its may alias list. */
304 if (!MTAG_P (tag))
305 new_type_alias (vect_ptr, tag, DR_REF (dr));
306 else
307 var_ann (vect_ptr)->symbol_mem_tag = tag;
309 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
311 /** (3) Calculate the initial address the vector-pointer, and set
312 the vector-pointer to point to it before the loop: **/
314 /* Create: (&(base[init_val+offset]) in the loop preheader. */
/* The address computation must dominate the loop, so it goes on the
   preheader edge; the assert checks no edge split was needed.  */
315 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
316 offset);
317 pe = loop_preheader_edge (loop);
318 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
319 gcc_assert (!new_bb);
320 *initial_address = new_temp;
322 /* Create: p = (vectype *) initial_base */
323 vec_stmt = fold_convert (vect_ptr_type, new_temp);
324 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
325 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
326 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
327 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
328 gcc_assert (!new_bb);
331 /** (4) Handle the updating of the vector-pointer inside the loop: **/
333 if (only_init) /* No update in loop is required. */
335 /* Copy the points-to information if it exists. */
336 if (DR_PTR_INFO (dr))
337 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
338 return vect_ptr_init;
340 else
342 block_stmt_iterator incr_bsi;
343 bool insert_after;
344 tree indx_before_incr, indx_after_incr;
345 tree incr;
/* Create an IV that advances the pointer by one whole vector
   (TYPE_SIZE_UNIT (vectype) bytes) per iteration.  */
347 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
348 create_iv (vect_ptr_init,
349 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
350 NULL_TREE, loop, &incr_bsi, insert_after,
351 &indx_before_incr, &indx_after_incr);
352 incr = bsi_stmt (incr_bsi);
353 set_stmt_info (stmt_ann (incr),
354 new_stmt_vec_info (incr, loop_vinfo));
356 /* Copy the points-to information if it exists. */
357 if (DR_PTR_INFO (dr))
359 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
360 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
362 merge_alias_info (vect_ptr_init, indx_before_incr);
363 merge_alias_info (vect_ptr_init, indx_after_incr);
/* Return the pointer value valid at the top of the loop body.  */
365 return indx_before_incr;
370 /* Function vect_create_destination_var.
372 Create a new temporary of type VECTYPE. */
374 static tree
375 vect_create_destination_var (tree scalar_dest, tree vectype)
377 tree vec_dest;
378 const char *new_name;
379 tree type;
380 enum vect_var_kind kind;
382 kind = vectype ? vect_simple_var : vect_scalar_var;
383 type = vectype ? vectype : TREE_TYPE (scalar_dest);
385 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
387 new_name = get_name (scalar_dest);
388 if (!new_name)
389 new_name = "var_";
390 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
391 add_referenced_var (vec_dest);
393 return vec_dest;
397 /* Function vect_init_vector.
399 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401 used in the vectorization of STMT. */
403 static tree
404 vect_init_vector (tree stmt, tree vector_var)
406 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
407 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
408 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
409 tree new_var;
410 tree init_stmt;
411 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
412 tree vec_oprnd;
413 edge pe;
414 tree new_temp;
415 basic_block new_bb;
417 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
418 add_referenced_var (new_var);
420 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
421 new_temp = make_ssa_name (new_var, init_stmt);
422 TREE_OPERAND (init_stmt, 0) = new_temp;
424 pe = loop_preheader_edge (loop);
425 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
426 gcc_assert (!new_bb);
428 if (vect_print_dump_info (REPORT_DETAILS))
430 fprintf (vect_dump, "created new init_stmt: ");
431 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
434 vec_oprnd = TREE_OPERAND (init_stmt, 0);
435 return vec_oprnd;
439 /* Function vect_get_vec_def_for_operand.
441 OP is an operand in STMT. This function returns a (vector) def that will be
442 used in the vectorized stmt for STMT.
444 In the case that OP is an SSA_NAME which is defined in the loop, then
445 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
447 In case OP is an invariant or constant, a new stmt that creates a vector def
448 needs to be introduced. */
450 static tree
451 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
453 tree vec_oprnd;
454 tree vec_stmt;
455 tree def_stmt;
456 stmt_vec_info def_stmt_info = NULL;
457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
459 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
460 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
462 tree vec_inv;
463 tree vec_cst;
464 tree t = NULL_TREE;
465 tree def;
466 int i;
467 enum vect_def_type dt;
468 bool is_simple_use;
470 if (vect_print_dump_info (REPORT_DETAILS))
472 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
473 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP; the vectorizer's analysis phase guarantees every operand
   reaching here is a "simple use", hence the assert.  */
476 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
477 gcc_assert (is_simple_use);
478 if (vect_print_dump_info (REPORT_DETAILS))
480 if (def)
482 fprintf (vect_dump, "def = ");
483 print_generic_expr (vect_dump, def, TDF_SLIM);
485 if (def_stmt)
487 fprintf (vect_dump, " def_stmt = ");
488 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
/* Dispatch on how OP is defined; every arm returns directly.  */
492 switch (dt)
494 /* Case 1: operand is a constant. */
495 case vect_constant_def:
497 if (scalar_def)
498 *scalar_def = op;
500 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
501 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Build the element list back-to-front so it reads in order.  */
504 for (i = nunits - 1; i >= 0; --i)
506 t = tree_cons (NULL_TREE, op, t);
508 vec_cst = build_vector (vectype, t);
509 return vect_init_vector (stmt, vec_cst);
512 /* Case 2: operand is defined outside the loop - loop invariant. */
513 case vect_invariant_def:
515 if (scalar_def)
516 *scalar_def = def;
518 /* Create 'vec_inv = {inv,inv,..,inv}' */
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "Create vector_inv.");
522 for (i = nunits - 1; i >= 0; --i)
524 t = tree_cons (NULL_TREE, def, t);
527 /* FIXME: use build_constructor directly. */
528 vec_inv = build_constructor_from_list (vectype, t);
529 return vect_init_vector (stmt, vec_inv);
532 /* Case 3: operand is defined inside the loop. */
533 case vect_loop_def:
535 if (scalar_def)
536 *scalar_def = def_stmt;
538 /* Get the def from the vectorized stmt. */
/* The defining stmt must already have been vectorized (stmts are
   processed in dominance order), so its vectorized form exists.  */
539 def_stmt_info = vinfo_for_stmt (def_stmt);
540 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
541 gcc_assert (vec_stmt);
542 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
543 return vec_oprnd;
546 /* Case 4: operand is defined by a loop header phi - reduction */
547 case vect_reduction_def:
549 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
551 /* Get the def before the loop */
552 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
553 return get_initial_def_for_reduction (stmt, op, scalar_def);
556 /* Case 5: operand is defined by loop-header phi - induction. */
557 case vect_induction_def:
559 if (vect_print_dump_info (REPORT_DETAILS))
560 fprintf (vect_dump, "induction - unsupported.");
561 internal_error ("no support for induction"); /* FORNOW */
564 default:
565 gcc_unreachable ();
570 /* Function vect_finish_stmt_generation.
572 Insert a new stmt. */
574 static void
575 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
577 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
579 if (vect_print_dump_info (REPORT_DETAILS))
581 fprintf (vect_dump, "add new stmt: ");
582 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
585 /* Make sure bsi points to the stmt that is being vectorized. */
586 gcc_assert (stmt == bsi_stmt (*bsi));
588 #ifdef USE_MAPPED_LOCATION
589 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
590 #else
591 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
592 #endif
596 #define ADJUST_IN_EPILOG 1
598 /* Function get_initial_def_for_reduction
600 Input:
601 STMT - a stmt that performs a reduction operation in the loop.
602 INIT_VAL - the initial value of the reduction variable
604 Output:
605 SCALAR_DEF - a tree that holds a value to be added to the final result
606 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
607 Return a vector variable, initialized according to the operation that STMT
608 performs. This vector will be used as the initial value of the
609 vector of partial results.
611 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
612 add: [0,0,...,0,0]
613 mult: [1,1,...,1,1]
614 min/max: [init_val,init_val,..,init_val,init_val]
615 bit and/or: [init_val,init_val,..,init_val,init_val]
616 and when necessary (e.g. add/mult case) let the caller know
617 that it needs to adjust the result by init_val.
619 Option2: Initialize the vector as follows:
620 add: [0,0,...,0,init_val]
621 mult: [1,1,...,1,init_val]
622 min/max: [init_val,init_val,...,init_val]
623 bit and/or: [init_val,init_val,...,init_val]
624 and no adjustments are needed.
626 For example, for the following code:
628 s = init_val;
629 for (i=0;i<n;i++)
630 s = s + a[i];
632 STMT is 's = s + a[i]', and the reduction variable is 's'.
633 For a vector of 4 units, we want to return either [0,0,0,init_val],
634 or [0,0,0,0] and let the caller know that it needs to adjust
635 the result at the end by 'init_val'.
637 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
638 TODO: Use some cost-model to estimate which scheme is more profitable.
641 static tree
642 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
644 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
645 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
646 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
647 int nelements;
648 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
649 tree type = TREE_TYPE (init_val);
650 tree def;
651 tree vec, t = NULL_TREE;
652 bool need_epilog_adjust;
653 int i;
655 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
/* Pick the neutral element DEF for the reduction operation and decide
   whether the epilog must add INIT_VAL back into the final result.  */
657 switch (code)
659 case WIDEN_SUM_EXPR:
660 case DOT_PROD_EXPR:
661 case PLUS_EXPR:
662 if (INTEGRAL_TYPE_P (type))
663 def = build_int_cst (type, 0);
664 else
665 def = build_real (type, dconst0);
667 #ifdef ADJUST_IN_EPILOG
668 /* All the 'nunits' elements are set to 0. The final result will be
669 adjusted by 'init_val' at the loop epilog. */
670 nelements = nunits;
671 need_epilog_adjust = true;
672 #else
673 /* 'nunits - 1' elements are set to 0; The last element is set to
674 'init_val'. No further adjustments at the epilog are needed. */
675 nelements = nunits - 1;
676 need_epilog_adjust = false;
677 #endif
678 break;
680 case MIN_EXPR:
681 case MAX_EXPR:
/* min/max: INIT_VAL itself is the neutral element, so broadcast it
   and no epilog adjustment is needed.  */
682 def = init_val;
683 nelements = nunits;
684 need_epilog_adjust = false;
685 break;
687 default:
688 gcc_unreachable ();
/* Build the element list (cons'ed back-to-front).  */
691 for (i = nelements - 1; i >= 0; --i)
692 t = tree_cons (NULL_TREE, def, t);
694 if (nelements == nunits - 1)
696 /* Set the last element of the vector. */
697 t = tree_cons (NULL_TREE, init_val, t);
698 nelements += 1;
700 gcc_assert (nelements == nunits);
/* A VECTOR_CST can only hold constant elements; otherwise fall back
   to a CONSTRUCTOR.  */
702 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
703 vec = build_vector (vectype, t);
704 else
705 vec = build_constructor_from_list (vectype, t);
707 if (!need_epilog_adjust)
708 *scalar_def = NULL_TREE;
709 else
710 *scalar_def = init_val;
712 return vect_init_vector (stmt, vec);
716 /* Function vect_create_epilog_for_reduction
718 Create code at the loop-epilog to finalize the result of a reduction
719 computation.
721 VECT_DEF is a vector of partial results.
722 REDUC_CODE is the tree-code for the epilog reduction.
723 STMT is the scalar reduction stmt that is being vectorized.
724 REDUCTION_PHI is the phi-node that carries the reduction computation.
726 This function:
727 1. Creates the reduction def-use cycle: sets the the arguments for
728 REDUCTION_PHI:
729 The loop-entry argument is the vectorized initial-value of the reduction.
730 The loop-latch argument is VECT_DEF - the vector of partial sums.
731 2. "Reduces" the vector of partial results VECT_DEF into a single result,
732 by applying the operation specified by REDUC_CODE if available, or by
733 other means (whole-vector shifts or a scalar loop).
734 The function also creates a new phi node at the loop exit to preserve
735 loop-closed form, as illustrated below.
737 The flow at the entry to this function:
739 loop:
740 vec_def = phi <null, null> # REDUCTION_PHI
741 VECT_DEF = vector_stmt # vectorized form of STMT
742 s_loop = scalar_stmt # (scalar) STMT
743 loop_exit:
744 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
745 use <s_out0>
746 use <s_out0>
748 The above is transformed by this function into:
750 loop:
751 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
752 VECT_DEF = vector_stmt # vectorized form of STMT
753 s_loop = scalar_stmt # (scalar) STMT
754 loop_exit:
755 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
756 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
757 v_out2 = reduce <v_out1>
758 s_out3 = extract_field <v_out2, 0>
759 s_out4 = adjust_result <s_out3>
760 use <s_out4>
761 use <s_out4>
764 static void
765 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
766 enum tree_code reduc_code, tree reduction_phi)
768 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
769 tree vectype;
770 enum machine_mode mode;
771 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
772 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
773 basic_block exit_bb;
774 tree scalar_dest;
775 tree scalar_type;
776 tree new_phi;
777 block_stmt_iterator exit_bsi;
778 tree vec_dest;
779 tree new_temp;
780 tree new_name;
781 tree epilog_stmt;
782 tree new_scalar_dest, exit_phi;
783 tree bitsize, bitpos, bytesize;
784 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
785 tree scalar_initial_def;
786 tree vec_initial_def;
787 tree orig_name;
788 imm_use_iterator imm_iter;
789 use_operand_p use_p;
790 bool extract_scalar_result;
791 tree reduction_op;
792 tree orig_stmt;
793 tree use_stmt;
794 tree operation = TREE_OPERAND (stmt, 1);
795 int op_type;
797 op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
798 reduction_op = TREE_OPERAND (operation, op_type-1);
799 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
800 mode = TYPE_MODE (vectype);
802 /*** 1. Create the reduction def-use cycle ***/
804 /* 1.1 set the loop-entry arg of the reduction-phi: */
805 /* For the case of reduction, vect_get_vec_def_for_operand returns
806 the scalar def before the loop, that defines the initial value
807 of the reduction variable. */
808 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
809 &scalar_initial_def);
810 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
812 /* 1.2 set the loop-latch arg for the reduction-phi: */
813 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
815 if (vect_print_dump_info (REPORT_DETAILS))
817 fprintf (vect_dump, "transform reduction: created def-use cycle:");
818 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
819 fprintf (vect_dump, "\n");
820 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
824 /*** 2. Create epilog code
825 The reduction epilog code operates across the elements of the vector
826 of partial results computed by the vectorized loop.
827 The reduction epilog code consists of:
828 step 1: compute the scalar result in a vector (v_out2)
829 step 2: extract the scalar result (s_out3) from the vector (v_out2)
830 step 3: adjust the scalar result (s_out3) if needed.
832 Step 1 can be accomplished using one the following three schemes:
833 (scheme 1) using reduc_code, if available.
834 (scheme 2) using whole-vector shifts, if available.
835 (scheme 3) using a scalar loop. In this case steps 1+2 above are
836 combined.
838 The overall epilog code looks like this:
840 s_out0 = phi <s_loop> # original EXIT_PHI
841 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
842 v_out2 = reduce <v_out1> # step 1
843 s_out3 = extract_field <v_out2, 0> # step 2
844 s_out4 = adjust_result <s_out3> # step 3
846 (step 3 is optional, and step2 1 and 2 may be combined).
847 Lastly, the uses of s_out0 are replaced by s_out4.
849 ***/
851 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
852 v_out1 = phi <v_loop> */
854 exit_bb = loop->single_exit->dest;
855 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
856 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
857 exit_bsi = bsi_start (exit_bb);
859 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
860 (i.e. when reduc_code is not available) and in the final adjustment code
861 (if needed). Also get the original scalar reduction variable as
862 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
863 represents a reduction pattern), the tree-code and scalar-def are
864 taken from the original stmt that the pattern-stmt (STMT) replaces.
865 Otherwise (it is a regular reduction) - the tree-code and scalar-def
866 are taken from STMT. */
868 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
869 if (!orig_stmt)
871 /* Regular reduction */
872 orig_stmt = stmt;
874 else
876 /* Reduction pattern */
877 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
878 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
879 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
881 code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
882 scalar_dest = TREE_OPERAND (orig_stmt, 0);
883 scalar_type = TREE_TYPE (scalar_dest);
884 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
885 bitsize = TYPE_SIZE (scalar_type);
886 bytesize = TYPE_SIZE_UNIT (scalar_type);
888 /* 2.3 Create the reduction code, using one of the three schemes described
889 above. */
891 if (reduc_code < NUM_TREE_CODES)
893 /*** Case 1: Create:
894 v_out2 = reduc_expr <v_out1> */
896 if (vect_print_dump_info (REPORT_DETAILS))
897 fprintf (vect_dump, "Reduce using direct vector reduction.");
899 vec_dest = vect_create_destination_var (scalar_dest, vectype);
900 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
901 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
902 new_temp = make_ssa_name (vec_dest, epilog_stmt);
903 TREE_OPERAND (epilog_stmt, 0) = new_temp;
904 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
906 extract_scalar_result = true;
908 else
910 enum tree_code shift_code = 0;
911 bool have_whole_vector_shift = true;
912 int bit_offset;
913 int element_bitsize = tree_low_cst (bitsize, 1);
914 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
915 tree vec_temp;
917 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
918 shift_code = VEC_RSHIFT_EXPR;
919 else
920 have_whole_vector_shift = false;
922 /* Regardless of whether we have a whole vector shift, if we're
923 emulating the operation via tree-vect-generic, we don't want
924 to use it. Only the first round of the reduction is likely
925 to still be profitable via emulation. */
926 /* ??? It might be better to emit a reduction tree code here, so that
927 tree-vect-generic can expand the first round via bit tricks. */
928 if (!VECTOR_MODE_P (mode))
929 have_whole_vector_shift = false;
930 else
932 optab optab = optab_for_tree_code (code, vectype);
933 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
934 have_whole_vector_shift = false;
937 if (have_whole_vector_shift)
939 /*** Case 2: Create:
940 for (offset = VS/2; offset >= element_size; offset/=2)
942 Create: va' = vec_shift <va, offset>
943 Create: va = vop <va, va'>
944 } */
946 if (vect_print_dump_info (REPORT_DETAILS))
947 fprintf (vect_dump, "Reduce using vector shifts");
949 vec_dest = vect_create_destination_var (scalar_dest, vectype);
950 new_temp = PHI_RESULT (new_phi);
952 for (bit_offset = vec_size_in_bits/2;
953 bit_offset >= element_bitsize;
954 bit_offset /= 2)
956 tree bitpos = size_int (bit_offset);
958 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
959 build2 (shift_code, vectype, new_temp, bitpos));
960 new_name = make_ssa_name (vec_dest, epilog_stmt);
961 TREE_OPERAND (epilog_stmt, 0) = new_name;
962 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
964 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
965 build2 (code, vectype, new_name, new_temp));
966 new_temp = make_ssa_name (vec_dest, epilog_stmt);
967 TREE_OPERAND (epilog_stmt, 0) = new_temp;
968 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
971 extract_scalar_result = true;
973 else
975 tree rhs;
977 /*** Case 3: Create:
978 s = extract_field <v_out2, 0>
979 for (offset = element_size;
980 offset < vector_size;
981 offset += element_size;)
983 Create: s' = extract_field <v_out2, offset>
984 Create: s = op <s, s'>
985 } */
987 if (vect_print_dump_info (REPORT_DETAILS))
988 fprintf (vect_dump, "Reduce using scalar code. ");
990 vec_temp = PHI_RESULT (new_phi);
991 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
992 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
993 bitsize_zero_node);
994 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
995 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
996 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
997 TREE_OPERAND (epilog_stmt, 0) = new_temp;
998 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1000 for (bit_offset = element_bitsize;
1001 bit_offset < vec_size_in_bits;
1002 bit_offset += element_bitsize)
1004 tree bitpos = bitsize_int (bit_offset);
1005 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1006 bitpos);
1008 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1009 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1010 rhs);
1011 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1012 TREE_OPERAND (epilog_stmt, 0) = new_name;
1013 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1015 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1016 build2 (code, scalar_type, new_name, new_temp));
1017 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1018 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1019 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1022 extract_scalar_result = false;
1026 /* 2.4 Extract the final scalar result. Create:
1027 s_out3 = extract_field <v_out2, bitpos> */
1029 if (extract_scalar_result)
1031 tree rhs;
1033 if (vect_print_dump_info (REPORT_DETAILS))
1034 fprintf (vect_dump, "extract scalar result");
1036 if (BYTES_BIG_ENDIAN)
1037 bitpos = size_binop (MULT_EXPR,
1038 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1039 TYPE_SIZE (scalar_type));
1040 else
1041 bitpos = bitsize_zero_node;
1043 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1044 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1045 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1046 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1047 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1048 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1051 /* 2.4 Adjust the final result by the initial value of the reduction
1052 variable. (When such adjustment is not needed, then
1053 'scalar_initial_def' is zero).
1055 Create:
1056 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1058 if (scalar_initial_def)
1060 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1061 build2 (code, scalar_type, new_temp, scalar_initial_def));
1062 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1063 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1064 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1067 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1069 /* Find the loop-closed-use at the loop exit of the original scalar result.
1070 (The reduction result is expected to have two immediate uses - one at the
1071 latch block, and one at the loop exit). */
1072 exit_phi = NULL;
1073 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1075 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1077 exit_phi = USE_STMT (use_p);
1078 break;
1081 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1082 gcc_assert (exit_phi);
1083 /* Replace the uses: */
1084 orig_name = PHI_RESULT (exit_phi);
1085 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1086 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1087 SET_USE (use_p, new_temp);
1091 /* Function vectorizable_reduction.
1093 Check if STMT performs a reduction operation that can be vectorized.
1094 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1095 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1096 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1098 This function also handles reduction idioms (patterns) that have been
1099 recognized in advance during vect_pattern_recog. In this case, STMT may be
1100 of this form:
1101 X = pattern_expr (arg0, arg1, ..., X)
1102 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1103 sequence that had been detected and replaced by the pattern-stmt (STMT).
1105 In some cases of reduction patterns, the type of the reduction variable X is
1106 different than the type of the other arguments of STMT.
1107 In such cases, the vectype that is used when transforming STMT into a vector
1108 stmt is different than the vectype that is used to determine the
1109 vectorization factor, because it consists of a different number of elements
1110 than the actual number of elements that are being operated upon in parallel.
1112 For example, consider an accumulation of shorts into an int accumulator.
1113 On some targets it's possible to vectorize this pattern operating on 8
1114 shorts at a time (hence, the vectype for purposes of determining the
1115 vectorization factor should be V8HI); on the other hand, the vectype that
1116 is used to create the vector form is actually V4SI (the type of the result).
1118 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1119 indicates what is the actual level of parallelism (V8HI in the example), so
1120 that the right vectorization factor would be derived. This vectype
1121 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1122 be used to create the vectorized stmt. The right vectype for the vectorized
1123 stmt is obtained from the type of the result X:
1124 get_vectype_for_scalar_type (TREE_TYPE (X))
1126 This means that, contrary to "regular" reductions (or "regular" stmts in
1127 general), the following equation:
1128 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1129 does *NOT* necessarily hold for reduction patterns. */
1131 bool
1132 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1134 tree vec_dest;
1135 tree scalar_dest;
1136 tree op;
1137 tree loop_vec_def0, loop_vec_def1;
1138 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1139 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1140 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1141 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1142 tree operation;
1143 enum tree_code code, orig_code, epilog_reduc_code = 0;
1144 enum machine_mode vec_mode;
1145 int op_type;
1146 optab optab, reduc_optab;
1147 tree new_temp;
1148 tree def, def_stmt;
1149 enum vect_def_type dt;
1150 tree new_phi;
1151 tree scalar_type;
1152 bool is_simple_use;
1153 tree orig_stmt;
1154 stmt_vec_info orig_stmt_info;
1155 tree expr = NULL_TREE;
1156 int i;
1158 /* 1. Is vectorizable reduction? */
1160 /* Not supportable if the reduction variable is used in the loop. */
1161 if (STMT_VINFO_RELEVANT_P (stmt_info))
1162 return false;
1164 if (!STMT_VINFO_LIVE_P (stmt_info))
1165 return false;
1167 /* Make sure it was already recognized as a reduction computation. */
1168 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1169 return false;
1171 /* 2. Has this been recognized as a reduction pattern?
1173 Check if STMT represents a pattern that has been recognized
1174 in earlier analysis stages. For stmts that represent a pattern,
1175 the STMT_VINFO_RELATED_STMT field records the last stmt in
1176 the original sequence that constitutes the pattern. */
1178 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1179 if (orig_stmt)
1181 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1182 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1183 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1184 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1187 /* 3. Check the operands of the operation. The first operands are defined
1188 inside the loop body. The last operand is the reduction variable,
1189 which is defined by the loop-header-phi. */
1191 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1193 operation = TREE_OPERAND (stmt, 1);
1194 code = TREE_CODE (operation);
1195 op_type = TREE_CODE_LENGTH (code);
1197 if (op_type != binary_op && op_type != ternary_op)
1198 return false;
1199 scalar_dest = TREE_OPERAND (stmt, 0);
1200 scalar_type = TREE_TYPE (scalar_dest);
1202 /* All uses but the last are expected to be defined in the loop.
1203 The last use is the reduction variable. */
1204 for (i = 0; i < op_type-1; i++)
1206 op = TREE_OPERAND (operation, i);
1207 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1208 gcc_assert (is_simple_use);
1209 gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1210 dt == vect_constant_def);
1213 op = TREE_OPERAND (operation, i);
1214 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1215 gcc_assert (is_simple_use);
1216 gcc_assert (dt == vect_reduction_def);
1217 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1218 if (orig_stmt)
1219 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1220 else
1221 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1223 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1224 return false;
1226 /* 4. Supportable by target? */
1228 /* 4.1. check support for the operation in the loop */
1229 optab = optab_for_tree_code (code, vectype);
1230 if (!optab)
1232 if (vect_print_dump_info (REPORT_DETAILS))
1233 fprintf (vect_dump, "no optab.");
1234 return false;
1236 vec_mode = TYPE_MODE (vectype);
1237 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1239 if (vect_print_dump_info (REPORT_DETAILS))
1240 fprintf (vect_dump, "op not supported by target.");
1241 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1242 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1243 < vect_min_worthwhile_factor (code))
1244 return false;
1245 if (vect_print_dump_info (REPORT_DETAILS))
1246 fprintf (vect_dump, "proceeding using word mode.");
1249 /* Worthwhile without SIMD support? */
1250 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1251 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1252 < vect_min_worthwhile_factor (code))
1254 if (vect_print_dump_info (REPORT_DETAILS))
1255 fprintf (vect_dump, "not worthwhile without SIMD support.");
1256 return false;
1259 /* 4.2. Check support for the epilog operation.
1261 If STMT represents a reduction pattern, then the type of the
1262 reduction variable may be different than the type of the rest
1263 of the arguments. For example, consider the case of accumulation
1264 of shorts into an int accumulator; The original code:
1265 S1: int_a = (int) short_a;
1266 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1268 was replaced with:
1269 STMT: int_acc = widen_sum <short_a, int_acc>
1271 This means that:
1272 1. The tree-code that is used to create the vector operation in the
1273 epilog code (that reduces the partial results) is not the
1274 tree-code of STMT, but is rather the tree-code of the original
1275 stmt from the pattern that STMT is replacing. I.e, in the example
1276 above we want to use 'widen_sum' in the loop, but 'plus' in the
1277 epilog.
1278 2. The type (mode) we use to check available target support
1279 for the vector operation to be created in the *epilog*, is
1280 determined by the type of the reduction variable (in the example
1281 above we'd check this: plus_optab[vect_int_mode]).
1282 However the type (mode) we use to check available target support
1283 for the vector operation to be created *inside the loop*, is
1284 determined by the type of the other arguments to STMT (in the
1285 example we'd check this: widen_sum_optab[vect_short_mode]).
1287 This is contrary to "regular" reductions, in which the types of all
1288 the arguments are the same as the type of the reduction variable.
1289 For "regular" reductions we can therefore use the same vector type
1290 (and also the same tree-code) when generating the epilog code and
1291 when generating the code inside the loop. */
1293 if (orig_stmt)
1295 /* This is a reduction pattern: get the vectype from the type of the
1296 reduction variable, and get the tree-code from orig_stmt. */
1297 orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1298 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1299 vec_mode = TYPE_MODE (vectype);
1301 else
1303 /* Regular reduction: use the same vectype and tree-code as used for
1304 the vector code inside the loop can be used for the epilog code. */
1305 orig_code = code;
1308 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1309 return false;
1310 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1311 if (!reduc_optab)
1313 if (vect_print_dump_info (REPORT_DETAILS))
1314 fprintf (vect_dump, "no optab for reduction.");
1315 epilog_reduc_code = NUM_TREE_CODES;
1317 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1319 if (vect_print_dump_info (REPORT_DETAILS))
1320 fprintf (vect_dump, "reduc op not supported by target.");
1321 epilog_reduc_code = NUM_TREE_CODES;
1324 if (!vec_stmt) /* transformation not required. */
1326 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1327 return true;
1330 /** Transform. **/
1332 if (vect_print_dump_info (REPORT_DETAILS))
1333 fprintf (vect_dump, "transform reduction.");
1335 /* Create the destination vector */
1336 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1338 /* Create the reduction-phi that defines the reduction-operand. */
1339 new_phi = create_phi_node (vec_dest, loop->header);
1341 /* Prepare the operand that is defined inside the loop body */
1342 op = TREE_OPERAND (operation, 0);
1343 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1344 if (op_type == binary_op)
1345 expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
1346 else if (op_type == ternary_op)
1348 op = TREE_OPERAND (operation, 1);
1349 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1350 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1351 PHI_RESULT (new_phi));
1354 /* Create the vectorized operation that computes the partial results */
1355 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1356 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1357 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1358 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1360 /* Finalize the reduction-phi (set it's arguments) and create the
1361 epilog reduction code. */
1362 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1363 return true;
1367 /* Function vectorizable_assignment.
1369 Check if STMT performs an assignment (copy) that can be vectorized.
1370 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1371 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1372 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1374 bool
1375 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1377 tree vec_dest;
1378 tree scalar_dest;
1379 tree op;
1380 tree vec_oprnd;
1381 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1382 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1384 tree new_temp;
1385 tree def, def_stmt;
1386 enum vect_def_type dt;
1388 /* Is vectorizable assignment? */
1389 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1390 return false;
1392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1394 if (TREE_CODE (stmt) != MODIFY_EXPR)
1395 return false;
1397 scalar_dest = TREE_OPERAND (stmt, 0);
1398 if (TREE_CODE (scalar_dest) != SSA_NAME)
1399 return false;
1401 op = TREE_OPERAND (stmt, 1);
1402 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1404 if (vect_print_dump_info (REPORT_DETAILS))
1405 fprintf (vect_dump, "use not simple.");
1406 return false;
1409 if (!vec_stmt) /* transformation not required. */
1411 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1412 return true;
1415 /** Transform. **/
1416 if (vect_print_dump_info (REPORT_DETAILS))
1417 fprintf (vect_dump, "transform assignment.");
1419 /* Handle def. */
1420 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1422 /* Handle use. */
1423 op = TREE_OPERAND (stmt, 1);
1424 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1426 /* Arguments are ready. create the new vector stmt. */
1427 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1428 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1429 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1430 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1432 return true;
1436 /* Function vect_min_worthwhile_factor.
1438 For a loop where we could vectorize the operation indicated by CODE,
1439 return the minimum vectorization factor that makes it worthwhile
1440 to use generic vectors. */
1441 static int
1442 vect_min_worthwhile_factor (enum tree_code code)
1444 switch (code)
1446 case PLUS_EXPR:
1447 case MINUS_EXPR:
1448 case NEGATE_EXPR:
1449 return 4;
1451 case BIT_AND_EXPR:
1452 case BIT_IOR_EXPR:
1453 case BIT_XOR_EXPR:
1454 case BIT_NOT_EXPR:
1455 return 2;
1457 default:
1458 return INT_MAX;
1463 /* Function vectorizable_operation.
1465 Check if STMT performs a binary or unary operation that can be vectorized.
1466 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1467 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1468 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must have definitions the vectorizer understands.  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* No native insn: fall back to word-mode emulation, but only when
	 the vector size matches a machine word and the vectorization
	 factor makes the emulation worthwhile.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	     < vect_min_worthwhile_factor (code))
	return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	 < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
	return false;

      /* Invariant argument is needed for a vector shift
	 by a scalar shift operand.
	 NOTE(review): DT here is whatever the operand-scan loop above left
	 behind, i.e. the def-type of the LAST operand (the shift amount for
	 a binary shift) — the check relies on that loop ordering.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
	     || dt == vect_constant_def
	     || dt == vect_invariant_def))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "operand mode requires invariant argument.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
	{
	  /* Vector shl and shr insn patterns can be defined with
	     scalar operand 2 (shift operand).  In this case, use
	     constant or loop invariant op1 directly, without
	     extending it to vector mode first.  */
	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "operand 1 using scalar mode.");
	      vec_oprnd1 = op1;
	    }
	}

      /* Otherwise widen the second operand to a vector def as usual.  */
      if (!vec_oprnd1)
	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
			build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
			build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1646 /* Function vectorizable_store.
1648 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1649 can be vectorized.
1650 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1651 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1652 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  ssa_op_iter iter;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is vectorizable store?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  /* The lhs must be a memory reference (array element or pointer deref).  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  /* The stored value must have a definition we can vectorize.  */
  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store");

  /* Only aligned stores are handled at this point (see asserts below).  */
  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready. create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  /* Copy the V_MAY_DEFS representing the aliasing of the original array
     element's definition to the vector's definition then update the
     defining statement.  The original is being deleted so the same
     SSA_NAMEs can be used.  */
  copy_virtual_operands (*vec_stmt, stmt);

  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
    {
      SSA_NAME_DEF_STMT (def) = *vec_stmt;

      /* If this virtual def has a use outside the loop and a loop peel is
	 performed then the def may be renamed by the peel.  Mark it for
	 renaming so the later use will also be renamed.  */
      mark_sym_for_renaming (SSA_NAME_VAR (def));
    }

  return true;
}
1748 /* vectorizable_load.
1750 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1751 can be vectorized.
1752 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1753 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1754 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The stmt must be an SSA copy from a memory reference.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Known-misaligned but target-supported access: express it as a
	     MISALIGNED_INDIRECT_REF carrying the misalignment in bits
	     (-1 means the misalignment is unknown, encoded as 0 here).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      copy_virtual_operands (new_stmt, stmt);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* Insertion on the preheader edge must not split it into a new
	 basic block (asserted below).  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);
      update_vuses_to_preheader (new_stmt, loop);

      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);

      /* <3> Compute "magic" — the third operand of the realign-load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);

	  /* The result of the CALL_EXPR to this builtin is determined from
	     the value of the parameter and no global variables are touched
	     which makes the builtin a "const" function.  Requiring the
	     builtin to have the "const" attribute makes it unnecessary
	     to call mark_call_clobbered.  */
	  gcc_assert (TREE_READONLY (builtin_decl));
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg
	     pressure.  */
	  magic = dataref_ptr;
	}

      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));

      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
1968 /* Function vectorizable_live_operation.
1970 STMT computes a value that is used outside the loop. Check if
1971 it can be supported. */
1973 bool
1974 vectorizable_live_operation (tree stmt,
1975 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1976 tree *vec_stmt ATTRIBUTE_UNUSED)
1978 tree operation;
1979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1981 int i;
1982 enum tree_code code;
1983 int op_type;
1984 tree op;
1985 tree def, def_stmt;
1986 enum vect_def_type dt;
1988 if (!STMT_VINFO_LIVE_P (stmt_info))
1989 return false;
1991 if (TREE_CODE (stmt) != MODIFY_EXPR)
1992 return false;
1994 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1995 return false;
1997 operation = TREE_OPERAND (stmt, 1);
1998 code = TREE_CODE (operation);
2000 op_type = TREE_CODE_LENGTH (code);
2002 /* FORNOW: support only if all uses are invariant. This means
2003 that the scalar operations can remain in place, unvectorized.
2004 The original last scalar value that they compute will be used. */
2006 for (i = 0; i < op_type; i++)
2008 op = TREE_OPERAND (operation, i);
2009 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2011 if (vect_print_dump_info (REPORT_DETAILS))
2012 fprintf (vect_dump, "use not simple.");
2013 return false;
2016 if (dt != vect_invariant_def && dt != vect_constant_def)
2017 return false;
2020 /* No transformation is required for the cases we currently support. */
2021 return true;
2025 /* Function vect_is_simple_cond.
2027 Input:
2028 LOOP - the loop that is being vectorized.
2029 COND - Condition that is checked for simple use.
2031 Returns whether a COND can be vectorized. Checks whether
2032 condition operands are supportable using vec_is_simple_use. */
2034 static bool
2035 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2037 tree lhs, rhs;
2038 tree def;
2039 enum vect_def_type dt;
2041 if (!COMPARISON_CLASS_P (cond))
2042 return false;
2044 lhs = TREE_OPERAND (cond, 0);
2045 rhs = TREE_OPERAND (cond, 1);
2047 if (TREE_CODE (lhs) == SSA_NAME)
2049 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2050 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2051 return false;
2053 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2054 return false;
2056 if (TREE_CODE (rhs) == SSA_NAME)
2058 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2059 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2060 return false;
2062 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
2063 return false;
2065 return true;
2068 /* vectorizable_condition.
2070 Check if STMT is conditional modify expression that can be vectorized.
2071 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2072 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
2073 at BSI.
2075 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2077 bool
2078 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2080 tree scalar_dest = NULL_TREE;
2081 tree vec_dest = NULL_TREE;
2082 tree op = NULL_TREE;
2083 tree cond_expr, then_clause, else_clause;
2084 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2085 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2086 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2087 tree vec_compare, vec_cond_expr;
2088 tree new_temp;
2089 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2090 enum machine_mode vec_mode;
2091 tree def;
2092 enum vect_def_type dt;
/* Only stmts marked relevant by the analysis phase are candidates.  */
2094 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2095 return false;
2097 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2099 if (STMT_VINFO_LIVE_P (stmt_info))
2101 /* FORNOW: not yet supported. */
2102 if (vect_print_dump_info (REPORT_DETAILS))
2103 fprintf (vect_dump, "value used after loop.");
2104 return false;
/* The stmt must be an assignment whose rhs is a COND_EXPR.  */
2107 if (TREE_CODE (stmt) != MODIFY_EXPR)
2108 return false;
2110 op = TREE_OPERAND (stmt, 1);
2112 if (TREE_CODE (op) != COND_EXPR)
2113 return false;
2115 cond_expr = TREE_OPERAND (op, 0);
2116 then_clause = TREE_OPERAND (op, 1);
2117 else_clause = TREE_OPERAND (op, 2);
2119 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2120 return false;
2122 /* We do not handle two different vector types for the condition
2123 and the values. */
2124 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
2125 return false;
/* THEN/ELSE clauses must each be a simple SSA use or a scalar constant.  */
2127 if (TREE_CODE (then_clause) == SSA_NAME)
2129 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2130 if (!vect_is_simple_use (then_clause, loop_vinfo,
2131 &then_def_stmt, &def, &dt))
2132 return false;
2134 else if (TREE_CODE (then_clause) != INTEGER_CST
2135 && TREE_CODE (then_clause) != REAL_CST)
2136 return false;
2138 if (TREE_CODE (else_clause) == SSA_NAME)
2140 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2141 if (!vect_is_simple_use (else_clause, loop_vinfo,
2142 &else_def_stmt, &def, &dt))
2143 return false;
2145 else if (TREE_CODE (else_clause) != INTEGER_CST
2146 && TREE_CODE (else_clause) != REAL_CST)
2147 return false;
2150 vec_mode = TYPE_MODE (vectype);
/* Analysis phase (VEC_STMT == NULL): record the stmt kind for
vect_transform_stmt and ask the target whether a VEC_COND_EXPR of
this mode can be expanded.  */
2152 if (!vec_stmt)
2154 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2155 return expand_vec_cond_expr_p (op, vec_mode);
2158 /* Transform */
2160 /* Handle def. */
2161 scalar_dest = TREE_OPERAND (stmt, 0);
2162 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2164 /* Handle cond expr. */
2165 vec_cond_lhs =
2166 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2167 vec_cond_rhs =
2168 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2169 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2170 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2172 /* Arguments are ready. create the new vector stmt. */
2173 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2174 vec_cond_lhs, vec_cond_rhs);
2175 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
2176 vec_compare, vec_then_clause, vec_else_clause);
/* Build <vec_dest = VEC_COND_EXPR <cmp, then, else>>, give it a fresh
SSA name as its lhs, and insert it at BSI.  */
2178 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2179 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2180 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2181 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2183 return true;
2186 /* Function vect_transform_stmt.
2188 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2190 bool
2191 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2193 bool is_store = false;
2194 tree vec_stmt = NULL_TREE;
2195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2196 tree orig_stmt_in_pattern;
2197 bool done;
/* Dispatch on the stmt kind recorded during the analysis phase; each
vectorizable_* routine emits the corresponding vector stmt at BSI.  */
2199 if (STMT_VINFO_RELEVANT_P (stmt_info))
2201 switch (STMT_VINFO_TYPE (stmt_info))
2203 case op_vec_info_type:
2204 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2205 gcc_assert (done);
2206 break;
2208 case assignment_vec_info_type:
2209 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2210 gcc_assert (done);
2211 break;
2213 case load_vec_info_type:
2214 done = vectorizable_load (stmt, bsi, &vec_stmt);
2215 gcc_assert (done);
2216 break;
2218 case store_vec_info_type:
2219 done = vectorizable_store (stmt, bsi, &vec_stmt);
2220 gcc_assert (done);
/* Tell the caller this was a store so the scalar stmt is removed.  */
2221 is_store = true;
2222 break;
2224 case condition_vec_info_type:
2225 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2226 gcc_assert (done);
2227 break;
2229 default:
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "stmt not supported.");
2232 gcc_unreachable ();
2235 gcc_assert (vec_stmt);
2236 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2237 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
2238 if (orig_stmt_in_pattern)
2240 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
2241 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2243 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2245 /* STMT was inserted by the vectorizer to replace a computation
2246 idiom. ORIG_STMT_IN_PATTERN is a stmt in the original
2247 sequence that computed this idiom. We need to record a pointer
2248 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more
2249 detail in the documentation of vect_pattern_recog. */
2251 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
/* Separately handle stmts whose value is live after the loop: reductions
get an epilog; everything else is checked by vectorizable_live_operation
(which currently requires all uses to be invariant).  */
2256 if (STMT_VINFO_LIVE_P (stmt_info))
2258 switch (STMT_VINFO_TYPE (stmt_info))
2260 case reduc_vec_info_type:
2261 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2262 gcc_assert (done);
2263 break;
2265 default:
2266 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2267 gcc_assert (done);
/* Record the vector stmt only if the live-handling produced one.  */
2270 if (vec_stmt)
2272 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2273 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2277 return is_store;
2281 /* This function builds ni_name = number of iterations loop executes
2282 on the loop preheader. */
2284 static tree
2285 vect_build_loop_niters (loop_vec_info loop_vinfo)
2287 tree ni_name, stmt, var;
2288 edge pe;
2289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2290 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2292 var = create_tmp_var (TREE_TYPE (ni), "niters");
2293 add_referenced_var (var);
2294 ni_name = force_gimple_operand (ni, &stmt, false, var);
2296 pe = loop_preheader_edge (loop);
2297 if (stmt)
2299 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2300 gcc_assert (!new_bb);
2303 return ni_name;
2307 /* This function generates the following statements:
2309 ni_name = number of iterations loop executes
2310 ratio = ni_name / vf
2311 ratio_mult_vf_name = ratio * vf
2313 and places them at the loop preheader edge. */
2315 static void
2316 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2317 tree *ni_name_ptr,
2318 tree *ratio_mult_vf_name_ptr,
2319 tree *ratio_name_ptr)
2322 edge pe;
2323 basic_block new_bb;
2324 tree stmt, ni_name;
2325 tree var;
2326 tree ratio_name;
2327 tree ratio_mult_vf_name;
2328 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2329 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2330 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2331 tree log_vf;
2333 pe = loop_preheader_edge (loop);
2335 /* Generate temporary variable that contains
2336 number of iterations loop executes. */
2338 ni_name = vect_build_loop_niters (loop_vinfo);
2339 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2341 /* Create: ratio = ni >> log2(vf) */
2343 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2344 add_referenced_var (var);
2345 ratio_name = make_ssa_name (var, NULL_TREE);
2346 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2347 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2348 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2350 pe = loop_preheader_edge (loop);
2351 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2352 gcc_assert (!new_bb);
2354 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2356 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2357 add_referenced_var (var);
2358 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2359 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2360 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2361 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2363 pe = loop_preheader_edge (loop);
2364 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2365 gcc_assert (!new_bb);
2367 *ni_name_ptr = ni_name;
2368 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2369 *ratio_name_ptr = ratio_name;
2371 return;
2375 /* Function update_vuses_to_preheader.
2377 Input:
2378 STMT - a statement with potential VUSEs.
2379 LOOP - the loop whose preheader will contain STMT.
2381 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2382 appears to be defined in a V_MAY_DEF in another statement in a loop.
2383 One such case is when the VUSE is at the dereference of a __restricted__
2384 pointer in a load and the V_MAY_DEF is at the dereference of a different
2385 __restricted__ pointer in a store. Vectorization may result in
2386 copy_virtual_uses being called to copy the problematic VUSE to a new
2387 statement that is being inserted in the loop preheader. This procedure
2388 is called to change the SSA_NAME in the new statement's VUSE from the
2389 SSA_NAME updated in the loop to the related SSA_NAME available on the
2390 path entering the loop.
2392 When this function is called, we have the following situation:
2394 # vuse <name1>
2395 S1: vload
2396 do {
2397 # name1 = phi < name0 , name2>
2399 # vuse <name1>
2400 S2: vload
2402 # name2 = vdef <name1>
2403 S3: vstore
2405 }while...
2407 Stmt S1 was created in the loop preheader block as part of misaligned-load
2408 handling. This function fixes the name of the vuse of S1 from 'name1' to
2409 'name0'. */
2411 static void
2412 update_vuses_to_preheader (tree stmt, struct loop *loop)
2414 basic_block header_bb = loop->header;
2415 edge preheader_e = loop_preheader_edge (loop);
2416 ssa_op_iter iter;
2417 use_operand_p use_p;
/* Walk every virtual use operand of STMT.  */
2419 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2421 tree ssa_name = USE_FROM_PTR (use_p);
2422 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2423 tree name_var = SSA_NAME_VAR (ssa_name);
2424 basic_block bb = bb_for_stmt (def_stmt);
2426 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2427 if (!IS_EMPTY_STMT (def_stmt)
2428 && flow_bb_inside_loop_p (loop, bb))
2430 /* If the block containing the statement defining the SSA_NAME
2431 is in the loop then it's necessary to find the definition
2432 outside the loop using the PHI nodes of the header. */
2433 tree phi;
2434 bool updated = false;
/* Scan the header phis for the one whose result is a version of the
same base variable; its argument on the preheader edge is the
version live on loop entry.  */
2436 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2438 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2440 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2441 updated = true;
2442 break;
/* A matching header phi must exist, or the VUSE cannot be fixed.  */
2445 gcc_assert (updated);
2451 /* Function vect_update_ivs_after_vectorizer.
2453 "Advance" the induction variables of LOOP to the value they should take
2454 after the execution of LOOP. This is currently necessary because the
2455 vectorizer does not handle induction variables that are used after the
2456 loop. Such a situation occurs when the last iterations of LOOP are
2457 peeled, because:
2458 1. We introduced new uses after LOOP for IVs that were not originally used
2459 after LOOP: the IVs of LOOP are now used by an epilog loop.
2460 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2461 times, whereas the loop IVs should be bumped N times.
2463 Input:
2464 - LOOP - a loop that is going to be vectorized. The last few iterations
2465 of LOOP were peeled.
2466 - NITERS - the number of iterations that LOOP executes (before it is
2467 vectorized). i.e, the number of times the ivs should be bumped.
2468 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2469 coming out from LOOP on which there are uses of the LOOP ivs
2470 (this is the path from LOOP->exit to epilog_loop->preheader).
2472 The new definitions of the ivs are placed in LOOP->exit.
2473 The phi args associated with the edge UPDATE_E in the bb
2474 UPDATE_E->dest are updated accordingly.
2476 Assumption 1: Like the rest of the vectorizer, this function assumes
2477 a single loop exit that has a single predecessor.
2479 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2480 organized in the same order.
2482 Assumption 3: The access function of the ivs is simple enough (see
2483 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2485 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2486 coming out of LOOP on which the ivs of LOOP are used (this is the path
2487 that leads to the epilog loop; other paths skip the epilog loop). This
2488 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2489 needs to have its phis updated.
2492 static void
2493 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2494 edge update_e)
2496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2497 basic_block exit_bb = loop->single_exit->dest;
2498 tree phi, phi1;
2499 basic_block update_bb = update_e->dest;
2501 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2503 /* Make sure there exists a single-predecessor exit bb: */
2504 gcc_assert (single_pred_p (exit_bb));
/* Walk the header phis and the update_bb phis in lock-step
(Assumption 2: same order).  */
2506 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2507 phi && phi1;
2508 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2510 tree access_fn = NULL;
2511 tree evolution_part;
2512 tree init_expr;
2513 tree step_expr;
2514 tree var, stmt, ni, ni_name;
2515 block_stmt_iterator last_bsi;
2517 if (vect_print_dump_info (REPORT_DETAILS))
2519 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2520 print_generic_expr (vect_dump, phi, TDF_SLIM);
2523 /* Skip virtual phi's. */
2524 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2526 if (vect_print_dump_info (REPORT_DETAILS))
2527 fprintf (vect_dump, "virtual phi. skip.");
2528 continue;
2531 /* Skip reduction phis. */
2532 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2534 if (vect_print_dump_info (REPORT_DETAILS))
2535 fprintf (vect_dump, "reduc phi. skip.");
2536 continue;
/* Recover the IV's evolution: init + i * step.  */
2539 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2540 gcc_assert (access_fn);
2541 evolution_part =
2542 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2543 gcc_assert (evolution_part != NULL_TREE);
2545 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2546 of degree >= 2 or exponential. */
2547 gcc_assert (!tree_is_chrec (evolution_part));
2549 step_expr = evolution_part;
2550 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2551 loop->num));
/* Final value of the IV after NITERS iterations:
ni = init + niters * step.  */
2553 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2554 build2 (MULT_EXPR, TREE_TYPE (niters),
2555 niters, step_expr), init_expr);
2557 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2558 add_referenced_var (var);
2560 ni_name = force_gimple_operand (ni, &stmt, false, var);
2562 /* Insert stmt into exit_bb. */
2563 last_bsi = bsi_last (exit_bb);
2564 if (stmt)
2565 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2567 /* Fix phi expressions in the successor bb. */
2568 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2573 /* Function vect_do_peeling_for_loop_bound
2575 Peel the last iterations of the loop represented by LOOP_VINFO.
2576 The peeled iterations form a new epilog loop. Given that the loop now
2577 iterates NITERS times, the new epilog loop iterates
2578 NITERS % VECTORIZATION_FACTOR times.
2580 The original loop will later be made to iterate
2581 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2583 static void
2584 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2585 struct loops *loops)
2587 tree ni_name, ratio_mult_vf_name;
2588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2589 struct loop *new_loop;
2590 edge update_e;
2591 basic_block preheader;
2592 int loop_num;
2594 if (vect_print_dump_info (REPORT_DETAILS))
2595 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
/* slpeel_tree_peel_loop_to_edge uses the original/copy tables to map
the duplicated bbs; they are freed again at the end.  */
2597 initialize_original_copy_tables ();
2599 /* Generate the following variables on the preheader of original loop:
2601 ni_name = number of iteration the original loop executes
2602 ratio = ni_name / vf
2603 ratio_mult_vf_name = ratio * vf */
2604 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2605 &ratio_mult_vf_name, ratio);
/* Peel the epilog off at the loop exit; the vectorized loop will run
ratio_mult_vf iterations, the epilog the remaining ni % vf.  */
2607 loop_num = loop->num;
2608 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2609 ratio_mult_vf_name, ni_name, false);
2610 gcc_assert (new_loop);
2611 gcc_assert (loop_num == loop->num);
2612 #ifdef ENABLE_CHECKING
2613 slpeel_verify_cfg_after_peeling (loop, new_loop);
2614 #endif
2616 /* A guard that controls whether the new_loop is to be executed or skipped
2617 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2618 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2619 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2620 is on the path where the LOOP IVs are used and need to be updated. */
2622 preheader = loop_preheader_edge (new_loop)->src;
2623 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2624 update_e = EDGE_PRED (preheader, 0);
2625 else
2626 update_e = EDGE_PRED (preheader, 1);
2628 /* Update IVs of original loop as if they were advanced
2629 by ratio_mult_vf_name steps. */
2630 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2632 /* After peeling we have to reset scalar evolution analyzer. */
2633 scev_reset ();
2635 free_original_copy_tables ();
2639 /* Function vect_gen_niters_for_prolog_loop
2641 Set the number of iterations for the loop represented by LOOP_VINFO
2642 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2643 and the misalignment of DR - the data reference recorded in
2644 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2645 this loop, the data reference DR will refer to an aligned location.
2647 The following computation is generated:
2649 If the misalignment of DR is known at compile time:
2650 addr_mis = int mis = DR_MISALIGNMENT (dr);
2651 Else, compute address misalignment in bytes:
2652 addr_mis = addr & (vectype_size - 1)
2654 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2656 (elem_size = element type size; an element is the scalar element
2657 whose type is the inner type of the vectype) */
2659 static tree
2660 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2662 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2663 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2664 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2665 tree var, stmt;
2666 tree iters, iters_name;
2667 edge pe;
2668 basic_block new_bb;
2669 tree dr_stmt = DR_STMT (dr);
2670 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2671 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2672 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2673 tree niters_type = TREE_TYPE (loop_niters);
2675 pe = loop_preheader_edge (loop);
/* Compile-time-known misalignment: fold the whole count to a constant.  */
2677 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2679 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2680 int element_size = vectype_align/vf;
2681 int elem_misalign = byte_misalign / element_size;
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2685 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2687 else
/* Runtime misalignment: emit code that computes it from the address of
the first access.  */
2689 tree new_stmts = NULL_TREE;
2690 tree start_addr =
2691 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2692 tree ptr_type = TREE_TYPE (start_addr);
2693 tree size = TYPE_SIZE (ptr_type);
2694 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2695 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2696 tree elem_size_log =
2697 build_int_cst (type, exact_log2 (vectype_align/vf));
2698 tree vf_minus_1 = build_int_cst (type, vf - 1);
2699 tree vf_tree = build_int_cst (type, vf);
2700 tree byte_misalign;
2701 tree elem_misalign;
/* NOTE(review): new_stmts may still be NULL_TREE here if the address
computation needed no extra stmts -- confirm that
bsi_insert_on_edge_immediate tolerates an empty stmt list.  */
2703 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2704 gcc_assert (!new_bb);
2706 /* Create: byte_misalign = addr & (vectype_size - 1) */
2707 byte_misalign =
2708 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2710 /* Create: elem_misalign = byte_misalign / element_size */
2711 elem_misalign =
2712 build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2714 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2715 iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
2716 iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
2717 iters = fold_convert (niters_type, iters);
2720 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2721 /* If the loop bound is known at compile time we already verified that it is
2722 greater than vf; since the misalignment ('iters') is at most vf, there's
2723 no need to generate the MIN_EXPR in this case. */
2724 if (TREE_CODE (loop_niters) != INTEGER_CST)
2725 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2727 if (vect_print_dump_info (REPORT_DETAILS))
2729 fprintf (vect_dump, "niters for prolog loop: ");
2730 print_generic_expr (vect_dump, iters, TDF_SLIM);
2733 var = create_tmp_var (niters_type, "prolog_loop_niters");
2734 add_referenced_var (var);
2735 iters_name = force_gimple_operand (iters, &stmt, false, var);
2737 /* Insert stmt on loop preheader edge. */
2738 if (stmt)
2740 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2741 gcc_assert (!new_bb);
2744 return iters_name;
2748 /* Function vect_update_init_of_dr
2750 NITERS iterations were peeled from LOOP. DR represents a data reference
2751 in LOOP. This function updates the information recorded in DR to
2752 account for the fact that the first NITERS iterations had already been
2753 executed. Specifically, it updates the OFFSET field of DR. */
2755 static void
2756 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2758 tree offset = DR_OFFSET (dr);
2760 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2761 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2762 DR_OFFSET (dr) = offset;
2766 /* Function vect_update_inits_of_drs
2768 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2769 This function updates the information recorded for the data references in
2770 the loop to account for the fact that the first NITERS iterations had
2771 already been executed. Specifically, it updates the initial_condition of the
2772 access_function of all the data_references in the loop. */
2774 static void
2775 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2777 unsigned int i;
2778 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2779 struct data_reference *dr;
2781 if (vect_dump && (dump_flags & TDF_DETAILS))
2782 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2784 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2785 vect_update_init_of_dr (dr, niters);
2789 /* Function vect_do_peeling_for_alignment
2791 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2792 'niters' is set to the misalignment of one of the data references in the
2793 loop, thereby forcing it to refer to an aligned location at the beginning
2794 of the execution of this loop. The data reference for which we are
2795 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2797 static void
2798 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2801 tree niters_of_prolog_loop, ni_name;
2802 tree n_iters;
2803 struct loop *new_loop;
2805 if (vect_print_dump_info (REPORT_DETAILS))
2806 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
/* slpeel_tree_peel_loop_to_edge relies on the original/copy tables;
freed again at the end of this function.  */
2808 initialize_original_copy_tables ();
/* Compute the prolog iteration count: min (niters, misalignment).  */
2810 ni_name = vect_build_loop_niters (loop_vinfo);
2811 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2813 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2814 new_loop =
2815 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2816 niters_of_prolog_loop, ni_name, true);
2817 gcc_assert (new_loop);
2818 #ifdef ENABLE_CHECKING
2819 slpeel_verify_cfg_after_peeling (new_loop, loop);
2820 #endif
2822 /* Update number of times loop executes. */
2823 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2824 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2825 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2827 /* Update the init conditions of the access functions of all data refs. */
2828 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2830 /* After peeling we have to reset scalar evolution analyzer. */
2831 scev_reset ();
2833 free_original_copy_tables ();
2837 /* Function vect_create_cond_for_align_checks.
2839 Create a conditional expression that represents the alignment checks for
2840 all of data references (array element references) whose alignment must be
2841 checked at runtime.
2843 Input:
2844 LOOP_VINFO - two fields of the loop information are used.
2845 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2846 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2848 Output:
2849 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2850 expression.
2851 The returned value is the conditional expression to be used in the if
2852 statement that controls which version of the loop gets executed at runtime.
2854 The algorithm makes two assumptions:
2855 1) The number of bytes "n" in a vector is a power of 2.
2856 2) An address "a" is aligned if a%n is zero and that this
2857 test can be done as a&(n-1) == 0. For example, for 16
2858 byte vectors the test is a&0xf == 0. */
2860 static tree
2861 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2862 tree *cond_expr_stmt_list)
2864 VEC(tree,heap) *may_misalign_stmts
2865 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2866 tree ref_stmt;
2867 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2868 tree mask_cst;
2869 unsigned int i;
2870 tree psize;
2871 tree int_ptrsize_type;
/* Scratch buffer for generated var names ("addr2int<i>", "orptrs<i>");
8-char prefix + up to 10 digits + NUL fits in 20 bytes.  */
2872 char tmp_name[20];
2873 tree or_tmp_name = NULL_TREE;
2874 tree and_tmp, and_tmp_name, and_stmt;
2875 tree ptrsize_zero;
2877 /* Check that mask is one less than a power of 2, i.e., mask is
2878 all zeros followed by all ones. */
2879 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2881 /* CHECKME: what is the best integer or unsigned type to use to hold a
2882 cast from a pointer value? */
2883 psize = TYPE_SIZE (ptr_type_node);
2884 int_ptrsize_type
2885 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2887 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2888 of the first vector of the i'th data reference. */
2890 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2892 tree new_stmt_list = NULL_TREE;
2893 tree addr_base;
2894 tree addr_tmp, addr_tmp_name, addr_stmt;
2895 tree or_tmp, new_or_tmp_name, or_stmt;
2897 /* create: addr_tmp = (int)(address_of_first_vector) */
2898 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2899 &new_stmt_list,
2900 NULL_TREE);
2902 if (new_stmt_list != NULL_TREE)
2903 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
/* NOTE(review): i is unsigned int but printed with %d -- harmless for
realistic counts, but %u would match the type.  */
2905 sprintf (tmp_name, "%s%d", "addr2int", i);
2906 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2907 add_referenced_var (addr_tmp);
2908 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2909 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2910 addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2911 addr_tmp_name, addr_stmt);
2912 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2913 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2915 /* The addresses are OR together. */
2917 if (or_tmp_name != NULL_TREE)
2919 /* create: or_tmp = or_tmp | addr_tmp */
2920 sprintf (tmp_name, "%s%d", "orptrs", i);
2921 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2922 add_referenced_var (or_tmp);
2923 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2924 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2925 build2 (BIT_IOR_EXPR, int_ptrsize_type,
2926 or_tmp_name,
2927 addr_tmp_name));
2928 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2929 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2930 or_tmp_name = new_or_tmp_name;
2932 else
2933 or_tmp_name = addr_tmp_name;
2935 } /* end for i */
2937 mask_cst = build_int_cst (int_ptrsize_type, mask);
2939 /* create: and_tmp = or_tmp & mask */
2940 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2941 add_referenced_var (and_tmp);
2942 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2944 and_stmt = build2 (MODIFY_EXPR, void_type_node,
2945 and_tmp_name,
2946 build2 (BIT_AND_EXPR, int_ptrsize_type,
2947 or_tmp_name, mask_cst));
2948 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2949 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2951 /* Make and_tmp the left operand of the conditional test against zero.
2952 if and_tmp has a nonzero bit then some address is unaligned. */
2953 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2954 return build2 (EQ_EXPR, boolean_type_node,
2955 and_tmp_name, ptrsize_zero);
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo,
                     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;    /* Vectorized-loop trip count (niters / VF).  */
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned.  */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment test; statements it needs are
         accumulated in COND_EXPR_STMT_LIST and inserted below, once the
         versioning condition block exists.  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
         vectorization - that the loop exit block has a single predecessor.
         After versioning, the exit block of both loop versions is the same
         basic block (i.e. it has two predecessors). Just in order to simplify
         following transformations in the vectorizer, we fix this situation
         here by adding a new (empty) block on the exit-edge of the loop,
         with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each loop-closed PHI in the merge block, create a matching PHI
         in the new exit block and reroute the loop's incoming value through
         it, restoring loop-closed SSA form.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
           orig_phi = PHI_CHAIN (orig_phi))
        {
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
          SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
        }

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Emit the statements computing the alignment test just before the
         versioning condition.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* Niters divides evenly by VF: no epilog needed, the new trip count is
       simply niters / VF.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* No bsi_next in the for-header: iterator advancement depends on
         whether the statement is removed (stores) or kept.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info;
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);
          /* Skip stmts that need no vectorization (neither relevant nor
             live after the loop).  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              bsi_next (&si);
              continue;
            }
          /* FORNOW: Verify that all stmts operate on the same number of
                     units and no inner unrolling is necessary.  */
          gcc_assert
                (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                 == (unsigned HOST_WIDE_INT) vectorization_factor);

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          is_store = vect_transform_stmt (stmt, &si);
          if (is_store)
            {
              /* Free the attached stmt_vec_info and remove the stmt.  */
              stmt_ann_t ann = stmt_ann (stmt);
              free (stmt_info);
              set_stmt_info (ann, NULL);
              bsi_remove (&si, true);
              continue;
            }

          bsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* Rename the virtual names collected during this loop's transformation.  */
  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}