* tree-ssa-phiopt.c (conditional_replacement): Construct proper SSA
[official-gcc.git] / gcc / tree-vect-transform.c
blobf498adb74c530d398c6ab99ac91363e3b2c7d0ae
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
45 #include "real.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
83 provided. */
85 static tree
86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88 const char *prefix;
89 tree new_vect_var;
91 switch (var_kind)
93 case vect_simple_var:
94 prefix = "vect_";
95 break;
96 case vect_scalar_var:
97 prefix = "stmp_";
98 break;
99 case vect_pointer_var:
100 prefix = "vect_p";
101 break;
102 default:
103 gcc_unreachable ();
106 if (name)
107 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108 else
109 new_vect_var = create_tmp_var (type, prefix);
111 return new_vect_var;
115 /* Function vect_create_index_for_vector_ref.
117 Create (and return) an index variable, along with it's update chain in the
118 loop. This variable will be used to access a memory location in a vector
119 operation.
121 Input:
122 LOOP: The loop being vectorized.
123 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
124 function can be added here, or in the loop pre-header.
126 Output:
127 Return an index that will be used to index a vector array. It is expected
128 that a pointer to the first vector will be used as the base address for the
129 indexed reference.
131 FORNOW: we are not trying to be efficient, just creating a new index each
132 time from scratch. At this time all vector references could use the same
133 index.
135 TODO: create only one index to be used by all vector references. Record
136 the index in the LOOP_VINFO the first time this procedure is called and
137 return it on subsequent calls. The increment of this index must be placed
138 just before the conditional expression that ends the single block loop. */
140 static tree
141 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
143 tree init, step;
144 block_stmt_iterator incr_bsi;
145 bool insert_after;
146 tree indx_before_incr, indx_after_incr;
147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
148 tree incr;
150 /* It is assumed that the base pointer used for vectorized access contains
151 the address of the first vector. Therefore the index used for vectorized
152 access must be initialized to zero and incremented by 1. */
154 init = integer_zero_node;
155 step = integer_one_node;
157 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
158 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
159 &indx_before_incr, &indx_after_incr);
160 incr = bsi_stmt (incr_bsi);
161 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
163 return indx_before_incr;
167 /* Function vect_create_addr_base_for_vector_ref.
169 Create an expression that computes the address of the first memory location
170 that will be accessed for a data reference.
172 Input:
173 STMT: The statement containing the data reference.
174 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
175 OFFSET: Optional. If supplied, it is be added to the initial address.
177 Output:
178 1. Return an SSA_NAME whose value is the address of the memory location of
179 the first vector of the data reference.
180 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
181 these statement(s) which define the returned SSA_NAME.
183 FORNOW: We are only handling array accesses with step 1. */
185 static tree
186 vect_create_addr_base_for_vector_ref (tree stmt,
187 tree *new_stmt_list,
188 tree offset)
190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
191 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
192 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
193 tree base_name = build_fold_indirect_ref (data_ref_base);
194 tree ref = DR_REF (dr);
195 tree scalar_type = TREE_TYPE (ref);
196 tree scalar_ptr_type = build_pointer_type (scalar_type);
197 tree vec_stmt;
198 tree new_temp;
199 tree addr_base, addr_expr;
200 tree dest, new_stmt;
201 tree base_offset = unshare_expr (DR_OFFSET (dr));
202 tree init = unshare_expr (DR_INIT (dr));
204 /* Create base_offset */
205 base_offset = size_binop (PLUS_EXPR, base_offset, init);
206 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
207 add_referenced_tmp_var (dest);
208 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
209 append_to_statement_list_force (new_stmt, new_stmt_list);
211 if (offset)
213 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
214 add_referenced_tmp_var (tmp);
215 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
216 DR_STEP (dr));
217 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
218 base_offset, offset);
219 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
220 append_to_statement_list_force (new_stmt, new_stmt_list);
223 /* base + base_offset */
224 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
225 base_offset);
227 /* addr_expr = addr_base */
228 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
229 get_name (base_name));
230 add_referenced_tmp_var (addr_expr);
231 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
232 new_temp = make_ssa_name (addr_expr, vec_stmt);
233 TREE_OPERAND (vec_stmt, 0) = new_temp;
234 append_to_statement_list_force (vec_stmt, new_stmt_list);
236 if (vect_print_dump_info (REPORT_DETAILS))
238 fprintf (vect_dump, "created ");
239 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
241 return new_temp;
245 /* Function vect_align_data_ref.
247 Handle misalignment of a memory accesses.
249 FORNOW: Can't handle misaligned accesses.
250 Make sure that the dataref is aligned. */
252 static void
253 vect_align_data_ref (tree stmt)
255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
258 /* FORNOW: can't handle misaligned accesses;
259 all accesses expected to be aligned. */
260 gcc_assert (aligned_access_p (dr));
264 /* Function vect_create_data_ref_ptr.
266 Create a memory reference expression for vector access, to be used in a
267 vector load/store stmt. The reference is based on a new pointer to vector
268 type (vp).
270 Input:
271 1. STMT: a stmt that references memory. Expected to be of the form
272 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
273 2. BSI: block_stmt_iterator where new stmts can be added.
274 3. OFFSET (optional): an offset to be added to the initial address accessed
275 by the data-ref in STMT.
276 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
277 pointing to the initial address.
279 Output:
280 1. Declare a new ptr to vector_type, and have it point to the base of the
281 data reference (initial addressed accessed by the data reference).
282 For example, for vector of type V8HI, the following code is generated:
284 v8hi *vp;
285 vp = (v8hi *)initial_address;
287 if OFFSET is not supplied:
288 initial_address = &a[init];
289 if OFFSET is supplied:
290 initial_address = &a[init + OFFSET];
292 Return the initial_address in INITIAL_ADDRESS.
294 2. Create a data-reference in the loop based on the new vector pointer vp,
295 and using a new index variable 'idx' as follows:
297 vp' = vp + update
299 where if ONLY_INIT is true:
300 update = zero
301 and otherwise
302 update = idx + vector_type_size
304 Return the pointer vp'.
307 FORNOW: handle only aligned and consecutive accesses. */
309 static tree
310 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
311 tree *initial_address, bool only_init)
313 tree base_name;
314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
317 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
318 tree vect_ptr_type;
319 tree vect_ptr;
320 tree tag;
321 tree new_temp;
322 tree vec_stmt;
323 tree new_stmt_list = NULL_TREE;
324 tree idx;
325 edge pe = loop_preheader_edge (loop);
326 basic_block new_bb;
327 tree vect_ptr_init;
328 tree vectype_size;
329 tree ptr_update;
330 tree data_ref_ptr;
331 tree type, tmp, size;
332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
334 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
336 if (vect_print_dump_info (REPORT_DETAILS))
338 tree data_ref_base = base_name;
339 fprintf (vect_dump, "create vector-pointer variable to type: ");
340 print_generic_expr (vect_dump, vectype, TDF_SLIM);
341 if (TREE_CODE (data_ref_base) == VAR_DECL)
342 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
343 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
344 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
345 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
346 fprintf (vect_dump, " vectorizing a record based array ref: ");
347 else if (TREE_CODE (data_ref_base) == SSA_NAME)
348 fprintf (vect_dump, " vectorizing a pointer ref: ");
349 print_generic_expr (vect_dump, base_name, TDF_SLIM);
352 /** (1) Create the new vector-pointer variable: **/
354 vect_ptr_type = build_pointer_type (vectype);
355 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
356 get_name (base_name));
357 add_referenced_tmp_var (vect_ptr);
360 /** (2) Add aliasing information to the new vector-pointer:
361 (The points-to info (DR_PTR_INFO) may be defined later.) **/
363 tag = DR_MEMTAG (dr);
364 gcc_assert (tag);
366 /* If tag is a variable (and NOT_A_TAG) than a new type alias
367 tag must be created with tag added to its may alias list. */
368 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
369 new_type_alias (vect_ptr, tag);
370 else
371 var_ann (vect_ptr)->type_mem_tag = tag;
373 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
375 /** (3) Calculate the initial address the vector-pointer, and set
376 the vector-pointer to point to it before the loop: **/
378 /* Create: (&(base[init_val+offset]) in the loop preheader. */
379 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
380 offset);
381 pe = loop_preheader_edge (loop);
382 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
383 gcc_assert (!new_bb);
384 *initial_address = new_temp;
386 /* Create: p = (vectype *) initial_base */
387 vec_stmt = fold_convert (vect_ptr_type, new_temp);
388 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
389 new_temp = make_ssa_name (vect_ptr, vec_stmt);
390 TREE_OPERAND (vec_stmt, 0) = new_temp;
391 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
392 gcc_assert (!new_bb);
393 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
396 /** (4) Handle the updating of the vector-pointer inside the loop: **/
398 if (only_init) /* No update in loop is required. */
400 /* Copy the points-to information if it exists. */
401 if (DR_PTR_INFO (dr))
402 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
403 return vect_ptr_init;
406 idx = vect_create_index_for_vector_ref (loop_vinfo);
408 /* Create: update = idx * vectype_size */
409 tmp = create_tmp_var (integer_type_node, "update");
410 add_referenced_tmp_var (tmp);
411 size = TYPE_SIZE (vect_ptr_type);
412 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
413 ptr_update = create_tmp_var (type, "update");
414 add_referenced_tmp_var (ptr_update);
415 vectype_size = TYPE_SIZE_UNIT (vectype);
416 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
417 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
418 new_temp = make_ssa_name (tmp, vec_stmt);
419 TREE_OPERAND (vec_stmt, 0) = new_temp;
420 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
421 vec_stmt = fold_convert (type, new_temp);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
423 new_temp = make_ssa_name (ptr_update, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
427 /* Create: data_ref_ptr = vect_ptr_init + update */
428 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
429 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
430 new_temp = make_ssa_name (vect_ptr, vec_stmt);
431 TREE_OPERAND (vec_stmt, 0) = new_temp;
432 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
433 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
435 /* Copy the points-to information if it exists. */
436 if (DR_PTR_INFO (dr))
437 duplicate_ssa_name_ptr_info (data_ref_ptr, DR_PTR_INFO (dr));
438 return data_ref_ptr;
442 /* Function vect_create_destination_var.
444 Create a new temporary of type VECTYPE. */
446 static tree
447 vect_create_destination_var (tree scalar_dest, tree vectype)
449 tree vec_dest;
450 const char *new_name;
451 tree type;
452 enum vect_var_kind kind;
454 kind = vectype ? vect_simple_var : vect_scalar_var;
455 type = vectype ? vectype : TREE_TYPE (scalar_dest);
457 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
459 new_name = get_name (scalar_dest);
460 if (!new_name)
461 new_name = "var_";
462 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
463 add_referenced_tmp_var (vec_dest);
465 return vec_dest;
469 /* Function vect_init_vector.
471 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
472 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
473 used in the vectorization of STMT. */
475 static tree
476 vect_init_vector (tree stmt, tree vector_var)
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
480 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
481 tree new_var;
482 tree init_stmt;
483 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
484 tree vec_oprnd;
485 edge pe;
486 tree new_temp;
487 basic_block new_bb;
489 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
490 add_referenced_tmp_var (new_var);
492 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
493 new_temp = make_ssa_name (new_var, init_stmt);
494 TREE_OPERAND (init_stmt, 0) = new_temp;
496 pe = loop_preheader_edge (loop);
497 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
498 gcc_assert (!new_bb);
500 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "created new init_stmt: ");
503 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
506 vec_oprnd = TREE_OPERAND (init_stmt, 0);
507 return vec_oprnd;
511 /* Function vect_get_vec_def_for_operand.
513 OP is an operand in STMT. This function returns a (vector) def that will be
514 used in the vectorized stmt for STMT.
516 In the case that OP is an SSA_NAME which is defined in the loop, then
517 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
519 In case OP is an invariant or constant, a new stmt that creates a vector def
520 needs to be introduced. */
522 static tree
523 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
525 tree vec_oprnd;
526 tree vec_stmt;
527 tree def_stmt;
528 stmt_vec_info def_stmt_info = NULL;
529 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
530 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
531 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
532 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
533 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
534 tree vec_inv;
535 tree vec_cst;
536 tree t = NULL_TREE;
537 tree def;
538 int i;
539 enum vect_def_type dt;
540 bool is_simple_use;
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
545 print_generic_expr (vect_dump, op, TDF_SLIM);
548 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
549 gcc_assert (is_simple_use);
550 if (vect_print_dump_info (REPORT_DETAILS))
552 if (def)
554 fprintf (vect_dump, "def = ");
555 print_generic_expr (vect_dump, def, TDF_SLIM);
557 if (def_stmt)
559 fprintf (vect_dump, " def_stmt = ");
560 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
564 switch (dt)
566 /* Case 1: operand is a constant. */
567 case vect_constant_def:
569 if (scalar_def)
570 *scalar_def = op;
572 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
573 if (vect_print_dump_info (REPORT_DETAILS))
574 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
576 for (i = nunits - 1; i >= 0; --i)
578 t = tree_cons (NULL_TREE, op, t);
580 vec_cst = build_vector (vectype, t);
581 return vect_init_vector (stmt, vec_cst);
584 /* Case 2: operand is defined outside the loop - loop invariant. */
585 case vect_invariant_def:
587 if (scalar_def)
588 *scalar_def = def;
590 /* Create 'vec_inv = {inv,inv,..,inv}' */
591 if (vect_print_dump_info (REPORT_DETAILS))
592 fprintf (vect_dump, "Create vector_inv.");
594 for (i = nunits - 1; i >= 0; --i)
596 t = tree_cons (NULL_TREE, def, t);
599 /* FIXME: use build_constructor directly. */
600 vec_inv = build_constructor_from_list (vectype, t);
601 return vect_init_vector (stmt, vec_inv);
604 /* Case 3: operand is defined inside the loop. */
605 case vect_loop_def:
607 if (scalar_def)
608 *scalar_def = def_stmt;
610 /* Get the def from the vectorized stmt. */
611 def_stmt_info = vinfo_for_stmt (def_stmt);
612 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
613 gcc_assert (vec_stmt);
614 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
615 return vec_oprnd;
618 /* Case 4: operand is defined by a loop header phi - reduction */
619 case vect_reduction_def:
621 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
623 /* Get the def before the loop */
624 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
625 return get_initial_def_for_reduction (stmt, op, scalar_def);
628 /* Case 5: operand is defined by loop-header phi - induction. */
629 case vect_induction_def:
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "induction - unsupported.");
633 internal_error ("no support for induction"); /* FORNOW */
636 default:
637 gcc_unreachable ();
642 /* Function vect_finish_stmt_generation.
644 Insert a new stmt. */
646 static void
647 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
649 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
651 if (vect_print_dump_info (REPORT_DETAILS))
653 fprintf (vect_dump, "add new stmt: ");
654 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
657 /* Make sure bsi points to the stmt that is being vectorized. */
658 gcc_assert (stmt == bsi_stmt (*bsi));
660 #ifdef USE_MAPPED_LOCATION
661 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
662 #else
663 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
664 #endif
668 #define ADJUST_IN_EPILOG 1
670 /* Function get_initial_def_for_reduction
672 Input:
673 STMT - a stmt that performs a reduction operation in the loop.
674 INIT_VAL - the initial value of the reduction variable
676 Output:
677 SCALAR_DEF - a tree that holds a value to be added to the final result
678 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
679 Return a vector variable, initialized according to the operation that STMT
680 performs. This vector will be used as the initial value of the
681 vector of partial results.
683 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
684 add: [0,0,...,0,0]
685 mult: [1,1,...,1,1]
686 min/max: [init_val,init_val,..,init_val,init_val]
687 bit and/or: [init_val,init_val,..,init_val,init_val]
688 and when necessary (e.g. add/mult case) let the caller know
689 that it needs to adjust the result by init_val.
691 Option2: Initialize the vector as follows:
692 add: [0,0,...,0,init_val]
693 mult: [1,1,...,1,init_val]
694 min/max: [init_val,init_val,...,init_val]
695 bit and/or: [init_val,init_val,...,init_val]
696 and no adjustments are needed.
698 For example, for the following code:
700 s = init_val;
701 for (i=0;i<n;i++)
702 s = s + a[i];
704 STMT is 's = s + a[i]', and the reduction variable is 's'.
705 For a vector of 4 units, we want to return either [0,0,0,init_val],
706 or [0,0,0,0] and let the caller know that it needs to adjust
707 the result at the end by 'init_val'.
709 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
710 TODO: Use some cost-model to estimate which scheme is more profitable.
713 static tree
714 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
716 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
717 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
718 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
719 int nelements;
720 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
721 tree type = TREE_TYPE (init_val);
722 tree def;
723 tree vec, t = NULL_TREE;
724 bool need_epilog_adjust;
725 int i;
727 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
729 switch (code)
731 case PLUS_EXPR:
732 if (INTEGRAL_TYPE_P (type))
733 def = build_int_cst (type, 0);
734 else
735 def = build_real (type, dconst0);
737 #ifdef ADJUST_IN_EPILOG
738 /* All the 'nunits' elements are set to 0. The final result will be
739 adjusted by 'init_val' at the loop epilog. */
740 nelements = nunits;
741 need_epilog_adjust = true;
742 #else
743 /* 'nunits - 1' elements are set to 0; The last element is set to
744 'init_val'. No further adjustments at the epilog are needed. */
745 nelements = nunits - 1;
746 need_epilog_adjust = false;
747 #endif
748 break;
750 case MIN_EXPR:
751 case MAX_EXPR:
752 def = init_val;
753 nelements = nunits;
754 need_epilog_adjust = false;
755 break;
757 default:
758 gcc_unreachable ();
761 for (i = nelements - 1; i >= 0; --i)
762 t = tree_cons (NULL_TREE, def, t);
764 if (nelements == nunits - 1)
766 /* Set the last element of the vector. */
767 t = tree_cons (NULL_TREE, init_val, t);
768 nelements += 1;
770 gcc_assert (nelements == nunits);
772 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
773 vec = build_vector (vectype, t);
774 else
775 vec = build_constructor_from_list (vectype, t);
777 if (!need_epilog_adjust)
778 *scalar_def = NULL_TREE;
779 else
780 *scalar_def = init_val;
782 return vect_init_vector (stmt, vec);
786 /* Function vect_create_epilog_for_reduction:
788 Create code at the loop-epilog to finalize the result of a reduction
789 computation.
791 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
792 into a single result, by applying the operation REDUC_CODE on the
793 partial-results-vector. For this, we need to create a new phi node at the
794 loop exit to preserve loop-closed form, as illustrated below.
796 STMT is the original scalar reduction stmt that is being vectorized.
797 REDUCTION_OP is the scalar reduction-variable.
798 REDUCTION_PHI is the phi-node that carries the reduction computation.
799 This function also sets the arguments for the REDUCTION_PHI:
800 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
801 The loop-latch argument is VECT_DEF - the vector of partial sums.
803 This function transforms this:
805 loop:
806 vec_def = phi <null, null> # REDUCTION_PHI
807 ....
808 VECT_DEF = ...
810 loop_exit:
811 s_out0 = phi <s_loop> # EXIT_PHI
813 use <s_out0>
814 use <s_out0>
816 Into:
818 loop:
819 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
820 ....
821 VECT_DEF = ...
823 loop_exit:
824 s_out0 = phi <s_loop> # EXIT_PHI
825 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
827 v_out2 = reduc_expr <v_out1>
828 s_out3 = extract_field <v_out2, 0>
830 use <s_out3>
831 use <s_out3>
834 static void
835 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
836 enum tree_code reduc_code, tree reduction_phi)
838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
839 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
840 enum machine_mode mode = TYPE_MODE (vectype);
841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
842 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
843 basic_block exit_bb;
844 tree scalar_dest = TREE_OPERAND (stmt, 0);
845 tree scalar_type = TREE_TYPE (scalar_dest);
846 tree new_phi;
847 block_stmt_iterator exit_bsi;
848 tree vec_dest;
849 tree new_temp;
850 tree new_name;
851 tree epilog_stmt;
852 tree new_scalar_dest, exit_phi;
853 tree bitsize, bitpos, bytesize;
854 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
855 tree scalar_initial_def;
856 tree vec_initial_def;
857 tree orig_name;
858 imm_use_iterator imm_iter;
859 use_operand_p use_p;
860 bool extract_scalar_result;
862 /*** 1. Create the reduction def-use cycle ***/
864 /* 1.1 set the loop-entry arg of the reduction-phi: */
865 /* For the case of reduction, vect_get_vec_def_for_operand returns
866 the scalar def before the loop, that defines the initial value
867 of the reduction variable. */
868 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
869 &scalar_initial_def);
870 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
873 /* 1.2 set the loop-latch arg for the reduction-phi: */
874 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
876 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "transform reduction: created def-use cycle:");
879 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
880 fprintf (vect_dump, "\n");
881 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
885 /*** 2. Create epilog code ***/
887 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
888 v_out1 = phi <v_loop> */
890 exit_bb = loop->single_exit->dest;
891 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
892 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
894 exit_bsi = bsi_start (exit_bb);
897 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
898 bitsize = TYPE_SIZE (scalar_type);
899 bytesize = TYPE_SIZE_UNIT (scalar_type);
901 /* 2.2 Create the reduction code. */
903 if (reduc_code < NUM_TREE_CODES)
905 /*** Case 1: Create:
906 v_out2 = reduc_expr <v_out1> */
908 if (vect_print_dump_info (REPORT_DETAILS))
909 fprintf (vect_dump, "Reduce using direct vector reduction.");
911 vec_dest = vect_create_destination_var (scalar_dest, vectype);
912 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
913 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
914 new_temp = make_ssa_name (vec_dest, epilog_stmt);
915 TREE_OPERAND (epilog_stmt, 0) = new_temp;
916 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
918 extract_scalar_result = true;
920 else
922 enum tree_code shift_code = 0;
923 bool have_whole_vector_shift = true;
924 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
925 int bit_offset;
926 int element_bitsize = tree_low_cst (bitsize, 1);
927 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
928 tree vec_temp;
930 /* The result of the reduction is expected to be at the least
931 significant bits of the vector. This is merely convention,
932 as it's the extraction later that really matters, and that
933 is also under our control. */
934 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
935 shift_code = VEC_RSHIFT_EXPR;
936 else
937 have_whole_vector_shift = false;
939 /* Regardless of whether we have a whole vector shift, if we're
940 emulating the operation via tree-vect-generic, we don't want
941 to use it. Only the first round of the reduction is likely
942 to still be profitable via emulation. */
943 /* ??? It might be better to emit a reduction tree code here, so that
944 tree-vect-generic can expand the first round via bit tricks. */
945 if (!VECTOR_MODE_P (mode))
946 have_whole_vector_shift = false;
947 else
949 optab optab = optab_for_tree_code (code, vectype);
950 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
951 have_whole_vector_shift = false;
954 if (have_whole_vector_shift)
956 /*** Case 2:
957 for (offset = VS/2; offset >= element_size; offset/=2)
959 Create: va' = vec_shift <va, offset>
960 Create: va = vop <va, va'>
961 } */
963 if (vect_print_dump_info (REPORT_DETAILS))
964 fprintf (vect_dump, "Reduce using vector shifts");
966 vec_dest = vect_create_destination_var (scalar_dest, vectype);
967 new_temp = PHI_RESULT (new_phi);
969 for (bit_offset = vec_size_in_bits/2;
970 bit_offset >= element_bitsize;
971 bit_offset /= 2)
973 tree bitpos = size_int (bit_offset);
975 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
976 build2 (shift_code, vectype, new_temp, bitpos));
977 new_name = make_ssa_name (vec_dest, epilog_stmt);
978 TREE_OPERAND (epilog_stmt, 0) = new_name;
979 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
980 if (vect_print_dump_info (REPORT_DETAILS))
981 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
984 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
985 build2 (code, vectype, new_name, new_temp));
986 new_temp = make_ssa_name (vec_dest, epilog_stmt);
987 TREE_OPERAND (epilog_stmt, 0) = new_temp;
988 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
989 if (vect_print_dump_info (REPORT_DETAILS))
990 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
993 extract_scalar_result = true;
995 else
997 tree rhs;
999 /*** Case 3:
1000 Create:
1001 s = extract_field <v_out2, 0>
1002 for (offset=element_size; offset<vector_size; offset+=element_size;)
1004 Create: s' = extract_field <v_out2, offset>
1005 Create: s = op <s, s'>
1006 } */
1008 if (vect_print_dump_info (REPORT_DETAILS))
1009 fprintf (vect_dump, "Reduce using scalar code. ");
1011 vec_temp = PHI_RESULT (new_phi);
1012 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1014 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1015 bitsize_zero_node);
1017 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1018 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1019 rhs);
1020 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1021 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1022 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1023 if (vect_print_dump_info (REPORT_DETAILS))
1024 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1026 for (bit_offset = element_bitsize;
1027 bit_offset < vec_size_in_bits;
1028 bit_offset += element_bitsize)
1030 tree bitpos = bitsize_int (bit_offset);
1031 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1032 bitpos);
1034 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1035 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1036 rhs);
1037 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1038 TREE_OPERAND (epilog_stmt, 0) = new_name;
1039 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1040 if (vect_print_dump_info (REPORT_DETAILS))
1041 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1044 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1045 build2 (code, scalar_type, new_name, new_temp));
1046 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1047 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1048 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1049 if (vect_print_dump_info (REPORT_DETAILS))
1050 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1053 extract_scalar_result = false;
1058 /* 2.3 Extract the final scalar result. Create:
1059 s_out3 = extract_field <v_out2, bitpos> */
1061 if (extract_scalar_result)
1063 tree rhs;
1065 if (vect_print_dump_info (REPORT_DETAILS))
1066 fprintf (vect_dump, "extract scalar result");
1068 /* The result is in the low order bits. */
1069 if (BITS_BIG_ENDIAN)
1070 bitpos = size_binop (MULT_EXPR,
1071 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1072 TYPE_SIZE (scalar_type));
1073 else
1074 bitpos = bitsize_zero_node;
1076 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1077 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1078 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1079 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1080 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1081 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1082 if (vect_print_dump_info (REPORT_DETAILS))
1083 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1087 /* 2.4 Adjust the final result by the initial value of the reduction
1088 variable. (when such adjustment is not needed, then
1089 'scalar_initial_def' is zero).
1091 Create:
1092 s_out = scalar_expr <s_out, scalar_initial_def> */
1094 if (scalar_initial_def)
1096 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1097 build2 (code, scalar_type, new_temp, scalar_initial_def));
1098 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1099 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1100 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1102 if (vect_print_dump_info (REPORT_DETAILS))
1103 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1107 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1109 /* Find the loop-closed-use at the loop exit of the original
1110 scalar result. (The reduction result is expected to have
1111 two immediate uses - one at the latch block, and one at the
1112 loop exit). */
1113 exit_phi = NULL;
1114 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1116 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1118 exit_phi = USE_STMT (use_p);
1119 break;
1123 orig_name = PHI_RESULT (exit_phi);
1125 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1126 SET_USE (use_p, new_temp);
1130 /* Function vectorizable_reduction.
1132 Check if STMT performs a reduction operation that can be vectorized.
1133 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1134 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1135 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1137 bool
1138 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1140 tree vec_dest;
1141 tree scalar_dest;
1142 tree op0, op1;
1143 tree loop_vec_def;
1144 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1145 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1146 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1148 tree operation;
1149 enum tree_code code, reduc_code = 0;
1150 enum machine_mode vec_mode;
1151 int op_type;
1152 optab optab, reduc_optab;
1153 tree new_temp;
1154 tree def0, def1, def_stmt0, def_stmt1;
1155 enum vect_def_type dt0, dt1;
1156 tree new_phi;
1157 tree scalar_type;
1158 bool is_simple_use0;
1159 bool is_simple_use1;
1161 /* Is vectorizable reduction? */
1163 /* Not supportable if the reduction variable is used in the loop. */
1164 if (STMT_VINFO_RELEVANT_P (stmt_info))
1165 return false;
1167 if (!STMT_VINFO_LIVE_P (stmt_info))
1168 return false;
1170 /* Make sure it was already recognized as a reduction pattern. */
1171 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1172 return false;
1174 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1176 operation = TREE_OPERAND (stmt, 1);
1177 code = TREE_CODE (operation);
1178 op_type = TREE_CODE_LENGTH (code);
1180 if (op_type != binary_op)
1181 return false;
1183 op0 = TREE_OPERAND (operation, 0);
1184 op1 = TREE_OPERAND (operation, 1);
1185 scalar_dest = TREE_OPERAND (stmt, 0);
1186 scalar_type = TREE_TYPE (scalar_dest);
1188 /* Check the first operand. It is expected to be defined inside the loop. */
1189 is_simple_use0 =
1190 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1191 is_simple_use1 =
1192 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1194 gcc_assert (is_simple_use0);
1195 gcc_assert (is_simple_use1);
1196 gcc_assert (dt0 == vect_loop_def);
1197 gcc_assert (dt1 == vect_reduction_def);
1198 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1199 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1201 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1202 return false;
1204 /* Supportable by target? */
1206 /* check support for the operation in the loop */
1207 optab = optab_for_tree_code (code, vectype);
1208 if (!optab)
1210 if (vect_print_dump_info (REPORT_DETAILS))
1211 fprintf (vect_dump, "no optab.");
1212 return false;
1214 vec_mode = TYPE_MODE (vectype);
1215 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1217 if (vect_print_dump_info (REPORT_DETAILS))
1218 fprintf (vect_dump, "op not supported by target.");
1219 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1220 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1221 < vect_min_worthwhile_factor (code))
1222 return false;
1223 if (vect_print_dump_info (REPORT_DETAILS))
1224 fprintf (vect_dump, "proceeding using word mode.");
1227 /* Worthwhile without SIMD support? */
1228 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1229 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1230 < vect_min_worthwhile_factor (code))
1232 if (vect_print_dump_info (REPORT_DETAILS))
1233 fprintf (vect_dump, "not worthwhile without SIMD support.");
1234 return false;
1237 /* check support for the epilog operation */
1238 if (!reduction_code_for_scalar_code (code, &reduc_code))
1239 return false;
1240 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1241 if (!reduc_optab)
1243 if (vect_print_dump_info (REPORT_DETAILS))
1244 fprintf (vect_dump, "no optab for reduction.");
1245 reduc_code = NUM_TREE_CODES;
1247 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1249 if (vect_print_dump_info (REPORT_DETAILS))
1250 fprintf (vect_dump, "reduc op not supported by target.");
1251 reduc_code = NUM_TREE_CODES;
1254 if (!vec_stmt) /* transformation not required. */
1256 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1257 return true;
1260 /** Transform. **/
1262 if (vect_print_dump_info (REPORT_DETAILS))
1263 fprintf (vect_dump, "transform reduction.");
1265 /* Create the destination vector */
1266 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1269 /* Create the reduction-phi that defines the reduction-operand. */
1270 new_phi = create_phi_node (vec_dest, loop->header);
1273 /* Prepare the operand that is defined inside the loop body */
1274 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1276 /* Create the vectorized operation that computes the partial results */
1277 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1278 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1279 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1280 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1281 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1284 /* Finalize the reduction-phi (set it's arguments) and create the
1285 epilog reduction code. */
1286 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1287 return true;
1291 /* Function vectorizable_assignment.
1293 Check if STMT performs an assignment (copy) that can be vectorized.
1294 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1295 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1296 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1298 bool
1299 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1301 tree vec_dest;
1302 tree scalar_dest;
1303 tree op;
1304 tree vec_oprnd;
1305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1306 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1307 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1308 tree new_temp;
1309 tree def, def_stmt;
1310 enum vect_def_type dt;
1312 /* Is vectorizable assignment? */
1313 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1314 return false;
1316 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1318 if (TREE_CODE (stmt) != MODIFY_EXPR)
1319 return false;
1321 scalar_dest = TREE_OPERAND (stmt, 0);
1322 if (TREE_CODE (scalar_dest) != SSA_NAME)
1323 return false;
1325 op = TREE_OPERAND (stmt, 1);
1326 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1328 if (vect_print_dump_info (REPORT_DETAILS))
1329 fprintf (vect_dump, "use not simple.");
1330 return false;
1333 if (!vec_stmt) /* transformation not required. */
1335 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1336 return true;
1339 /** Transform. **/
1340 if (vect_print_dump_info (REPORT_DETAILS))
1341 fprintf (vect_dump, "transform assignment.");
1343 /* Handle def. */
1344 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1346 /* Handle use. */
1347 op = TREE_OPERAND (stmt, 1);
1348 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1350 /* Arguments are ready. create the new vector stmt. */
1351 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1352 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1353 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1354 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1356 return true;
1360 /* Function vect_min_worthwhile_factor.
1362 For a loop where we could vectorize the operation indicated by CODE,
1363 return the minimum vectorization factor that makes it worthwhile
1364 to use generic vectors. */
1365 static int
1366 vect_min_worthwhile_factor (enum tree_code code)
1368 switch (code)
1370 case PLUS_EXPR:
1371 case MINUS_EXPR:
1372 case NEGATE_EXPR:
1373 return 4;
1375 case BIT_AND_EXPR:
1376 case BIT_IOR_EXPR:
1377 case BIT_XOR_EXPR:
1378 case BIT_NOT_EXPR:
1379 return 2;
1381 default:
1382 return INT_MAX;
1387 /* Function vectorizable_operation.
1389 Check if STMT performs a binary or unary operation that can be vectorized.
1390 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1391 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1392 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1394 bool
1395 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1397 tree vec_dest;
1398 tree scalar_dest;
1399 tree operation;
1400 tree op0, op1 = NULL;
1401 tree vec_oprnd0, vec_oprnd1=NULL;
1402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1403 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1404 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1405 int i;
1406 enum tree_code code;
1407 enum machine_mode vec_mode;
1408 tree new_temp;
1409 int op_type;
1410 tree op;
1411 optab optab;
1412 tree def, def_stmt;
1413 enum vect_def_type dt;
1415 /* Is STMT a vectorizable binary/unary operation? */
1416 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1417 return false;
1419 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1421 if (STMT_VINFO_LIVE_P (stmt_info))
1423 /* FORNOW: not yet supported. */
1424 if (vect_print_dump_info (REPORT_DETAILS))
1425 fprintf (vect_dump, "value used after loop.");
1426 return false;
1429 if (TREE_CODE (stmt) != MODIFY_EXPR)
1430 return false;
1432 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1433 return false;
1435 operation = TREE_OPERAND (stmt, 1);
1436 code = TREE_CODE (operation);
1437 optab = optab_for_tree_code (code, vectype);
1439 /* Support only unary or binary operations. */
1440 op_type = TREE_CODE_LENGTH (code);
1441 if (op_type != unary_op && op_type != binary_op)
1443 if (vect_print_dump_info (REPORT_DETAILS))
1444 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1445 return false;
1448 for (i = 0; i < op_type; i++)
1450 op = TREE_OPERAND (operation, i);
1451 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1453 if (vect_print_dump_info (REPORT_DETAILS))
1454 fprintf (vect_dump, "use not simple.");
1455 return false;
1459 /* Supportable by target? */
1460 if (!optab)
1462 if (vect_print_dump_info (REPORT_DETAILS))
1463 fprintf (vect_dump, "no optab.");
1464 return false;
1466 vec_mode = TYPE_MODE (vectype);
1467 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1469 if (vect_print_dump_info (REPORT_DETAILS))
1470 fprintf (vect_dump, "op not supported by target.");
1471 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1472 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1473 < vect_min_worthwhile_factor (code))
1474 return false;
1475 if (vect_print_dump_info (REPORT_DETAILS))
1476 fprintf (vect_dump, "proceeding using word mode.");
1479 /* Worthwhile without SIMD support? */
1480 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1481 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1482 < vect_min_worthwhile_factor (code))
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 fprintf (vect_dump, "not worthwhile without SIMD support.");
1486 return false;
1489 if (!vec_stmt) /* transformation not required. */
1491 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1492 return true;
1495 /** Transform. **/
1497 if (vect_print_dump_info (REPORT_DETAILS))
1498 fprintf (vect_dump, "transform binary/unary operation.");
1500 /* Handle def. */
1501 scalar_dest = TREE_OPERAND (stmt, 0);
1502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1504 /* Handle uses. */
1505 op0 = TREE_OPERAND (operation, 0);
1506 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1508 if (op_type == binary_op)
1510 op1 = TREE_OPERAND (operation, 1);
1511 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1514 /* Arguments are ready. create the new vector stmt. */
1516 if (op_type == binary_op)
1517 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1518 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1519 else
1520 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1521 build1 (code, vectype, vec_oprnd0));
1522 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1523 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1524 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1526 return true;
1530 /* Function vectorizable_store.
1532 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1533 can be vectorized.
1534 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1535 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1536 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1538 bool
1539 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1541 tree scalar_dest;
1542 tree data_ref;
1543 tree op;
1544 tree vec_oprnd1;
1545 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1546 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1547 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1548 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1549 enum machine_mode vec_mode;
1550 tree dummy;
1551 enum dr_alignment_support alignment_support_cheme;
1552 ssa_op_iter iter;
1553 tree def, def_stmt;
1554 enum vect_def_type dt;
1556 /* Is vectorizable store? */
1558 if (TREE_CODE (stmt) != MODIFY_EXPR)
1559 return false;
1561 scalar_dest = TREE_OPERAND (stmt, 0);
1562 if (TREE_CODE (scalar_dest) != ARRAY_REF
1563 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1564 return false;
1566 op = TREE_OPERAND (stmt, 1);
1567 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1569 if (vect_print_dump_info (REPORT_DETAILS))
1570 fprintf (vect_dump, "use not simple.");
1571 return false;
1574 vec_mode = TYPE_MODE (vectype);
1575 /* FORNOW. In some cases can vectorize even if data-type not supported
1576 (e.g. - array initialization with 0). */
1577 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1578 return false;
1580 if (!STMT_VINFO_DATA_REF (stmt_info))
1581 return false;
1584 if (!vec_stmt) /* transformation not required. */
1586 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1587 return true;
1590 /** Transform. **/
1592 if (vect_print_dump_info (REPORT_DETAILS))
1593 fprintf (vect_dump, "transform store");
1595 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1596 gcc_assert (alignment_support_cheme);
1597 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1599 /* Handle use - get the vectorized def from the defining stmt. */
1600 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1602 /* Handle def. */
1603 /* FORNOW: make sure the data reference is aligned. */
1604 vect_align_data_ref (stmt);
1605 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1606 data_ref = build_fold_indirect_ref (data_ref);
1608 /* Arguments are ready. create the new vector stmt. */
1609 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1610 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1612 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1613 element's definition to the vector's definition then update the
1614 defining statement. The original is being deleted so the same
1615 SSA_NAMEs can be used. */
1616 copy_virtual_operands (*vec_stmt, stmt);
1618 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1620 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1622 /* If this virtual def has a use outside the loop and a loop peel is
1623 performed then the def may be renamed by the peel. Mark it for
1624 renaming so the later use will also be renamed. */
1625 mark_sym_for_renaming (SSA_NAME_VAR (def));
1628 return true;
1632 /* vectorizable_load.
1634 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1635 can be vectorized.
1636 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1637 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1638 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1640 bool
1641 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1643 tree scalar_dest;
1644 tree vec_dest = NULL;
1645 tree data_ref = NULL;
1646 tree op;
1647 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1648 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1649 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1650 tree new_temp;
1651 int mode;
1652 tree init_addr;
1653 tree new_stmt;
1654 tree dummy;
1655 basic_block new_bb;
1656 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1657 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1658 edge pe = loop_preheader_edge (loop);
1659 enum dr_alignment_support alignment_support_cheme;
1661 /* Is vectorizable load? */
1662 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1663 return false;
1665 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1667 if (STMT_VINFO_LIVE_P (stmt_info))
1669 /* FORNOW: not yet supported. */
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "value used after loop.");
1672 return false;
1675 if (TREE_CODE (stmt) != MODIFY_EXPR)
1676 return false;
1678 scalar_dest = TREE_OPERAND (stmt, 0);
1679 if (TREE_CODE (scalar_dest) != SSA_NAME)
1680 return false;
1682 op = TREE_OPERAND (stmt, 1);
1683 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1684 return false;
1686 if (!STMT_VINFO_DATA_REF (stmt_info))
1687 return false;
1689 mode = (int) TYPE_MODE (vectype);
1691 /* FORNOW. In some cases can vectorize even if data-type not supported
1692 (e.g. - data copies). */
1693 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1695 if (vect_print_dump_info (REPORT_DETAILS))
1696 fprintf (vect_dump, "Aligned load, but unsupported type.");
1697 return false;
1700 if (!vec_stmt) /* transformation not required. */
1702 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1703 return true;
1706 /** Transform. **/
1708 if (vect_print_dump_info (REPORT_DETAILS))
1709 fprintf (vect_dump, "transform load.");
1711 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1712 gcc_assert (alignment_support_cheme);
1714 if (alignment_support_cheme == dr_aligned
1715 || alignment_support_cheme == dr_unaligned_supported)
1717 /* Create:
1718 p = initial_addr;
1719 indx = 0;
1720 loop {
1721 vec_dest = *(p);
1722 indx = indx + 1;
1726 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1727 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1728 if (aligned_access_p (dr))
1729 data_ref = build_fold_indirect_ref (data_ref);
1730 else
1732 int mis = DR_MISALIGNMENT (dr);
1733 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1734 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1735 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1737 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1738 new_temp = make_ssa_name (vec_dest, new_stmt);
1739 TREE_OPERAND (new_stmt, 0) = new_temp;
1740 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1741 copy_virtual_operands (new_stmt, stmt);
1743 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1745 /* Create:
1746 p1 = initial_addr;
1747 msq_init = *(floor(p1))
1748 p2 = initial_addr + VS - 1;
1749 magic = have_builtin ? builtin_result : initial_address;
1750 indx = 0;
1751 loop {
1752 p2' = p2 + indx * vectype_size
1753 lsq = *(floor(p2'))
1754 vec_dest = realign_load (msq, lsq, magic)
1755 indx = indx + 1;
1756 msq = lsq;
1760 tree offset;
1761 tree magic;
1762 tree phi_stmt;
1763 tree msq_init;
1764 tree msq, lsq;
1765 tree dataref_ptr;
1766 tree params;
1768 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1769 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1770 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1771 &init_addr, true);
1772 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1773 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1774 new_temp = make_ssa_name (vec_dest, new_stmt);
1775 TREE_OPERAND (new_stmt, 0) = new_temp;
1776 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1777 gcc_assert (!new_bb);
1778 msq_init = TREE_OPERAND (new_stmt, 0);
1779 copy_virtual_operands (new_stmt, stmt);
1780 update_vuses_to_preheader (new_stmt, loop);
1783 /* <2> Create lsq = *(floor(p2')) in the loop */
1784 offset = build_int_cst (integer_type_node,
1785 TYPE_VECTOR_SUBPARTS (vectype));
1786 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1788 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1789 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1790 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1791 new_temp = make_ssa_name (vec_dest, new_stmt);
1792 TREE_OPERAND (new_stmt, 0) = new_temp;
1793 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1794 lsq = TREE_OPERAND (new_stmt, 0);
1795 copy_virtual_operands (new_stmt, stmt);
1798 /* <3> */
1799 if (targetm.vectorize.builtin_mask_for_load)
1801 /* Create permutation mask, if required, in loop preheader. */
1802 tree builtin_decl;
1803 params = build_tree_list (NULL_TREE, init_addr);
1804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1805 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1806 new_stmt = build_function_call_expr (builtin_decl, params);
1807 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1808 new_temp = make_ssa_name (vec_dest, new_stmt);
1809 TREE_OPERAND (new_stmt, 0) = new_temp;
1810 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1811 gcc_assert (!new_bb);
1812 magic = TREE_OPERAND (new_stmt, 0);
1814 /* The result of the CALL_EXPR to this builtin is determined from
1815 the value of the parameter and no global variables are touched
1816 which makes the builtin a "const" function. Requiring the
1817 builtin to have the "const" attribute makes it unnecessary
1818 to call mark_call_clobbered_vars_to_rename. */
1819 gcc_assert (TREE_READONLY (builtin_decl));
1821 else
1823 /* Use current address instead of init_addr for reduced reg pressure.
1825 magic = dataref_ptr;
1829 /* <4> Create msq = phi <msq_init, lsq> in loop */
1830 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1831 msq = make_ssa_name (vec_dest, NULL_TREE);
1832 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1833 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1834 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1835 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1838 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1840 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1841 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1842 new_temp = make_ssa_name (vec_dest, new_stmt);
1843 TREE_OPERAND (new_stmt, 0) = new_temp;
1844 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1846 else
1847 gcc_unreachable ();
1849 *vec_stmt = new_stmt;
1850 return true;
1854 /* Function vectorizable_live_operation.
1856 STMT computes a value that is used outside the loop. Check if
1857 it can be supported. */
1859 bool
1860 vectorizable_live_operation (tree stmt,
1861 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1862 tree *vec_stmt ATTRIBUTE_UNUSED)
1864 tree operation;
1865 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1866 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1867 int i;
1868 enum tree_code code;
1869 int op_type;
1870 tree op;
1871 tree def, def_stmt;
1872 enum vect_def_type dt;
1874 if (!STMT_VINFO_LIVE_P (stmt_info))
1875 return false;
1877 if (TREE_CODE (stmt) != MODIFY_EXPR)
1878 return false;
1880 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1881 return false;
1883 operation = TREE_OPERAND (stmt, 1);
1884 code = TREE_CODE (operation);
1886 op_type = TREE_CODE_LENGTH (code);
1888 /* FORNOW: support only if all uses are invariant. This means
1889 that the scalar operations can remain in place, unvectorized.
1890 The original last scalar value that they compute will be used. */
1892 for (i = 0; i < op_type; i++)
1894 op = TREE_OPERAND (operation, i);
1895 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1897 if (vect_print_dump_info (REPORT_DETAILS))
1898 fprintf (vect_dump, "use not simple.");
1899 return false;
1902 if (dt != vect_invariant_def && dt != vect_constant_def)
1903 return false;
1906 /* No transformation is required for the cases we currently support. */
1907 return true;
1911 /* Function vect_is_simple_cond.
1913 Input:
1914 LOOP - the loop that is being vectorized.
1915 COND - Condition that is checked for simple use.
1917 Returns whether a COND can be vectorized. Checks whether
1918 condition operands are supportable using vec_is_simple_use. */
1920 static bool
1921 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1923 tree lhs, rhs;
1924 tree def;
1925 enum vect_def_type dt;
1927 if (!COMPARISON_CLASS_P (cond))
1928 return false;
1930 lhs = TREE_OPERAND (cond, 0);
1931 rhs = TREE_OPERAND (cond, 1);
1933 if (TREE_CODE (lhs) == SSA_NAME)
1935 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1936 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1937 return false;
1939 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1940 return false;
1942 if (TREE_CODE (rhs) == SSA_NAME)
1944 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1945 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1946 return false;
1948 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1949 return false;
1951 return true;
1954 /* vectorizable_condition.
1956 Check if STMT is conditional modify expression that can be vectorized.
1957 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1958 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1959 at BSI.
1961 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1963 bool
1964 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1966 tree scalar_dest = NULL_TREE;
1967 tree vec_dest = NULL_TREE;
1968 tree op = NULL_TREE;
1969 tree cond_expr, then_clause, else_clause;
1970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1971 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1972 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1973 tree vec_compare, vec_cond_expr;
1974 tree new_temp;
1975 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1976 enum machine_mode vec_mode;
1977 tree def;
1978 enum vect_def_type dt;
1980 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1981 return false;
1983 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1985 if (STMT_VINFO_LIVE_P (stmt_info))
1987 /* FORNOW: not yet supported. */
1988 if (vect_print_dump_info (REPORT_DETAILS))
1989 fprintf (vect_dump, "value used after loop.");
1990 return false;
1993 if (TREE_CODE (stmt) != MODIFY_EXPR)
1994 return false;
1996 op = TREE_OPERAND (stmt, 1);
1998 if (TREE_CODE (op) != COND_EXPR)
1999 return false;
2001 cond_expr = TREE_OPERAND (op, 0);
2002 then_clause = TREE_OPERAND (op, 1);
2003 else_clause = TREE_OPERAND (op, 2);
2005 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2006 return false;
2008 if (TREE_CODE (then_clause) == SSA_NAME)
2010 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2011 if (!vect_is_simple_use (then_clause, loop_vinfo,
2012 &then_def_stmt, &def, &dt))
2013 return false;
2015 else if (TREE_CODE (then_clause) != INTEGER_CST
2016 && TREE_CODE (then_clause) != REAL_CST)
2017 return false;
2019 if (TREE_CODE (else_clause) == SSA_NAME)
2021 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2022 if (!vect_is_simple_use (else_clause, loop_vinfo,
2023 &else_def_stmt, &def, &dt))
2024 return false;
2026 else if (TREE_CODE (else_clause) != INTEGER_CST
2027 && TREE_CODE (else_clause) != REAL_CST)
2028 return false;
2031 vec_mode = TYPE_MODE (vectype);
2033 if (!vec_stmt)
2035 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2036 return expand_vec_cond_expr_p (op, vec_mode);
2039 /* Transform */
2041 /* Handle def. */
2042 scalar_dest = TREE_OPERAND (stmt, 0);
2043 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2045 /* Handle cond expr. */
2046 vec_cond_lhs =
2047 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2048 vec_cond_rhs =
2049 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2050 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2051 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2053 /* Arguments are ready. create the new vector stmt. */
2054 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2055 vec_cond_lhs, vec_cond_rhs);
2056 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2057 vec_compare, vec_then_clause, vec_else_clause);
2059 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2060 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2061 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2062 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2064 return true;
2067 /* Function vect_transform_stmt.
2069 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2071 bool
2072 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2074 bool is_store = false;
2075 tree vec_stmt = NULL_TREE;
2076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2077 bool done;
2079 if (STMT_VINFO_RELEVANT_P (stmt_info))
2081 switch (STMT_VINFO_TYPE (stmt_info))
2083 case op_vec_info_type:
2084 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2085 gcc_assert (done);
2086 break;
2088 case assignment_vec_info_type:
2089 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2090 gcc_assert (done);
2091 break;
2093 case load_vec_info_type:
2094 done = vectorizable_load (stmt, bsi, &vec_stmt);
2095 gcc_assert (done);
2096 break;
2098 case store_vec_info_type:
2099 done = vectorizable_store (stmt, bsi, &vec_stmt);
2100 gcc_assert (done);
2101 is_store = true;
2102 break;
2104 case condition_vec_info_type:
2105 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2106 gcc_assert (done);
2107 break;
2109 default:
2110 if (vect_print_dump_info (REPORT_DETAILS))
2111 fprintf (vect_dump, "stmt not supported.");
2112 gcc_unreachable ();
2115 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2118 if (STMT_VINFO_LIVE_P (stmt_info))
2120 switch (STMT_VINFO_TYPE (stmt_info))
2122 case reduc_vec_info_type:
2123 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2124 gcc_assert (done);
2125 break;
2127 default:
2128 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2129 gcc_assert (done);
2132 if (vec_stmt)
2134 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2135 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2139 return is_store;
2143 /* This function builds ni_name = number of iterations loop executes
2144 on the loop preheader. */
2146 static tree
2147 vect_build_loop_niters (loop_vec_info loop_vinfo)
2149 tree ni_name, stmt, var;
2150 edge pe;
2151 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2152 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2154 var = create_tmp_var (TREE_TYPE (ni), "niters");
2155 add_referenced_tmp_var (var);
2156 ni_name = force_gimple_operand (ni, &stmt, false, var);
2158 pe = loop_preheader_edge (loop);
2159 if (stmt)
2161 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2162 gcc_assert (!new_bb);
2165 return ni_name;
2169 /* This function generates the following statements:
2171 ni_name = number of iterations loop executes
2172 ratio = ni_name / vf
2173 ratio_mult_vf_name = ratio * vf
2175 and places them at the loop preheader edge. */
2177 static void
2178 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2179 tree *ni_name_ptr,
2180 tree *ratio_mult_vf_name_ptr,
2181 tree *ratio_name_ptr)
2184 edge pe;
2185 basic_block new_bb;
2186 tree stmt, ni_name;
2187 tree var;
2188 tree ratio_name;
2189 tree ratio_mult_vf_name;
2190 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2191 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2192 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2193 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2195 pe = loop_preheader_edge (loop);
2197 /* Generate temporary variable that contains
2198 number of iterations loop executes. */
2200 ni_name = vect_build_loop_niters (loop_vinfo);
2202 /* Create: ratio = ni >> log2(vf) */
2204 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2205 add_referenced_tmp_var (var);
2206 ratio_name = make_ssa_name (var, NULL_TREE);
2207 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2208 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2209 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2211 pe = loop_preheader_edge (loop);
2212 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2213 gcc_assert (!new_bb);
2215 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2217 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2218 add_referenced_tmp_var (var);
2219 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2220 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2221 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2222 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2224 pe = loop_preheader_edge (loop);
2225 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2226 gcc_assert (!new_bb);
2228 *ni_name_ptr = ni_name;
2229 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2230 *ratio_name_ptr = ratio_name;
2232 return;
2236 /* Function update_vuses_to_preheader.
2238 Input:
2239 STMT - a statement with potential VUSEs.
2240 LOOP - the loop whose preheader will contain STMT.
2242 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2243 appears to be defined in a V_MAY_DEF in another statement in a loop.
2244 One such case is when the VUSE is at the dereference of a __restricted__
2245 pointer in a load and the V_MAY_DEF is at the dereference of a different
2246 __restricted__ pointer in a store. Vectorization may result in
2247 copy_virtual_uses being called to copy the problematic VUSE to a new
2248 statement that is being inserted in the loop preheader. This procedure
2249 is called to change the SSA_NAME in the new statement's VUSE from the
2250 SSA_NAME updated in the loop to the related SSA_NAME available on the
2251 path entering the loop.
2253 When this function is called, we have the following situation:
2255 # vuse <name1>
2256 S1: vload
2257 do {
2258 # name1 = phi < name0 , name2>
2260 # vuse <name1>
2261 S2: vload
2263 # name2 = vdef <name1>
2264 S3: vstore
2266 }while...
2268 Stmt S1 was created in the loop preheader block as part of misaligned-load
2269 handling. This function fixes the name of the vuse of S1 from 'name1' to
2270 'name0'. */
2272 static void
2273 update_vuses_to_preheader (tree stmt, struct loop *loop)
2275 basic_block header_bb = loop->header;
2276 edge preheader_e = loop_preheader_edge (loop);
2277 ssa_op_iter iter;
2278 use_operand_p use_p;
2280 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2282 tree ssa_name = USE_FROM_PTR (use_p);
2283 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2284 tree name_var = SSA_NAME_VAR (ssa_name);
2285 basic_block bb = bb_for_stmt (def_stmt);
2287 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2288 if (!IS_EMPTY_STMT (def_stmt)
2289 && flow_bb_inside_loop_p (loop, bb))
2291 /* If the block containing the statement defining the SSA_NAME
2292 is in the loop then it's necessary to find the definition
2293 outside the loop using the PHI nodes of the header. */
2294 tree phi;
2295 bool updated = false;
2297 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2299 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2301 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2302 updated = true;
2303 break;
2306 gcc_assert (updated);
2312 /* Function vect_update_ivs_after_vectorizer.
2314 "Advance" the induction variables of LOOP to the value they should take
2315 after the execution of LOOP. This is currently necessary because the
2316 vectorizer does not handle induction variables that are used after the
2317 loop. Such a situation occurs when the last iterations of LOOP are
2318 peeled, because:
2319 1. We introduced new uses after LOOP for IVs that were not originally used
2320 after LOOP: the IVs of LOOP are now used by an epilog loop.
2321 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2322 times, whereas the loop IVs should be bumped N times.
2324 Input:
2325 - LOOP - a loop that is going to be vectorized. The last few iterations
2326 of LOOP were peeled.
2327 - NITERS - the number of iterations that LOOP executes (before it is
2328 vectorized). i.e, the number of times the ivs should be bumped.
2329 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2330 coming out from LOOP on which there are uses of the LOOP ivs
2331 (this is the path from LOOP->exit to epilog_loop->preheader).
2333 The new definitions of the ivs are placed in LOOP->exit.
2334 The phi args associated with the edge UPDATE_E in the bb
2335 UPDATE_E->dest are updated accordingly.
2337 Assumption 1: Like the rest of the vectorizer, this function assumes
2338 a single loop exit that has a single predecessor.
2340 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2341 organized in the same order.
2343 Assumption 3: The access function of the ivs is simple enough (see
2344 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2346 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2347 coming out of LOOP on which the ivs of LOOP are used (this is the path
2348 that leads to the epilog loop; other paths skip the epilog loop). This
2349 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2350 needs to have its phis updated.
2353 static void
2354 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2355 edge update_e)
2357 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2358 basic_block exit_bb = loop->single_exit->dest;
2359 tree phi, phi1;
2360 basic_block update_bb = update_e->dest;
2362 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2364 /* Make sure there exists a single-predecessor exit bb: */
2365 gcc_assert (single_pred_p (exit_bb));
/* Walk the header phis and the update_bb phis in lockstep; this relies
   on Assumption 2 above (same order in both blocks).  */
2367 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2368 phi && phi1;
2369 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2371 tree access_fn = NULL;
2372 tree evolution_part;
2373 tree init_expr;
2374 tree step_expr;
2375 tree var, stmt, ni, ni_name;
2376 block_stmt_iterator last_bsi;
2378 if (vect_print_dump_info (REPORT_DETAILS))
2380 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2381 print_generic_expr (vect_dump, phi, TDF_SLIM);
2384 /* Skip virtual phi's. */
2385 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "virtual phi. skip.");
2389 continue;
2392 /* Skip reduction phis. */
/* Reductions are finalized separately by vectorizable_reduction.  */
2393 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "reduc phi. skip.");
2397 continue;
2400 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2401 gcc_assert (access_fn);
2402 evolution_part =
2403 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2404 gcc_assert (evolution_part != NULL_TREE);
2406 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2407 of degree >= 2 or exponential. */
2408 gcc_assert (!tree_is_chrec (evolution_part));
2410 step_expr = evolution_part;
2411 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2412 loop->num));
/* ni = init + niters * step: the value the IV has after NITERS
   iterations of the original (scalar) loop.  */
2414 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2415 build2 (MULT_EXPR, TREE_TYPE (niters),
2416 niters, step_expr), init_expr);
2418 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2419 add_referenced_tmp_var (var);
2421 ni_name = force_gimple_operand (ni, &stmt, false, var);
2423 /* Insert stmt into exit_bb. */
2424 last_bsi = bsi_last (exit_bb);
2425 if (stmt)
2426 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2428 /* Fix phi expressions in the successor bb. */
2429 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2434 /* Function vect_do_peeling_for_loop_bound
2436 Peel the last iterations of the loop represented by LOOP_VINFO.
2437 The peeled iterations form a new epilog loop. Given that the loop now
2438 iterates NITERS times, the new epilog loop iterates
2439 NITERS % VECTORIZATION_FACTOR times.
2441 The original loop will later be made to iterate
2442 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2444 static void
2445 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2446 struct loops *loops)
2448 tree ni_name, ratio_mult_vf_name;
2449 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2450 struct loop *new_loop;
2451 edge update_e;
2452 basic_block preheader;
2453 int loop_num;
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2458 initialize_original_copy_tables ();
2460 /* Generate the following variables on the preheader of original loop:
2462 ni_name = number of iteration the original loop executes
2463 ratio = ni_name / vf
2464 ratio_mult_vf_name = ratio * vf */
2465 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2466 &ratio_mult_vf_name, ratio);
2468 loop_num = loop->num;
2469 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2470 ratio_mult_vf_name, ni_name, false);
2471 gcc_assert (new_loop);
2472 gcc_assert (loop_num == loop->num);
2473 #ifdef ENABLE_CHECKING
2474 slpeel_verify_cfg_after_peeling (loop, new_loop);
2475 #endif
2477 /* A guard that controls whether the new_loop is to be executed or skipped
2478 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2479 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2480 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2481 is on the path where the LOOP IVs are used and need to be updated. */
2483 preheader = loop_preheader_edge (new_loop)->src;
2484 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2485 update_e = EDGE_PRED (preheader, 0);
2486 else
2487 update_e = EDGE_PRED (preheader, 1);
2489 /* Update IVs of original loop as if they were advanced
2490 by ratio_mult_vf_name steps. */
2491 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2493 /* After peeling we have to reset scalar evolution analyzer. */
2494 scev_reset ();
2496 free_original_copy_tables ();
2500 /* Function vect_gen_niters_for_prolog_loop
2502 Set the number of iterations for the loop represented by LOOP_VINFO
2503 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2504 and the misalignment of DR - the data reference recorded in
2505 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2506 this loop, the data reference DR will refer to an aligned location.
2508 The following computation is generated:
2510 If the misalignment of DR is known at compile time:
2511 addr_mis = int mis = DR_MISALIGNMENT (dr);
2512 Else, compute address misalignment in bytes:
2513 addr_mis = addr & (vectype_size - 1)
2515 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2517 (elem_size = element type size; an element is the scalar element
2518 whose type is the inner type of the vectype) */
2520 static tree
2521 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2523 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2524 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2525 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2526 tree var, stmt;
2527 tree iters, iters_name;
2528 edge pe;
2529 basic_block new_bb;
2530 tree dr_stmt = DR_STMT (dr);
2531 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2532 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2533 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2534 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2535 tree niters_type = TREE_TYPE (loop_niters);
2537 pe = loop_preheader_edge (loop);
/* Misalignment known at compile time: the peel count is a constant.  */
2539 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2541 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2542 int element_size = vectype_align/vf;
2543 int elem_misalign = byte_misalign / element_size;
2545 if (vect_print_dump_info (REPORT_DETAILS))
2546 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2547 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2549 else
/* Misalignment unknown at compile time: emit code that computes it
   from the runtime address of the data reference.  */
2551 tree new_stmts = NULL_TREE;
2552 tree start_addr =
2553 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2554 tree ptr_type = TREE_TYPE (start_addr);
2555 tree size = TYPE_SIZE (ptr_type);
2556 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2557 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2558 tree elem_size_log =
2559 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2560 tree vf_tree = build_int_cst (unsigned_type_node, vf);
2561 tree byte_misalign;
2562 tree elem_misalign;
2564 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2565 gcc_assert (!new_bb);
2567 /* Create: byte_misalign = addr & (vectype_size - 1) */
2568 byte_misalign =
2569 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2571 /* Create: elem_misalign = byte_misalign / element_size */
2572 elem_misalign =
2573 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2575 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2576 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2577 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2578 iters = fold_convert (niters_type, iters);
2581 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2582 /* If the loop bound is known at compile time we already verified that it is
2583 greater than vf; since the misalignment ('iters') is at most vf, there's
2584 no need to generate the MIN_EXPR in this case. */
2585 if (TREE_CODE (loop_niters) != INTEGER_CST)
2586 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2588 if (vect_print_dump_info (REPORT_DETAILS))
2590 fprintf (vect_dump, "niters for prolog loop: ");
2591 print_generic_expr (vect_dump, iters, TDF_SLIM);
2594 var = create_tmp_var (niters_type, "prolog_loop_niters");
2595 add_referenced_tmp_var (var);
2596 iters_name = force_gimple_operand (iters, &stmt, false, var);
2598 /* Insert stmt on loop preheader edge. */
2599 if (stmt)
2601 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2602 gcc_assert (!new_bb);
2605 return iters_name;
2609 /* Function vect_update_init_of_dr
2611 NITERS iterations were peeled from LOOP. DR represents a data reference
2612 in LOOP. This function updates the information recorded in DR to
2613 account for the fact that the first NITERS iterations had already been
2614 executed. Specifically, it updates the OFFSET field of DR. */
2616 static void
2617 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2619 tree offset = DR_OFFSET (dr);
2621 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2622 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2623 DR_OFFSET (dr) = offset;
2627 /* Function vect_update_inits_of_drs
2629 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2630 This function updates the information recorded for the data references in
2631 the loop to account for the fact that the first NITERS iterations had
2632 already been executed. Specifically, it updates the initial_condition of the
2633 access_function of all the data_references in the loop. */
2635 static void
2636 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2638 unsigned int i;
2639 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2641 if (vect_dump && (dump_flags & TDF_DETAILS))
2642 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2644 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2646 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2647 vect_update_init_of_dr (dr, niters);
2652 /* Function vect_do_peeling_for_alignment
2654 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2655 'niters' is set to the misalignment of one of the data references in the
2656 loop, thereby forcing it to refer to an aligned location at the beginning
2657 of the execution of this loop. The data reference for which we are
2658 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2660 static void
2661 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2663 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2664 tree niters_of_prolog_loop, ni_name;
2665 tree n_iters;
2666 struct loop *new_loop;
2668 if (vect_print_dump_info (REPORT_DETAILS))
2669 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2671 initialize_original_copy_tables ();
2673 ni_name = vect_build_loop_niters (loop_vinfo);
2674 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2676 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2677 new_loop =
2678 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2679 niters_of_prolog_loop, ni_name, true);
2680 gcc_assert (new_loop);
2681 #ifdef ENABLE_CHECKING
2682 slpeel_verify_cfg_after_peeling (new_loop, loop);
2683 #endif
2685 /* Update number of times loop executes. */
2686 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2687 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2688 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2690 /* Update the init conditions of the access functions of all data refs. */
2691 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2693 /* After peeling we have to reset scalar evolution analyzer. */
2694 scev_reset ();
2696 free_original_copy_tables ();
2700 /* Function vect_create_cond_for_align_checks.
2702 Create a conditional expression that represents the alignment checks for
2703 all of data references (array element references) whose alignment must be
2704 checked at runtime.
2706 Input:
2707 LOOP_VINFO - two fields of the loop information are used.
2708 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2709 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2711 Output:
2712 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2713 expression.
2714 The returned value is the conditional expression to be used in the if
2715 statement that controls which version of the loop gets executed at runtime.
2717 The algorithm makes two assumptions:
2718 1) The number of bytes "n" in a vector is a power of 2.
2719 2) An address "a" is aligned if a%n is zero and that this
2720 test can be done as a&(n-1) == 0. For example, for 16
2721 byte vectors the test is a&0xf == 0. */
2723 static tree
2724 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2725 tree *cond_expr_stmt_list)
2727 VEC(tree,heap) *may_misalign_stmts
2728 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2729 tree ref_stmt;
2730 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2731 tree mask_cst;
2732 unsigned int i;
2733 tree psize;
2734 tree int_ptrsize_type;
2735 char tmp_name[20];
2736 tree or_tmp_name = NULL_TREE;
2737 tree and_tmp, and_tmp_name, and_stmt;
2738 tree ptrsize_zero;
2740 /* Check that mask is one less than a power of 2, i.e., mask is
2741 all zeros followed by all ones. */
2742 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2744 /* CHECKME: what is the best integer or unsigned type to use to hold a
2745 cast from a pointer value? */
2746 psize = TYPE_SIZE (ptr_type_node);
2747 int_ptrsize_type
2748 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2750 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2751 of the first vector of the i'th data reference. */
2753 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2755 tree new_stmt_list = NULL_TREE;
2756 tree addr_base;
2757 tree addr_tmp, addr_tmp_name, addr_stmt;
2758 tree or_tmp, new_or_tmp_name, or_stmt;
2760 /* create: addr_tmp = (int)(address_of_first_vector) */
2761 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2762 &new_stmt_list,
2763 NULL_TREE);
2765 if (new_stmt_list != NULL_TREE)
2766 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list)
2768 sprintf (tmp_name, "%s%d", "addr2int", i);
2769 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2770 add_referenced_tmp_var (addr_tmp);
2771 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2772 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2773 addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2774 addr_tmp_name, addr_stmt);
2775 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2776 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2778 /* The addresses are OR together. */
2780 if (or_tmp_name != NULL_TREE)
2782 /* create: or_tmp = or_tmp | addr_tmp */
2783 sprintf (tmp_name, "%s%d", "orptrs", i);
2784 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2785 add_referenced_tmp_var (or_tmp);
2786 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2787 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2788 build2 (BIT_IOR_EXPR, int_ptrsize_type,
2789 or_tmp_name,
2790 addr_tmp_name));
2791 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2792 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2793 or_tmp_name = new_or_tmp_name;
2795 else
/* First address seen: it seeds the OR chain directly.  */
2796 or_tmp_name = addr_tmp_name;
2798 } /* end for i */
2800 mask_cst = build_int_cst (int_ptrsize_type, mask);
2802 /* create: and_tmp = or_tmp & mask */
2803 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2804 add_referenced_tmp_var (and_tmp);
2805 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2807 and_stmt = build2 (MODIFY_EXPR, void_type_node,
2808 and_tmp_name,
2809 build2 (BIT_AND_EXPR, int_ptrsize_type,
2810 or_tmp_name, mask_cst));
2811 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2812 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2814 /* Make and_tmp the left operand of the conditional test against zero.
2815 if and_tmp has a non-zero bit then some address is unaligned. */
2816 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2817 return build2 (EQ_EXPR, boolean_type_node,
2818 and_tmp_name, ptrsize_zero);
2822 /* Function vect_transform_loop.
2824 The analysis phase has determined that the loop is vectorizable.
2825 Vectorize the loop - created vectorized stmts to replace the scalar
2826 stmts in the loop, and update the loop exit condition. */
2828 void
2829 vect_transform_loop (loop_vec_info loop_vinfo,
2830 struct loops *loops ATTRIBUTE_UNUSED)
2832 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2833 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2834 int nbbs = loop->num_nodes;
2835 block_stmt_iterator si;
2836 int i;
2837 tree ratio = NULL;
2838 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2839 bitmap_iterator bi;
2840 unsigned int j;
2842 if (vect_print_dump_info (REPORT_DETAILS))
2843 fprintf (vect_dump, "=== vec_transform_loop ===");
2845 /* If the loop has data references that may or may not be aligned then
2846 two versions of the loop need to be generated, one which is vectorized
2847 and one which isn't. A test is then generated to control which of the
2848 loops is executed. The test checks for the alignment of all of the
2849 data references that may or may not be aligned. */
2851 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2853 struct loop *nloop;
2854 tree cond_expr;
2855 tree cond_expr_stmt_list = NULL_TREE;
2856 basic_block condition_bb;
2857 block_stmt_iterator cond_exp_bsi;
2859 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
2860 &cond_expr_stmt_list);
2861 initialize_original_copy_tables ();
2862 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
2863 free_original_copy_tables();
2864 update_ssa (TODO_update_ssa);
2865 cond_exp_bsi = bsi_last (condition_bb);
2866 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
2869 /* CHECKME: we wouldn't need this if we calles update_ssa once
2870 for all loops. */
2871 bitmap_zero (vect_vnames_to_rename);
2873 /* Peel the loop if there are data refs with unknown alignment.
2874 Only one data ref with unknown store is allowed. */
2876 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2877 vect_do_peeling_for_alignment (loop_vinfo, loops);
2879 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2880 compile time constant), or it is a constant that doesn't divide by the
2881 vectorization factor, then an epilog loop needs to be created.
2882 We therefore duplicate the loop: the original loop will be vectorized,
2883 and will compute the first (n/VF) iterations. The second copy of the loop
2884 will remain scalar and will compute the remaining (n%VF) iterations.
2885 (VF is the vectorization factor). */
2887 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2888 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2889 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2890 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2891 else
2892 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2893 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2895 /* 1) Make sure the loop header has exactly two entries
2896 2) Make sure we have a preheader basic block. */
2898 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2900 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2903 /* FORNOW: the vectorizer supports only loops which body consist
2904 of one basic block (header + empty latch). When the vectorizer will
2905 support more involved loop forms, the order by which the BBs are
2906 traversed need to be reconsidered. */
2908 for (i = 0; i < nbbs; i++)
2910 basic_block bb = bbs[i];
/* NOTE: the iterator is only advanced at the bottom of the loop;
   removing a vectorized store advances it implicitly via bsi_remove.  */
2912 for (si = bsi_start (bb); !bsi_end_p (si);)
2914 tree stmt = bsi_stmt (si);
2915 stmt_vec_info stmt_info;
2916 bool is_store;
2918 if (vect_print_dump_info (REPORT_DETAILS))
2920 fprintf (vect_dump, "------>vectorizing statement: ");
2921 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2923 stmt_info = vinfo_for_stmt (stmt);
2924 gcc_assert (stmt_info);
2925 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2926 && !STMT_VINFO_LIVE_P (stmt_info))
2928 bsi_next (&si);
2929 continue;
2931 /* FORNOW: Verify that all stmts operate on the same number of
2932 units and no inner unrolling is necessary. */
2933 gcc_assert
2934 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2935 == (unsigned HOST_WIDE_INT) vectorization_factor);
2937 /* -------- vectorize statement ------------ */
2938 if (vect_print_dump_info (REPORT_DETAILS))
2939 fprintf (vect_dump, "transform statement.");
2941 is_store = vect_transform_stmt (stmt, &si);
2942 if (is_store)
2944 /* Free the attached stmt_vec_info and remove the stmt. */
2945 stmt_ann_t ann = stmt_ann (stmt);
2946 free (stmt_info);
2947 set_stmt_info ((tree_ann_t)ann, NULL);
2948 bsi_remove (&si);
2949 continue;
2952 bsi_next (&si);
2953 } /* stmts in BB */
2954 } /* BBs in loop */
2956 slpeel_make_loop_iterate_ntimes (loop, ratio);
2958 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
2959 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
2961 /* The memory tags and pointers in vectorized statements need to
2962 have their SSA forms updated. FIXME, why can't this be delayed
2963 until all the loops have been transformed? */
2964 update_ssa (TODO_update_ssa);
2966 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2967 fprintf (vect_dump, "LOOP VECTORIZED.");