* sh.c (find_sole_member): New function.
[official-gcc.git] / gcc / tree-vect-transform.c
blobbe5b77943649acbd89c2e15afbc092fa4b31f9c3
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
45 #include "real.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
83 provided. */
85 static tree
86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88 const char *prefix;
89 tree new_vect_var;
91 switch (var_kind)
93 case vect_simple_var:
94 prefix = "vect_";
95 break;
96 case vect_scalar_var:
97 prefix = "stmp_";
98 break;
99 case vect_pointer_var:
100 prefix = "vect_p";
101 break;
102 default:
103 gcc_unreachable ();
106 if (name)
107 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108 else
109 new_vect_var = create_tmp_var (type, prefix);
111 return new_vect_var;
115 /* Function vect_create_index_for_vector_ref.
117 Create (and return) an index variable, along with it's update chain in the
118 loop. This variable will be used to access a memory location in a vector
119 operation.
121 Input:
122 LOOP: The loop being vectorized.
123 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
124 function can be added here, or in the loop pre-header.
126 Output:
127 Return an index that will be used to index a vector array. It is expected
128 that a pointer to the first vector will be used as the base address for the
129 indexed reference.
131 FORNOW: we are not trying to be efficient, just creating a new index each
132 time from scratch. At this time all vector references could use the same
133 index.
135 TODO: create only one index to be used by all vector references. Record
136 the index in the LOOP_VINFO the first time this procedure is called and
137 return it on subsequent calls. The increment of this index must be placed
138 just before the conditional expression that ends the single block loop. */
140 static tree
141 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
143 tree init, step;
144 block_stmt_iterator incr_bsi;
145 bool insert_after;
146 tree indx_before_incr, indx_after_incr;
147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
148 tree incr;
150 /* It is assumed that the base pointer used for vectorized access contains
151 the address of the first vector. Therefore the index used for vectorized
152 access must be initialized to zero and incremented by 1. */
154 init = integer_zero_node;
155 step = integer_one_node;
157 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
158 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
159 &indx_before_incr, &indx_after_incr);
160 incr = bsi_stmt (incr_bsi);
161 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
163 return indx_before_incr;
167 /* Function vect_create_addr_base_for_vector_ref.
169 Create an expression that computes the address of the first memory location
170 that will be accessed for a data reference.
172 Input:
173 STMT: The statement containing the data reference.
174 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
175 OFFSET: Optional. If supplied, it is be added to the initial address.
177 Output:
178 1. Return an SSA_NAME whose value is the address of the memory location of
179 the first vector of the data reference.
180 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
181 these statement(s) which define the returned SSA_NAME.
183 FORNOW: We are only handling array accesses with step 1. */
185 static tree
186 vect_create_addr_base_for_vector_ref (tree stmt,
187 tree *new_stmt_list,
188 tree offset)
190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
191 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
192 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
193 tree base_name = build_fold_indirect_ref (data_ref_base);
194 tree ref = DR_REF (dr);
195 tree scalar_type = TREE_TYPE (ref);
196 tree scalar_ptr_type = build_pointer_type (scalar_type);
197 tree vec_stmt;
198 tree new_temp;
199 tree addr_base, addr_expr;
200 tree dest, new_stmt;
201 tree base_offset = unshare_expr (DR_OFFSET (dr));
202 tree init = unshare_expr (DR_INIT (dr));
204 /* Create base_offset */
205 base_offset = size_binop (PLUS_EXPR, base_offset, init);
206 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
207 add_referenced_tmp_var (dest);
208 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
209 append_to_statement_list_force (new_stmt, new_stmt_list);
211 if (offset)
213 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
214 add_referenced_tmp_var (tmp);
215 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
216 DR_STEP (dr));
217 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
218 base_offset, offset);
219 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
220 append_to_statement_list_force (new_stmt, new_stmt_list);
223 /* base + base_offset */
224 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
225 base_offset);
227 /* addr_expr = addr_base */
228 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
229 get_name (base_name));
230 add_referenced_tmp_var (addr_expr);
231 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
232 new_temp = make_ssa_name (addr_expr, vec_stmt);
233 TREE_OPERAND (vec_stmt, 0) = new_temp;
234 append_to_statement_list_force (vec_stmt, new_stmt_list);
236 if (vect_print_dump_info (REPORT_DETAILS))
238 fprintf (vect_dump, "created ");
239 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
241 return new_temp;
245 /* Function vect_align_data_ref.
247 Handle misalignment of a memory accesses.
249 FORNOW: Can't handle misaligned accesses.
250 Make sure that the dataref is aligned. */
252 static void
253 vect_align_data_ref (tree stmt)
255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
258 /* FORNOW: can't handle misaligned accesses;
259 all accesses expected to be aligned. */
260 gcc_assert (aligned_access_p (dr));
264 /* Function vect_create_data_ref_ptr.
266 Create a memory reference expression for vector access, to be used in a
267 vector load/store stmt. The reference is based on a new pointer to vector
268 type (vp).
270 Input:
271 1. STMT: a stmt that references memory. Expected to be of the form
272 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
273 2. BSI: block_stmt_iterator where new stmts can be added.
274 3. OFFSET (optional): an offset to be added to the initial address accessed
275 by the data-ref in STMT.
276 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
277 pointing to the initial address.
279 Output:
280 1. Declare a new ptr to vector_type, and have it point to the base of the
281 data reference (initial addressed accessed by the data reference).
282 For example, for vector of type V8HI, the following code is generated:
284 v8hi *vp;
285 vp = (v8hi *)initial_address;
287 if OFFSET is not supplied:
288 initial_address = &a[init];
289 if OFFSET is supplied:
290 initial_address = &a[init + OFFSET];
292 Return the initial_address in INITIAL_ADDRESS.
294 2. Create a data-reference in the loop based on the new vector pointer vp,
295 and using a new index variable 'idx' as follows:
297 vp' = vp + update
299 where if ONLY_INIT is true:
300 update = zero
301 and otherwise
302 update = idx + vector_type_size
304 Return the pointer vp'.
307 FORNOW: handle only aligned and consecutive accesses. */
309 static tree
310 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
311 tree *initial_address, bool only_init)
313 tree base_name;
314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
317 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
318 tree vect_ptr_type;
319 tree vect_ptr;
320 tree tag;
321 tree new_temp;
322 tree vec_stmt;
323 tree new_stmt_list = NULL_TREE;
324 tree idx;
325 edge pe = loop_preheader_edge (loop);
326 basic_block new_bb;
327 tree vect_ptr_init;
328 tree vectype_size;
329 tree ptr_update;
330 tree data_ref_ptr;
331 tree type, tmp, size;
332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
334 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
336 if (vect_print_dump_info (REPORT_DETAILS))
338 tree data_ref_base = base_name;
339 fprintf (vect_dump, "create array_ref of type: ");
340 print_generic_expr (vect_dump, vectype, TDF_SLIM);
341 if (TREE_CODE (data_ref_base) == VAR_DECL)
342 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
343 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
344 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
345 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
346 fprintf (vect_dump, " vectorizing a record based array ref: ");
347 else if (TREE_CODE (data_ref_base) == SSA_NAME)
348 fprintf (vect_dump, " vectorizing a pointer ref: ");
349 print_generic_expr (vect_dump, base_name, TDF_SLIM);
352 /** (1) Create the new vector-pointer variable: **/
354 vect_ptr_type = build_pointer_type (vectype);
355 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
356 get_name (base_name));
357 add_referenced_tmp_var (vect_ptr);
360 /** (2) Add aliasing information to the new vector-pointer:
361 (The points-to info (DR_PTR_INFO) may be defined later.) **/
363 tag = DR_MEMTAG (dr);
364 gcc_assert (tag);
366 /* If tag is a variable (and NOT_A_TAG) than a new type alias
367 tag must be created with tag added to its may alias list. */
368 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
369 new_type_alias (vect_ptr, tag);
370 else
371 var_ann (vect_ptr)->type_mem_tag = tag;
373 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
375 /** (3) Calculate the initial address the vector-pointer, and set
376 the vector-pointer to point to it before the loop: **/
378 /* Create: (&(base[init_val+offset]) in the loop preheader. */
379 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
380 offset);
381 pe = loop_preheader_edge (loop);
382 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
383 gcc_assert (!new_bb);
384 *initial_address = new_temp;
386 /* Create: p = (vectype *) initial_base */
387 vec_stmt = fold_convert (vect_ptr_type, new_temp);
388 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
389 new_temp = make_ssa_name (vect_ptr, vec_stmt);
390 TREE_OPERAND (vec_stmt, 0) = new_temp;
391 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
392 gcc_assert (!new_bb);
393 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
396 /** (4) Handle the updating of the vector-pointer inside the loop: **/
398 if (only_init) /* No update in loop is required. */
400 /* Copy the points-to information if it exists. */
401 if (DR_PTR_INFO (dr))
402 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
403 return vect_ptr_init;
406 idx = vect_create_index_for_vector_ref (loop_vinfo);
408 /* Create: update = idx * vectype_size */
409 tmp = create_tmp_var (integer_type_node, "update");
410 add_referenced_tmp_var (tmp);
411 size = TYPE_SIZE (vect_ptr_type);
412 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
413 ptr_update = create_tmp_var (type, "update");
414 add_referenced_tmp_var (ptr_update);
415 vectype_size = TYPE_SIZE_UNIT (vectype);
416 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
417 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
418 new_temp = make_ssa_name (tmp, vec_stmt);
419 TREE_OPERAND (vec_stmt, 0) = new_temp;
420 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
421 vec_stmt = fold_convert (type, new_temp);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
423 new_temp = make_ssa_name (ptr_update, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
427 /* Create: data_ref_ptr = vect_ptr_init + update */
428 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
429 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
430 new_temp = make_ssa_name (vect_ptr, vec_stmt);
431 TREE_OPERAND (vec_stmt, 0) = new_temp;
432 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
433 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
435 /* Copy the points-to information if it exists. */
436 if (DR_PTR_INFO (dr))
437 duplicate_ssa_name_ptr_info (data_ref_ptr, DR_PTR_INFO (dr));
438 return data_ref_ptr;
442 /* Function vect_create_destination_var.
444 Create a new temporary of type VECTYPE. */
446 static tree
447 vect_create_destination_var (tree scalar_dest, tree vectype)
449 tree vec_dest;
450 const char *new_name;
451 tree type;
452 enum vect_var_kind kind;
454 kind = vectype ? vect_simple_var : vect_scalar_var;
455 type = vectype ? vectype : TREE_TYPE (scalar_dest);
457 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
459 new_name = get_name (scalar_dest);
460 if (!new_name)
461 new_name = "var_";
462 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
463 add_referenced_tmp_var (vec_dest);
465 return vec_dest;
469 /* Function vect_init_vector.
471 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
472 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
473 used in the vectorization of STMT. */
475 static tree
476 vect_init_vector (tree stmt, tree vector_var)
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
480 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
481 tree new_var;
482 tree init_stmt;
483 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
484 tree vec_oprnd;
485 edge pe;
486 tree new_temp;
487 basic_block new_bb;
489 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
490 add_referenced_tmp_var (new_var);
492 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
493 new_temp = make_ssa_name (new_var, init_stmt);
494 TREE_OPERAND (init_stmt, 0) = new_temp;
496 pe = loop_preheader_edge (loop);
497 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
498 gcc_assert (!new_bb);
500 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "created new init_stmt: ");
503 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
506 vec_oprnd = TREE_OPERAND (init_stmt, 0);
507 return vec_oprnd;
511 /* Function vect_get_vec_def_for_operand.
513 OP is an operand in STMT. This function returns a (vector) def that will be
514 used in the vectorized stmt for STMT.
516 In the case that OP is an SSA_NAME which is defined in the loop, then
517 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
519 In case OP is an invariant or constant, a new stmt that creates a vector def
520 needs to be introduced. */
522 static tree
523 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
525 tree vec_oprnd;
526 tree vec_stmt;
527 tree def_stmt;
528 stmt_vec_info def_stmt_info = NULL;
529 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
530 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
531 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
532 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
533 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
534 tree vec_inv;
535 tree vec_cst;
536 tree t = NULL_TREE;
537 tree def;
538 int i;
539 enum vect_def_type dt;
540 bool is_simple_use;
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
545 print_generic_expr (vect_dump, op, TDF_SLIM);
548 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
549 gcc_assert (is_simple_use);
550 if (vect_print_dump_info (REPORT_DETAILS))
552 if (def)
554 fprintf (vect_dump, "def = ");
555 print_generic_expr (vect_dump, def, TDF_SLIM);
557 if (def_stmt)
559 fprintf (vect_dump, " def_stmt = ");
560 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
564 switch (dt)
566 /* Case 1: operand is a constant. */
567 case vect_constant_def:
569 if (scalar_def)
570 *scalar_def = op;
572 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
573 if (vect_print_dump_info (REPORT_DETAILS))
574 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
576 for (i = nunits - 1; i >= 0; --i)
578 t = tree_cons (NULL_TREE, op, t);
580 vec_cst = build_vector (vectype, t);
581 return vect_init_vector (stmt, vec_cst);
584 /* Case 2: operand is defined outside the loop - loop invariant. */
585 case vect_invariant_def:
587 if (scalar_def)
588 *scalar_def = def;
590 /* Create 'vec_inv = {inv,inv,..,inv}' */
591 if (vect_print_dump_info (REPORT_DETAILS))
592 fprintf (vect_dump, "Create vector_inv.");
594 for (i = nunits - 1; i >= 0; --i)
596 t = tree_cons (NULL_TREE, def, t);
599 /* FIXME: use build_constructor directly. */
600 vec_inv = build_constructor_from_list (vectype, t);
601 return vect_init_vector (stmt, vec_inv);
604 /* Case 3: operand is defined inside the loop. */
605 case vect_loop_def:
607 if (scalar_def)
608 *scalar_def = def_stmt;
610 /* Get the def from the vectorized stmt. */
611 def_stmt_info = vinfo_for_stmt (def_stmt);
612 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
613 gcc_assert (vec_stmt);
614 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
615 return vec_oprnd;
618 /* Case 4: operand is defined by a loop header phi - reduction */
619 case vect_reduction_def:
621 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
623 /* Get the def before the loop */
624 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
625 return get_initial_def_for_reduction (stmt, op, scalar_def);
628 /* Case 5: operand is defined by loop-header phi - induction. */
629 case vect_induction_def:
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "induction - unsupported.");
633 internal_error ("no support for induction"); /* FORNOW */
636 default:
637 gcc_unreachable ();
642 /* Function vect_finish_stmt_generation.
644 Insert a new stmt. */
646 static void
647 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
649 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
651 if (vect_print_dump_info (REPORT_DETAILS))
653 fprintf (vect_dump, "add new stmt: ");
654 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
657 /* Make sure bsi points to the stmt that is being vectorized. */
658 gcc_assert (stmt == bsi_stmt (*bsi));
660 #ifdef USE_MAPPED_LOCATION
661 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
662 #else
663 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
664 #endif
668 #define ADJUST_IN_EPILOG 1
670 /* Function get_initial_def_for_reduction
672 Input:
673 STMT - a stmt that performs a reduction operation in the loop.
674 INIT_VAL - the initial value of the reduction variable
676 Output:
677 SCALAR_DEF - a tree that holds a value to be added to the final result
678 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
679 Return a vector variable, initialized according to the operation that STMT
680 performs. This vector will be used as the initial value of the
681 vector of partial results.
683 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
684 add: [0,0,...,0,0]
685 mult: [1,1,...,1,1]
686 min/max: [init_val,init_val,..,init_val,init_val]
687 bit and/or: [init_val,init_val,..,init_val,init_val]
688 and when necessary (e.g. add/mult case) let the caller know
689 that it needs to adjust the result by init_val.
691 Option2: Initialize the vector as follows:
692 add: [0,0,...,0,init_val]
693 mult: [1,1,...,1,init_val]
694 min/max: [init_val,init_val,...,init_val]
695 bit and/or: [init_val,init_val,...,init_val]
696 and no adjustments are needed.
698 For example, for the following code:
700 s = init_val;
701 for (i=0;i<n;i++)
702 s = s + a[i];
704 STMT is 's = s + a[i]', and the reduction variable is 's'.
705 For a vector of 4 units, we want to return either [0,0,0,init_val],
706 or [0,0,0,0] and let the caller know that it needs to adjust
707 the result at the end by 'init_val'.
709 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
710 TODO: Use some cost-model to estimate which scheme is more profitable.
713 static tree
714 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
716 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
717 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
718 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
719 int nelements;
720 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
721 tree type = TREE_TYPE (init_val);
722 tree def;
723 tree vec, t = NULL_TREE;
724 bool need_epilog_adjust;
725 int i;
727 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
729 switch (code)
731 case PLUS_EXPR:
732 if (INTEGRAL_TYPE_P (type))
733 def = build_int_cst (type, 0);
734 else
735 def = build_real (type, dconst0);
737 #ifdef ADJUST_IN_EPILOG
738 /* All the 'nunits' elements are set to 0. The final result will be
739 adjusted by 'init_val' at the loop epilog. */
740 nelements = nunits;
741 need_epilog_adjust = true;
742 #else
743 /* 'nunits - 1' elements are set to 0; The last element is set to
744 'init_val'. No further adjustments at the epilog are needed. */
745 nelements = nunits - 1;
746 need_epilog_adjust = false;
747 #endif
748 break;
750 case MIN_EXPR:
751 case MAX_EXPR:
752 def = init_val;
753 nelements = nunits;
754 need_epilog_adjust = true;
755 break;
757 default:
758 gcc_unreachable ();
761 for (i = nelements - 1; i >= 0; --i)
762 t = tree_cons (NULL_TREE, def, t);
764 if (nelements == nunits - 1)
766 /* Set the last element of the vector. */
767 t = tree_cons (NULL_TREE, init_val, t);
768 nelements += 1;
770 gcc_assert (nelements == nunits);
772 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
773 vec = build_vector (vectype, t);
774 else
775 vec = build_constructor_from_list (vectype, t);
777 if (!need_epilog_adjust)
779 if (INTEGRAL_TYPE_P (type))
780 init_val = build_int_cst (type, 0);
781 else
782 init_val = build_real (type, dconst0);
784 *scalar_def = init_val;
786 return vect_init_vector (stmt, vec);
790 /* Function vect_create_epilog_for_reduction:
792 Create code at the loop-epilog to finalize the result of a reduction
793 computation.
795 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
796 into a single result, by applying the operation REDUC_CODE on the
797 partial-results-vector. For this, we need to create a new phi node at the
798 loop exit to preserve loop-closed form, as illustrated below.
800 STMT is the original scalar reduction stmt that is being vectorized.
801 REDUCTION_OP is the scalar reduction-variable.
802 REDUCTION_PHI is the phi-node that carries the reduction computation.
803 This function also sets the arguments for the REDUCTION_PHI:
804 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
805 The loop-latch argument is VECT_DEF - the vector of partial sums.
807 This function transforms this:
809 loop:
810 vec_def = phi <null, null> # REDUCTION_PHI
811 ....
812 VECT_DEF = ...
814 loop_exit:
815 s_out0 = phi <s_loop> # EXIT_PHI
817 use <s_out0>
818 use <s_out0>
820 Into:
822 loop:
823 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
824 ....
825 VECT_DEF = ...
827 loop_exit:
828 s_out0 = phi <s_loop> # EXIT_PHI
829 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
831 v_out2 = reduc_expr <v_out1>
832 s_out3 = extract_field <v_out2, 0>
834 use <s_out3>
835 use <s_out3>
838 static void
839 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
840 enum tree_code reduc_code, tree reduction_phi)
842 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
843 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
844 enum machine_mode mode = TYPE_MODE (vectype);
845 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
846 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
847 basic_block exit_bb;
848 tree scalar_dest = TREE_OPERAND (stmt, 0);
849 tree scalar_type = TREE_TYPE (scalar_dest);
850 tree new_phi;
851 block_stmt_iterator exit_bsi;
852 tree vec_dest;
853 tree new_temp;
854 tree new_name;
855 tree epilog_stmt;
856 tree new_scalar_dest, exit_phi;
857 tree bitsize, bitpos, bytesize;
858 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
859 tree scalar_initial_def;
860 tree vec_initial_def;
861 tree orig_name;
862 imm_use_iterator imm_iter;
863 use_operand_p use_p;
864 bool extract_scalar_result;
865 bool adjust_in_epilog;
867 /*** 1. Create the reduction def-use cycle ***/
869 /* 1.1 set the loop-entry arg of the reduction-phi: */
870 /* For the case of reduction, vect_get_vec_def_for_operand returns
871 the scalar def before the loop, that defines the initial value
872 of the reduction variable. */
873 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
874 &scalar_initial_def);
875 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
878 /* 1.2 set the loop-latch arg for the reduction-phi: */
879 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
881 if (vect_print_dump_info (REPORT_DETAILS))
883 fprintf (vect_dump, "transform reduction: created def-use cycle:");
884 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
885 fprintf (vect_dump, "\n");
886 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
890 /*** 2. Create epilog code ***/
892 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
893 v_out1 = phi <v_loop> */
895 exit_bb = loop->single_exit->dest;
896 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
897 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
899 exit_bsi = bsi_start (exit_bb);
902 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
903 bitsize = TYPE_SIZE (scalar_type);
904 bytesize = TYPE_SIZE_UNIT (scalar_type);
906 /* 2.2 Create the reduction code. */
908 if (reduc_code < NUM_TREE_CODES)
910 /*** Case 1: Create:
911 v_out2 = reduc_expr <v_out1> */
913 if (vect_print_dump_info (REPORT_DETAILS))
914 fprintf (vect_dump, "Reduce using direct vector reduction.");
916 vec_dest = vect_create_destination_var (scalar_dest, vectype);
917 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
918 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
919 new_temp = make_ssa_name (vec_dest, epilog_stmt);
920 TREE_OPERAND (epilog_stmt, 0) = new_temp;
921 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
923 extract_scalar_result = true;
924 adjust_in_epilog = true;
926 else
928 enum tree_code shift_code = 0;
929 bool have_whole_vector_shift = true;
930 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
931 int bit_offset;
932 int element_bitsize = tree_low_cst (bitsize, 1);
933 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
934 tree vec_temp;
936 /* The result of the reduction is expected to be at the least
937 significant bits of the vector. This is merely convention,
938 as it's the extraction later that really matters, and that
939 is also under our control. */
940 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
941 shift_code = VEC_RSHIFT_EXPR;
942 else
943 have_whole_vector_shift = false;
945 /* Regardless of whether we have a whole vector shift, if we're
946 emulating the operation via tree-vect-generic, we don't want
947 to use it. Only the first round of the reduction is likely
948 to still be profitable via emulation. */
949 /* ??? It might be better to emit a reduction tree code here, so that
950 tree-vect-generic can expand the first round via bit tricks. */
951 if (!VECTOR_MODE_P (mode))
952 have_whole_vector_shift = false;
953 else
955 optab optab = optab_for_tree_code (code, vectype);
956 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
957 have_whole_vector_shift = false;
960 if (have_whole_vector_shift)
962 /*** Case 2:
963 for (offset = VS/2; offset >= element_size; offset/=2)
965 Create: va' = vec_shift <va, offset>
966 Create: va = vop <va, va'>
967 } */
969 if (vect_print_dump_info (REPORT_DETAILS))
970 fprintf (vect_dump, "Reduce using vector shifts");
972 vec_dest = vect_create_destination_var (scalar_dest, vectype);
973 new_temp = PHI_RESULT (new_phi);
975 for (bit_offset = vec_size_in_bits/2;
976 bit_offset >= element_bitsize;
977 bit_offset /= 2)
979 tree bitpos = size_int (bit_offset);
981 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
982 build2 (shift_code, vectype, new_temp, bitpos));
983 new_name = make_ssa_name (vec_dest, epilog_stmt);
984 TREE_OPERAND (epilog_stmt, 0) = new_name;
985 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
986 if (vect_print_dump_info (REPORT_DETAILS))
987 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
990 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
991 build2 (code, vectype, new_name, new_temp));
992 new_temp = make_ssa_name (vec_dest, epilog_stmt);
993 TREE_OPERAND (epilog_stmt, 0) = new_temp;
994 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
995 if (vect_print_dump_info (REPORT_DETAILS))
996 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
999 extract_scalar_result = true;
1000 adjust_in_epilog = true;
1002 else
1004 /*** Case 3:
1005 Create: s = init;
1006 for (offset=0; offset<vector_size; offset+=element_size;)
1008 Create: s' = extract_field <v_out2, offset>
1009 Create: s = op <s, s'>
1010 } */
1012 if (vect_print_dump_info (REPORT_DETAILS))
1013 fprintf (vect_dump, "Reduce using scalar code. ");
1015 vec_temp = PHI_RESULT (new_phi);
1016 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1018 /* first iteration is peeled out when possible to minimize
1019 the number of operations we generate: */
1020 if (code == PLUS_EXPR
1021 && (integer_zerop (scalar_initial_def)
1022 || real_zerop (scalar_initial_def)))
1024 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1025 bitsize_zero_node);
1027 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1028 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1029 rhs);
1030 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1031 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1032 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1033 if (vect_print_dump_info (REPORT_DETAILS))
1034 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1036 bit_offset = element_bitsize;
1038 else
1040 new_temp = scalar_initial_def;
1041 bit_offset = 0;
1044 for (;
1045 bit_offset < vec_size_in_bits;
1046 bit_offset += element_bitsize)
1048 tree bitpos = bitsize_int (bit_offset);
1049 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1050 bitpos);
1052 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1053 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1054 rhs);
1055 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1056 TREE_OPERAND (epilog_stmt, 0) = new_name;
1057 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1058 if (vect_print_dump_info (REPORT_DETAILS))
1059 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1062 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1063 build2 (code, scalar_type, new_name, new_temp));
1064 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1065 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1066 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1071 extract_scalar_result = false;
1072 adjust_in_epilog = false;
1077 /* 2.3 Extract the final scalar result. Create:
1078 s_out3 = extract_field <v_out2, bitpos> */
1080 if (extract_scalar_result)
1082 tree rhs;
1084 if (vect_print_dump_info (REPORT_DETAILS))
1085 fprintf (vect_dump, "extract scalar result");
1087 /* The result is in the low order bits. */
1088 if (BITS_BIG_ENDIAN)
1089 bitpos = size_binop (MULT_EXPR,
1090 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1091 TYPE_SIZE (scalar_type));
1092 else
1093 bitpos = bitsize_zero_node;
1095 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1096 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1097 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1098 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1099 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1100 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1101 if (vect_print_dump_info (REPORT_DETAILS))
1102 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1106 /* 2.4 Adjust the final result by the initial value of the reduction
1107 variable. (when such adjustment is not needed, then
1108 'scalar_initial_def' is zero).
1110 Create:
1111 s_out = scalar_expr <s_out, scalar_initial_def> */
1113 if (adjust_in_epilog)
1115 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1116 build2 (code, scalar_type, new_temp, scalar_initial_def));
1117 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1118 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1119 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1121 if (vect_print_dump_info (REPORT_DETAILS))
1122 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1126 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1128 /* Find the loop-closed-use at the loop exit of the original
1129 scalar result. (The reduction result is expected to have
1130 two immediate uses - one at the latch block, and one at the
1131 loop exit). */
1132 exit_phi = NULL;
1133 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1135 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1137 exit_phi = USE_STMT (use_p);
1138 break;
1142 orig_name = PHI_RESULT (exit_phi);
1144 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1145 SET_USE (use_p, new_temp);
1149 /* Function vectorizable_reduction.
1151 Check if STMT performs a reduction operation that can be vectorized.
1152 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1153 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1154 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1156 bool
1157 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1159 tree vec_dest;
1160 tree scalar_dest;
1161 tree op0, op1;
1162 tree loop_vec_def;
1163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1164 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1166 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1167 tree operation;
1168 enum tree_code code, reduc_code = 0;
1169 enum machine_mode vec_mode;
1170 int op_type;
1171 optab optab, reduc_optab;
1172 tree new_temp;
1173 tree def0, def1, def_stmt0, def_stmt1;
1174 enum vect_def_type dt0, dt1;
1175 tree new_phi;
1176 tree scalar_type;
1177 bool is_simple_use0;
1178 bool is_simple_use1;
1180 /* Is vectorizable reduction? */
1182 /* Not supportable if the reduction variable is used in the loop. */
1183 if (STMT_VINFO_RELEVANT_P (stmt_info))
1184 return false;
1186 if (!STMT_VINFO_LIVE_P (stmt_info))
1187 return false;
1189 /* Make sure it was already recognized as a reduction pattern. */
1190 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1191 return false;
1193 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1195 operation = TREE_OPERAND (stmt, 1);
1196 code = TREE_CODE (operation);
1197 op_type = TREE_CODE_LENGTH (code);
1199 if (op_type != binary_op)
1200 return false;
1202 op0 = TREE_OPERAND (operation, 0);
1203 op1 = TREE_OPERAND (operation, 1);
1204 scalar_dest = TREE_OPERAND (stmt, 0);
1205 scalar_type = TREE_TYPE (scalar_dest);
1207 /* Check the first operand. It is expected to be defined inside the loop. */
1208 is_simple_use0 =
1209 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1210 is_simple_use1 =
1211 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1213 gcc_assert (is_simple_use0);
1214 gcc_assert (is_simple_use1);
1215 gcc_assert (dt0 == vect_loop_def);
1216 gcc_assert (dt1 == vect_reduction_def);
1217 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1218 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1220 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1221 return false;
1223 /* Supportable by target? */
1225 /* check support for the operation in the loop */
1226 optab = optab_for_tree_code (code, vectype);
1227 if (!optab)
1229 if (vect_print_dump_info (REPORT_DETAILS))
1230 fprintf (vect_dump, "no optab.");
1231 return false;
1233 vec_mode = TYPE_MODE (vectype);
1234 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1236 if (vect_print_dump_info (REPORT_DETAILS))
1237 fprintf (vect_dump, "op not supported by target.");
1238 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1239 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1240 < vect_min_worthwhile_factor (code))
1241 return false;
1242 if (vect_print_dump_info (REPORT_DETAILS))
1243 fprintf (vect_dump, "proceeding using word mode.");
1246 /* Worthwhile without SIMD support? */
1247 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1248 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1249 < vect_min_worthwhile_factor (code))
1251 if (vect_print_dump_info (REPORT_DETAILS))
1252 fprintf (vect_dump, "not worthwhile without SIMD support.");
1253 return false;
1256 /* check support for the epilog operation */
1257 if (!reduction_code_for_scalar_code (code, &reduc_code))
1258 return false;
1259 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1260 if (!reduc_optab)
1262 if (vect_print_dump_info (REPORT_DETAILS))
1263 fprintf (vect_dump, "no optab for reduction.");
1264 reduc_code = NUM_TREE_CODES;
1266 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1268 if (vect_print_dump_info (REPORT_DETAILS))
1269 fprintf (vect_dump, "reduc op not supported by target.");
1270 reduc_code = NUM_TREE_CODES;
1273 if (!vec_stmt) /* transformation not required. */
1275 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1276 return true;
1279 /** Transform. **/
1281 if (vect_print_dump_info (REPORT_DETAILS))
1282 fprintf (vect_dump, "transform reduction.");
1284 /* Create the destination vector */
1285 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1288 /* Create the reduction-phi that defines the reduction-operand. */
1289 new_phi = create_phi_node (vec_dest, loop->header);
1292 /* Prepare the operand that is defined inside the loop body */
1293 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1294 gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
1297 /* Create the vectorized operation that computes the partial results */
1298 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1299 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1300 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1301 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1302 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1305 /* Finalize the reduction-phi (set it's arguments) and create the
1306 epilog reduction code. */
1307 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1308 return true;
1312 /* Function vectorizable_assignment.
1314 Check if STMT performs an assignment (copy) that can be vectorized.
1315 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1316 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1317 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1319 bool
1320 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1322 tree vec_dest;
1323 tree scalar_dest;
1324 tree op;
1325 tree vec_oprnd;
1326 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1327 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1328 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1329 tree new_temp;
1330 tree def, def_stmt;
1331 enum vect_def_type dt;
1333 /* Is vectorizable assignment? */
1334 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1335 return false;
1337 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1339 if (TREE_CODE (stmt) != MODIFY_EXPR)
1340 return false;
1342 scalar_dest = TREE_OPERAND (stmt, 0);
1343 if (TREE_CODE (scalar_dest) != SSA_NAME)
1344 return false;
1346 op = TREE_OPERAND (stmt, 1);
1347 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1349 if (vect_print_dump_info (REPORT_DETAILS))
1350 fprintf (vect_dump, "use not simple.");
1351 return false;
1354 if (!vec_stmt) /* transformation not required. */
1356 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1357 return true;
1360 /** Transform. **/
1361 if (vect_print_dump_info (REPORT_DETAILS))
1362 fprintf (vect_dump, "transform assignment.");
1364 /* Handle def. */
1365 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1367 /* Handle use. */
1368 op = TREE_OPERAND (stmt, 1);
1369 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1371 /* Arguments are ready. create the new vector stmt. */
1372 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1373 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1374 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1375 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1377 return true;
1381 /* Function vect_min_worthwhile_factor.
1383 For a loop where we could vectorize the operation indicated by CODE,
1384 return the minimum vectorization factor that makes it worthwhile
1385 to use generic vectors. */
1386 static int
1387 vect_min_worthwhile_factor (enum tree_code code)
1389 switch (code)
1391 case PLUS_EXPR:
1392 case MINUS_EXPR:
1393 case NEGATE_EXPR:
1394 return 4;
1396 case BIT_AND_EXPR:
1397 case BIT_IOR_EXPR:
1398 case BIT_XOR_EXPR:
1399 case BIT_NOT_EXPR:
1400 return 2;
1402 default:
1403 return INT_MAX;
1408 /* Function vectorizable_operation.
1410 Check if STMT performs a binary or unary operation that can be vectorized.
1411 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1412 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1413 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1415 bool
1416 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1418 tree vec_dest;
1419 tree scalar_dest;
1420 tree operation;
1421 tree op0, op1 = NULL;
1422 tree vec_oprnd0, vec_oprnd1=NULL;
1423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1426 int i;
1427 enum tree_code code;
1428 enum machine_mode vec_mode;
1429 tree new_temp;
1430 int op_type;
1431 tree op;
1432 optab optab;
1433 tree def, def_stmt;
1434 enum vect_def_type dt;
1436 /* Is STMT a vectorizable binary/unary operation? */
/* Only stmts relevant to the loop computation are handled; values that
   are also live after the loop are not yet supported. */
1437 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1438 return false;
1440 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1442 if (STMT_VINFO_LIVE_P (stmt_info))
1444 /* FORNOW: not yet supported. */
1445 if (vect_print_dump_info (REPORT_DETAILS))
1446 fprintf (vect_dump, "value used after loop.");
1447 return false;
/* Must be an assignment whose LHS is an SSA name. */
1450 if (TREE_CODE (stmt) != MODIFY_EXPR)
1451 return false;
1453 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1454 return false;
1456 operation = TREE_OPERAND (stmt, 1);
1457 code = TREE_CODE (operation);
1458 optab = optab_for_tree_code (code, vectype);
1460 /* Support only unary or binary operations. */
1461 op_type = TREE_CODE_LENGTH (code);
1462 if (op_type != unary_op && op_type != binary_op)
1464 if (vect_print_dump_info (REPORT_DETAILS))
1465 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1466 return false;
/* Every operand must be recognized by vect_is_simple_use. */
1469 for (i = 0; i < op_type; i++)
1471 op = TREE_OPERAND (operation, i);
1472 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1474 if (vect_print_dump_info (REPORT_DETAILS))
1475 fprintf (vect_dump, "use not simple.");
1476 return false;
1480 /* Supportable by target? */
1481 if (!optab)
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "no optab.");
1485 return false;
1487 vec_mode = TYPE_MODE (vectype);
/* No direct vector insn: we can still emulate in word mode, but only if
   the vector fits in one word and the vectorization factor makes the
   emulation worthwhile. */
1488 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1490 if (vect_print_dump_info (REPORT_DETAILS))
1491 fprintf (vect_dump, "op not supported by target.");
1492 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1493 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1494 < vect_min_worthwhile_factor (code))
1495 return false;
1496 if (vect_print_dump_info (REPORT_DETAILS))
1497 fprintf (vect_dump, "proceeding using word mode.");
1500 /* Worthwhile without SIMD support? */
1501 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1502 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1503 < vect_min_worthwhile_factor (code))
1505 if (vect_print_dump_info (REPORT_DETAILS))
1506 fprintf (vect_dump, "not worthwhile without SIMD support.");
1507 return false;
/* Analysis-only call: just record the vectorization kind. */
1510 if (!vec_stmt) /* transformation not required. */
1512 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1513 return true;
1516 /** Transform. **/
1518 if (vect_print_dump_info (REPORT_DETAILS))
1519 fprintf (vect_dump, "transform binary/unary operation.");
1521 /* Handle def. */
1522 scalar_dest = TREE_OPERAND (stmt, 0);
1523 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1525 /* Handle uses. */
1526 op0 = TREE_OPERAND (operation, 0);
1527 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1529 if (op_type == binary_op)
1531 op1 = TREE_OPERAND (operation, 1);
1532 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1535 /* Arguments are ready. create the new vector stmt. */
1537 if (op_type == binary_op)
1538 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1539 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1540 else
1541 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1542 build1 (code, vectype, vec_oprnd0));
1543 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1544 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1545 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1547 return true;
1551 /* Function vectorizable_store.
1553 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1554 can be vectorized.
1555 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1556 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1557 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1559 bool
1560 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1562 tree scalar_dest;
1563 tree data_ref;
1564 tree op;
1565 tree vec_oprnd1;
1566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1567 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1570 enum machine_mode vec_mode;
1571 tree dummy;
/* NOTE(review): "cheme" is a long-standing typo for "scheme"; kept as-is
   for consistency with the same spelling elsewhere in this file. */
1572 enum dr_alignment_support alignment_support_cheme;
1573 ssa_op_iter iter;
1574 tree def, def_stmt;
1575 enum vect_def_type dt;
1577 /* Is vectorizable store? */
/* The stmt must store to an array element or through a pointer, and the
   stored value must be recognized by vect_is_simple_use. */
1579 if (TREE_CODE (stmt) != MODIFY_EXPR)
1580 return false;
1582 scalar_dest = TREE_OPERAND (stmt, 0);
1583 if (TREE_CODE (scalar_dest) != ARRAY_REF
1584 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1585 return false;
1587 op = TREE_OPERAND (stmt, 1);
1588 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1590 if (vect_print_dump_info (REPORT_DETAILS))
1591 fprintf (vect_dump, "use not simple.");
1592 return false;
1595 vec_mode = TYPE_MODE (vectype);
1596 /* FORNOW. In some cases can vectorize even if data-type not supported
1597 (e.g. - array initialization with 0). */
/* The target must at least be able to move a vector of this mode. */
1598 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1599 return false;
1601 if (!STMT_VINFO_DATA_REF (stmt_info))
1602 return false;
1605 if (!vec_stmt) /* transformation not required. */
1607 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1608 return true;
1611 /** Transform. **/
1613 if (vect_print_dump_info (REPORT_DETAILS))
1614 fprintf (vect_dump, "transform store");
/* Only aligned stores are supported at this point (asserted below). */
1616 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1617 gcc_assert (alignment_support_cheme);
1618 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1620 /* Handle use - get the vectorized def from the defining stmt. */
1621 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1623 /* Handle def. */
1624 /* FORNOW: make sure the data reference is aligned. */
1625 vect_align_data_ref (stmt);
1626 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1627 data_ref = build_fold_indirect_ref (data_ref);
1629 /* Arguments are ready. create the new vector stmt. */
1630 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1631 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1633 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1634 element's definition to the vector's definition then update the
1635 defining statement. The original is being deleted so the same
1636 SSA_NAMEs can be used. */
1637 copy_virtual_operands (*vec_stmt, stmt);
1639 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1641 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1643 /* If this virtual def has a use outside the loop and a loop peel is
1644 performed then the def may be renamed by the peel. Mark it for
1645 renaming so the later use will also be renamed. */
1646 mark_sym_for_renaming (SSA_NAME_VAR (def));
1649 return true;
1653 /* vectorizable_load.
1655 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1656 can be vectorized.
1657 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1658 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1659 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1661 bool
1662 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1664 tree scalar_dest;
1665 tree vec_dest = NULL;
1666 tree data_ref = NULL;
1667 tree op;
1668 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1669 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1670 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1671 tree new_temp;
1672 int mode;
1673 tree init_addr;
1674 tree new_stmt;
1675 tree dummy;
1676 basic_block new_bb;
1677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1678 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1679 edge pe = loop_preheader_edge (loop);
1680 enum dr_alignment_support alignment_support_cheme;
1682 /* Is vectorizable load? */
1683 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1684 return false;
1686 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1688 if (STMT_VINFO_LIVE_P (stmt_info))
1690 /* FORNOW: not yet supported. */
1691 if (vect_print_dump_info (REPORT_DETAILS))
1692 fprintf (vect_dump, "value used after loop.");
1693 return false;
/* The stmt must load from an array element or through a pointer into an
   SSA name, and must have an analyzed data reference. */
1696 if (TREE_CODE (stmt) != MODIFY_EXPR)
1697 return false;
1699 scalar_dest = TREE_OPERAND (stmt, 0);
1700 if (TREE_CODE (scalar_dest) != SSA_NAME)
1701 return false;
1703 op = TREE_OPERAND (stmt, 1);
1704 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1705 return false;
1707 if (!STMT_VINFO_DATA_REF (stmt_info))
1708 return false;
1710 mode = (int) TYPE_MODE (vectype);
1712 /* FORNOW. In some cases can vectorize even if data-type not supported
1713 (e.g. - data copies). */
1714 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1716 if (vect_print_dump_info (REPORT_DETAILS))
1717 fprintf (vect_dump, "Aligned load, but unsupported type.");
1718 return false;
1721 if (!vec_stmt) /* transformation not required. */
1723 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1724 return true;
1727 /** Transform. **/
1729 if (vect_print_dump_info (REPORT_DETAILS))
1730 fprintf (vect_dump, "transform load.");
/* Pick the code-generation strategy from the alignment scheme computed
   for this data reference. */
1732 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1733 gcc_assert (alignment_support_cheme);
1735 if (alignment_support_cheme == dr_aligned
1736 || alignment_support_cheme == dr_unaligned_supported)
1738 /* Create:
1739 p = initial_addr;
1740 indx = 0;
1741 loop {
1742 vec_dest = *(p);
1743 indx = indx + 1;
1747 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1748 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1749 if (aligned_access_p (dr))
1750 data_ref = build_fold_indirect_ref (data_ref);
1751 else
/* Known misalignment is converted from bytes to bits and encoded in a
   MISALIGNED_INDIRECT_REF; DR_MISALIGNMENT == -1 (unknown) becomes 0. */
1753 int mis = DR_MISALIGNMENT (dr);
1754 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1755 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1756 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1758 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1759 new_temp = make_ssa_name (vec_dest, new_stmt);
1760 TREE_OPERAND (new_stmt, 0) = new_temp;
1761 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1762 copy_virtual_operands (new_stmt, stmt);
1764 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1766 /* Create:
1767 p1 = initial_addr;
1768 msq_init = *(floor(p1))
1769 p2 = initial_addr + VS - 1;
1770 magic = have_builtin ? builtin_result : initial_address;
1771 indx = 0;
1772 loop {
1773 p2' = p2 + indx * vectype_size
1774 lsq = *(floor(p2'))
1775 vec_dest = realign_load (msq, lsq, magic)
1776 indx = indx + 1;
1777 msq = lsq;
1781 tree offset;
1782 tree magic;
1783 tree phi_stmt;
1784 tree msq_init;
/* msq/lsq are the two aligned loads that straddle the unaligned address;
   REALIGN_LOAD combines them into the desired vector (see <5>). */
1785 tree msq, lsq;
1786 tree dataref_ptr;
1787 tree params;
1789 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1790 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1791 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1792 &init_addr, true);
1793 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1794 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1795 new_temp = make_ssa_name (vec_dest, new_stmt);
1796 TREE_OPERAND (new_stmt, 0) = new_temp;
1797 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1798 gcc_assert (!new_bb);
1799 msq_init = TREE_OPERAND (new_stmt, 0);
1800 copy_virtual_operands (new_stmt, stmt);
1801 update_vuses_to_preheader (new_stmt, loop);
1804 /* <2> Create lsq = *(floor(p2')) in the loop */
/* The second access starts VS-1 elements after the first, so the two
   aligned loads cover the whole unaligned vector. */
1805 offset = build_int_cst (integer_type_node,
1806 TYPE_VECTOR_SUBPARTS (vectype));
1807 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1808 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1809 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1810 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1811 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1812 new_temp = make_ssa_name (vec_dest, new_stmt);
1813 TREE_OPERAND (new_stmt, 0) = new_temp;
1814 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1815 lsq = TREE_OPERAND (new_stmt, 0);
1816 copy_virtual_operands (new_stmt, stmt);
1819 /* <3> */
1820 if (targetm.vectorize.builtin_mask_for_load)
1822 /* Create permutation mask, if required, in loop preheader. */
1823 tree builtin_decl;
1824 params = build_tree_list (NULL_TREE, init_addr);
1825 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1826 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1827 new_stmt = build_function_call_expr (builtin_decl, params);
1828 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1829 new_temp = make_ssa_name (vec_dest, new_stmt);
1830 TREE_OPERAND (new_stmt, 0) = new_temp;
1831 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1832 gcc_assert (!new_bb);
1833 magic = TREE_OPERAND (new_stmt, 0);
1835 /* The result of the CALL_EXPR to this builtin is determined from
1836 the value of the parameter and no global variables are touched
1837 which makes the builtin a "const" function. Requiring the
1838 builtin to have the "const" attribute makes it unnecessary
1839 to call mark_call_clobbered_vars_to_rename. */
1840 gcc_assert (TREE_READONLY (builtin_decl));
1842 else
1844 /* Use current address instead of init_addr for reduced reg pressure.
1846 magic = dataref_ptr;
1850 /* <4> Create msq = phi <msq_init, lsq> in loop */
1851 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1852 msq = make_ssa_name (vec_dest, NULL_TREE);
1853 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1854 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1855 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1856 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1859 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1860 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1861 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1862 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1863 new_temp = make_ssa_name (vec_dest, new_stmt);
1864 TREE_OPERAND (new_stmt, 0) = new_temp;
1865 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1867 else
1868 gcc_unreachable ();
1870 *vec_stmt = new_stmt;
1871 return true;
1875 /* Function vectorizable_live_operation.
1877 STMT computes a value that is used outside the loop. Check if
1878 it can be supported. */
1880 bool
1881 vectorizable_live_operation (tree stmt,
1882 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1883 tree *vec_stmt ATTRIBUTE_UNUSED)
1885 tree operation;
1886 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1887 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1888 int i;
1889 enum tree_code code;
1890 int op_type;
1891 tree op;
1892 tree def, def_stmt;
1893 enum vect_def_type dt;
1895 if (!STMT_VINFO_LIVE_P (stmt_info))
1896 return false;
1898 if (TREE_CODE (stmt) != MODIFY_EXPR)
1899 return false;
1901 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1902 return false;
1904 operation = TREE_OPERAND (stmt, 1);
1905 code = TREE_CODE (operation);
1907 op_type = TREE_CODE_LENGTH (code);
1909 /* FORNOW: support only if all uses are invariant. This means
1910 that the scalar operations can remain in place, unvectorized.
1911 The original last scalar value that they compute will be used. */
1913 for (i = 0; i < op_type; i++)
1915 op = TREE_OPERAND (operation, i);
1916 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1918 if (vect_print_dump_info (REPORT_DETAILS))
1919 fprintf (vect_dump, "use not simple.");
1920 return false;
1923 if (dt != vect_invariant_def && dt != vect_constant_def)
1924 return false;
1927 /* No transformation is required for the cases we currently support. */
1928 return true;
1932 /* Function vect_is_simple_cond.
1934 Input:
1935 LOOP - the loop that is being vectorized.
1936 COND - Condition that is checked for simple use.
1938 Returns whether a COND can be vectorized. Checks whether
1939 condition operands are supportable using vec_is_simple_use. */
1941 static bool
1942 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1944 tree lhs, rhs;
1945 tree def;
1946 enum vect_def_type dt;
1948 if (!COMPARISON_CLASS_P (cond))
1949 return false;
1951 lhs = TREE_OPERAND (cond, 0);
1952 rhs = TREE_OPERAND (cond, 1);
1954 if (TREE_CODE (lhs) == SSA_NAME)
1956 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1957 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1958 return false;
1960 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1961 return false;
1963 if (TREE_CODE (rhs) == SSA_NAME)
1965 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1966 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1967 return false;
1969 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1970 return false;
1972 return true;
1975 /* vectorizable_condition.
1977 Check if STMT is conditional modify expression that can be vectorized.
1978 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1979 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1980 at BSI.
1982 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1984 bool
1985 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1987 tree scalar_dest = NULL_TREE;
1988 tree vec_dest = NULL_TREE;
1989 tree op = NULL_TREE;
1990 tree cond_expr, then_clause, else_clause;
1991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1993 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1994 tree vec_compare, vec_cond_expr;
1995 tree new_temp;
1996 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1997 enum machine_mode vec_mode;
1998 tree def;
1999 enum vect_def_type dt;
2001 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2002 return false;
2004 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2006 if (STMT_VINFO_LIVE_P (stmt_info))
2008 /* FORNOW: not yet supported. */
2009 if (vect_print_dump_info (REPORT_DETAILS))
2010 fprintf (vect_dump, "value used after loop.");
2011 return false;
2014 if (TREE_CODE (stmt) != MODIFY_EXPR)
2015 return false;
2017 op = TREE_OPERAND (stmt, 1);
2019 if (TREE_CODE (op) != COND_EXPR)
2020 return false;
2022 cond_expr = TREE_OPERAND (op, 0);
2023 then_clause = TREE_OPERAND (op, 1);
2024 else_clause = TREE_OPERAND (op, 2);
2026 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2027 return false;
2029 if (TREE_CODE (then_clause) == SSA_NAME)
2031 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2032 if (!vect_is_simple_use (then_clause, loop_vinfo,
2033 &then_def_stmt, &def, &dt))
2034 return false;
2036 else if (TREE_CODE (then_clause) != INTEGER_CST
2037 && TREE_CODE (then_clause) != REAL_CST)
2038 return false;
2040 if (TREE_CODE (else_clause) == SSA_NAME)
2042 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2043 if (!vect_is_simple_use (else_clause, loop_vinfo,
2044 &else_def_stmt, &def, &dt))
2045 return false;
2047 else if (TREE_CODE (else_clause) != INTEGER_CST
2048 && TREE_CODE (else_clause) != REAL_CST)
2049 return false;
2052 vec_mode = TYPE_MODE (vectype);
2054 if (!vec_stmt)
2056 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2057 return expand_vec_cond_expr_p (op, vec_mode);
2060 /* Transform */
2062 /* Handle def. */
2063 scalar_dest = TREE_OPERAND (stmt, 0);
2064 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2066 /* Handle cond expr. */
2067 vec_cond_lhs =
2068 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2069 vec_cond_rhs =
2070 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2071 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2072 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2074 /* Arguments are ready. create the new vector stmt. */
2075 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2076 vec_cond_lhs, vec_cond_rhs);
2077 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2078 vec_compare, vec_then_clause, vec_else_clause);
2080 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2081 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2082 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2083 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2085 return true;
2088 /* Function vect_transform_stmt.
2090 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2092 bool
2093 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2095 bool is_store = false;
2096 tree vec_stmt = NULL_TREE;
2097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2098 bool done;
2100 if (STMT_VINFO_RELEVANT_P (stmt_info))
2102 switch (STMT_VINFO_TYPE (stmt_info))
2104 case op_vec_info_type:
2105 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2106 gcc_assert (done);
2107 break;
2109 case assignment_vec_info_type:
2110 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2111 gcc_assert (done);
2112 break;
2114 case load_vec_info_type:
2115 done = vectorizable_load (stmt, bsi, &vec_stmt);
2116 gcc_assert (done);
2117 break;
2119 case store_vec_info_type:
2120 done = vectorizable_store (stmt, bsi, &vec_stmt);
2121 gcc_assert (done);
2122 is_store = true;
2123 break;
2125 case condition_vec_info_type:
2126 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2127 gcc_assert (done);
2128 break;
2130 default:
2131 if (vect_print_dump_info (REPORT_DETAILS))
2132 fprintf (vect_dump, "stmt not supported.");
2133 gcc_unreachable ();
2136 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2139 if (STMT_VINFO_LIVE_P (stmt_info))
2141 switch (STMT_VINFO_TYPE (stmt_info))
2143 case reduc_vec_info_type:
2144 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2145 gcc_assert (done);
2146 break;
2148 default:
2149 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2150 gcc_assert (done);
2153 if (vec_stmt)
2155 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2156 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2160 return is_store;
2164 /* This function builds ni_name = number of iterations loop executes
2165 on the loop preheader. */
2167 static tree
2168 vect_build_loop_niters (loop_vec_info loop_vinfo)
2170 tree ni_name, stmt, var;
2171 edge pe;
2172 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2173 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2175 var = create_tmp_var (TREE_TYPE (ni), "niters");
2176 add_referenced_tmp_var (var);
2177 ni_name = force_gimple_operand (ni, &stmt, false, var);
2179 pe = loop_preheader_edge (loop);
2180 if (stmt)
2182 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2183 gcc_assert (!new_bb);
2186 return ni_name;
2190 /* This function generates the following statements:
2192 ni_name = number of iterations loop executes
2193 ratio = ni_name / vf
2194 ratio_mult_vf_name = ratio * vf
2196 and places them at the loop preheader edge. */
2198 static void
2199 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2200 tree *ni_name_ptr,
2201 tree *ratio_mult_vf_name_ptr,
2202 tree *ratio_name_ptr)
2205 edge pe;
2206 basic_block new_bb;
2207 tree stmt, ni_name;
2208 tree var;
2209 tree ratio_name;
2210 tree ratio_mult_vf_name;
2211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2212 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2213 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2214 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2216 pe = loop_preheader_edge (loop);
2218 /* Generate temporary variable that contains
2219 number of iterations loop executes. */
2221 ni_name = vect_build_loop_niters (loop_vinfo);
2223 /* Create: ratio = ni >> log2(vf) */
2225 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2226 add_referenced_tmp_var (var);
2227 ratio_name = make_ssa_name (var, NULL_TREE);
2228 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2229 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2230 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2232 pe = loop_preheader_edge (loop);
2233 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2234 gcc_assert (!new_bb);
2236 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2238 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2239 add_referenced_tmp_var (var);
2240 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2241 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2242 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2243 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2245 pe = loop_preheader_edge (loop);
2246 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2247 gcc_assert (!new_bb);
2249 *ni_name_ptr = ni_name;
2250 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2251 *ratio_name_ptr = ratio_name;
2253 return;
2257 /* Function update_vuses_to_preheader.
2259 Input:
2260 STMT - a statement with potential VUSEs.
2261 LOOP - the loop whose preheader will contain STMT.
2263 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2264 appears to be defined in a V_MAY_DEF in another statement in a loop.
2265 One such case is when the VUSE is at the dereference of a __restricted__
2266 pointer in a load and the V_MAY_DEF is at the dereference of a different
2267 __restricted__ pointer in a store. Vectorization may result in
2268 copy_virtual_uses being called to copy the problematic VUSE to a new
2269 statement that is being inserted in the loop preheader. This procedure
2270 is called to change the SSA_NAME in the new statement's VUSE from the
2271 SSA_NAME updated in the loop to the related SSA_NAME available on the
2272 path entering the loop.
2274 When this function is called, we have the following situation:
2276 # vuse <name1>
2277 S1: vload
2278 do {
2279 # name1 = phi < name0 , name2>
2281 # vuse <name1>
2282 S2: vload
2284 # name2 = vdef <name1>
2285 S3: vstore
2287 }while...
2289 Stmt S1 was created in the loop preheader block as part of misaligned-load
2290 handling. This function fixes the name of the vuse of S1 from 'name1' to
2291 'name0'. */
2293 static void
2294 update_vuses_to_preheader (tree stmt, struct loop *loop)
2296 basic_block header_bb = loop->header;
2297 edge preheader_e = loop_preheader_edge (loop);
2298 ssa_op_iter iter;
2299 use_operand_p use_p;
2301 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2303 tree ssa_name = USE_FROM_PTR (use_p);
2304 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2305 tree name_var = SSA_NAME_VAR (ssa_name);
2306 basic_block bb = bb_for_stmt (def_stmt);
2308 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2309 if (!IS_EMPTY_STMT (def_stmt)
2310 && flow_bb_inside_loop_p (loop, bb))
2312 /* If the block containing the statement defining the SSA_NAME
2313 is in the loop then it's necessary to find the definition
2314 outside the loop using the PHI nodes of the header. */
2315 tree phi;
2316 bool updated = false;
2318 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2320 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2322 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2323 updated = true;
2324 break;
2327 gcc_assert (updated);
2333 /* Function vect_update_ivs_after_vectorizer.
2335 "Advance" the induction variables of LOOP to the value they should take
2336 after the execution of LOOP. This is currently necessary because the
2337 vectorizer does not handle induction variables that are used after the
2338 loop. Such a situation occurs when the last iterations of LOOP are
2339 peeled, because:
2340 1. We introduced new uses after LOOP for IVs that were not originally used
2341 after LOOP: the IVs of LOOP are now used by an epilog loop.
2342 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2343 times, whereas the loop IVs should be bumped N times.
2345 Input:
2346 - LOOP - a loop that is going to be vectorized. The last few iterations
2347 of LOOP were peeled.
2348 - NITERS - the number of iterations that LOOP executes (before it is
2349 vectorized). i.e, the number of times the ivs should be bumped.
2350 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2351 coming out from LOOP on which there are uses of the LOOP ivs
2352 (this is the path from LOOP->exit to epilog_loop->preheader).
2354 The new definitions of the ivs are placed in LOOP->exit.
2355 The phi args associated with the edge UPDATE_E in the bb
2356 UPDATE_E->dest are updated accordingly.
2358 Assumption 1: Like the rest of the vectorizer, this function assumes
2359 a single loop exit that has a single predecessor.
2361 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2362 organized in the same order.
2364 Assumption 3: The access function of the ivs is simple enough (see
2365 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2367 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2368 coming out of LOOP on which the ivs of LOOP are used (this is the path
2369 that leads to the epilog loop; other paths skip the epilog loop). This
2370 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2371 needs to have its phis updated.
2374 static void
2375 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2376 edge update_e)
2378 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2379 basic_block exit_bb = loop->single_exit->dest;
2380 tree phi, phi1;
2381 basic_block update_bb = update_e->dest;
2383 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2385 /* Make sure there exists a single-predecessor exit bb: */
2386 gcc_assert (single_pred_p (exit_bb));
2388 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2389 phi && phi1;
2390 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2392 tree access_fn = NULL;
2393 tree evolution_part;
2394 tree init_expr;
2395 tree step_expr;
2396 tree var, stmt, ni, ni_name;
2397 block_stmt_iterator last_bsi;
2399 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2402 print_generic_expr (vect_dump, phi, TDF_SLIM);
2405 /* Skip virtual phi's. */
2406 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2408 if (vect_print_dump_info (REPORT_DETAILS))
2409 fprintf (vect_dump, "virtual phi. skip.");
2410 continue;
2413 /* Skip reduction phis. */
2414 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2416 if (vect_print_dump_info (REPORT_DETAILS))
2417 fprintf (vect_dump, "reduc phi. skip.");
2418 continue;
2421 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2422 gcc_assert (access_fn);
2423 evolution_part =
2424 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2425 gcc_assert (evolution_part != NULL_TREE);
2427 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2428 of degree >= 2 or exponential. */
2429 gcc_assert (!tree_is_chrec (evolution_part));
2431 step_expr = evolution_part;
2432 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2433 loop->num));
2435 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2436 build2 (MULT_EXPR, TREE_TYPE (niters),
2437 niters, step_expr), init_expr);
2439 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2440 add_referenced_tmp_var (var);
2442 ni_name = force_gimple_operand (ni, &stmt, false, var);
2444 /* Insert stmt into exit_bb. */
2445 last_bsi = bsi_last (exit_bb);
2446 if (stmt)
2447 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2449 /* Fix phi expressions in the successor bb. */
2450 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2455 /* Function vect_do_peeling_for_loop_bound
2457 Peel the last iterations of the loop represented by LOOP_VINFO.
2458 The peeled iterations form a new epilog loop. Given that the loop now
2459 iterates NITERS times, the new epilog loop iterates
2460 NITERS % VECTORIZATION_FACTOR times.
2462 The original loop will later be made to iterate
2463 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2465 static void
2466 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2467 struct loops *loops)
2469 tree ni_name, ratio_mult_vf_name;
2470 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2471 struct loop *new_loop;
2472 edge update_e;
2473 basic_block preheader;
2474 int loop_num;
2476 if (vect_print_dump_info (REPORT_DETAILS))
2477 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2479 initialize_original_copy_tables ();
2481 /* Generate the following variables on the preheader of original loop:
2483 ni_name = number of iteration the original loop executes
2484 ratio = ni_name / vf
2485 ratio_mult_vf_name = ratio * vf */
2486 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2487 &ratio_mult_vf_name, ratio);
2489 loop_num = loop->num;
2490 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2491 ratio_mult_vf_name, ni_name, false);
2492 gcc_assert (new_loop);
2493 gcc_assert (loop_num == loop->num);
2494 #ifdef ENABLE_CHECKING
2495 slpeel_verify_cfg_after_peeling (loop, new_loop);
2496 #endif
2498 /* A guard that controls whether the new_loop is to be executed or skipped
2499 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2500 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2501 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2502 is on the path where the LOOP IVs are used and need to be updated. */
2504 preheader = loop_preheader_edge (new_loop)->src;
2505 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2506 update_e = EDGE_PRED (preheader, 0);
2507 else
2508 update_e = EDGE_PRED (preheader, 1);
2510 /* Update IVs of original loop as if they were advanced
2511 by ratio_mult_vf_name steps. */
2512 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2514 /* After peeling we have to reset scalar evolution analyzer. */
2515 scev_reset ();
2517 free_original_copy_tables ();
2521 /* Function vect_gen_niters_for_prolog_loop
2523 Set the number of iterations for the loop represented by LOOP_VINFO
2524 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2525 and the misalignment of DR - the data reference recorded in
2526 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2527 this loop, the data reference DR will refer to an aligned location.
2529 The following computation is generated:
2531 If the misalignment of DR is known at compile time:
2532 addr_mis = int mis = DR_MISALIGNMENT (dr);
2533 Else, compute address misalignment in bytes:
2534 addr_mis = addr & (vectype_size - 1)
2536 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2538 (elem_size = element type size; an element is the scalar element
2539 whose type is the inner type of the vectype) */
2541 static tree
2542 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2544 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2545 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2546 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2547 tree var, stmt;
2548 tree iters, iters_name;
2549 edge pe;
2550 basic_block new_bb;
2551 tree dr_stmt = DR_STMT (dr);
2552 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2553 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2554 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2555 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2556 tree niters_type = TREE_TYPE (loop_niters);
2558 pe = loop_preheader_edge (loop);
2560 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2562 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2563 int element_size = vectype_align/vf;
2564 int elem_misalign = byte_misalign / element_size;
2566 if (vect_print_dump_info (REPORT_DETAILS))
2567 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2568 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2570 else
2572 tree new_stmts = NULL_TREE;
2573 tree start_addr =
2574 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2575 tree ptr_type = TREE_TYPE (start_addr);
2576 tree size = TYPE_SIZE (ptr_type);
2577 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2578 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2579 tree elem_size_log =
2580 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2581 tree vf_tree = build_int_cst (unsigned_type_node, vf);
2582 tree byte_misalign;
2583 tree elem_misalign;
2585 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2586 gcc_assert (!new_bb);
2588 /* Create: byte_misalign = addr & (vectype_size - 1) */
2589 byte_misalign =
2590 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2592 /* Create: elem_misalign = byte_misalign / element_size */
2593 elem_misalign =
2594 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2596 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2597 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2598 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2599 iters = fold_convert (niters_type, iters);
2602 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2603 /* If the loop bound is known at compile time we already verified that it is
2604 greater than vf; since the misalignment ('iters') is at most vf, there's
2605 no need to generate the MIN_EXPR in this case. */
2606 if (TREE_CODE (loop_niters) != INTEGER_CST)
2607 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2609 if (vect_print_dump_info (REPORT_DETAILS))
2611 fprintf (vect_dump, "niters for prolog loop: ");
2612 print_generic_expr (vect_dump, iters, TDF_SLIM);
2615 var = create_tmp_var (niters_type, "prolog_loop_niters");
2616 add_referenced_tmp_var (var);
2617 iters_name = force_gimple_operand (iters, &stmt, false, var);
2619 /* Insert stmt on loop preheader edge. */
2620 if (stmt)
2622 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2623 gcc_assert (!new_bb);
2626 return iters_name;
2630 /* Function vect_update_init_of_dr
2632 NITERS iterations were peeled from LOOP. DR represents a data reference
2633 in LOOP. This function updates the information recorded in DR to
2634 account for the fact that the first NITERS iterations had already been
2635 executed. Specifically, it updates the OFFSET field of DR. */
2637 static void
2638 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2640 tree offset = DR_OFFSET (dr);
2642 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2643 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2644 DR_OFFSET (dr) = offset;
2648 /* Function vect_update_inits_of_drs
2650 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2651 This function updates the information recorded for the data references in
2652 the loop to account for the fact that the first NITERS iterations had
2653 already been executed. Specifically, it updates the initial_condition of the
2654 access_function of all the data_references in the loop. */
2656 static void
2657 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2659 unsigned int i;
2660 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2662 if (vect_dump && (dump_flags & TDF_DETAILS))
2663 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2665 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2667 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2668 vect_update_init_of_dr (dr, niters);
2673 /* Function vect_do_peeling_for_alignment
2675 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2676 'niters' is set to the misalignment of one of the data references in the
2677 loop, thereby forcing it to refer to an aligned location at the beginning
2678 of the execution of this loop. The data reference for which we are
2679 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2681 static void
2682 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2684 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2685 tree niters_of_prolog_loop, ni_name;
2686 tree n_iters;
2687 struct loop *new_loop;
2689 if (vect_print_dump_info (REPORT_DETAILS))
2690 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2692 initialize_original_copy_tables ();
2694 ni_name = vect_build_loop_niters (loop_vinfo);
2695 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2697 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2698 new_loop =
2699 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2700 niters_of_prolog_loop, ni_name, true);
2701 gcc_assert (new_loop);
2702 #ifdef ENABLE_CHECKING
2703 slpeel_verify_cfg_after_peeling (new_loop, loop);
2704 #endif
2706 /* Update number of times loop executes. */
2707 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2708 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2709 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2711 /* Update the init conditions of the access functions of all data refs. */
2712 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2714 /* After peeling we have to reset scalar evolution analyzer. */
2715 scev_reset ();
2717 free_original_copy_tables ();
2721 /* Function vect_transform_loop.
2723 The analysis phase has determined that the loop is vectorizable.
2724 Vectorize the loop - created vectorized stmts to replace the scalar
2725 stmts in the loop, and update the loop exit condition. */
2727 void
2728 vect_transform_loop (loop_vec_info loop_vinfo,
2729 struct loops *loops ATTRIBUTE_UNUSED)
2731 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2732 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2733 int nbbs = loop->num_nodes;
2734 block_stmt_iterator si;
2735 int i;
2736 tree ratio = NULL;
2737 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2738 bitmap_iterator bi;
2739 unsigned int j;
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "=== vec_transform_loop ===");
2744 /* CHECKME: we wouldn't need this if we calles update_ssa once
2745 for all loops. */
2746 bitmap_zero (vect_vnames_to_rename);
2748 /* Peel the loop if there are data refs with unknown alignment.
2749 Only one data ref with unknown store is allowed. */
2751 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2752 vect_do_peeling_for_alignment (loop_vinfo, loops);
2754 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2755 compile time constant), or it is a constant that doesn't divide by the
2756 vectorization factor, then an epilog loop needs to be created.
2757 We therefore duplicate the loop: the original loop will be vectorized,
2758 and will compute the first (n/VF) iterations. The second copy of the loop
2759 will remain scalar and will compute the remaining (n%VF) iterations.
2760 (VF is the vectorization factor). */
2762 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2763 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2764 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2765 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2766 else
2767 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2768 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2770 /* 1) Make sure the loop header has exactly two entries
2771 2) Make sure we have a preheader basic block. */
2773 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2775 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2778 /* FORNOW: the vectorizer supports only loops which body consist
2779 of one basic block (header + empty latch). When the vectorizer will
2780 support more involved loop forms, the order by which the BBs are
2781 traversed need to be reconsidered. */
2783 for (i = 0; i < nbbs; i++)
2785 basic_block bb = bbs[i];
2787 for (si = bsi_start (bb); !bsi_end_p (si);)
2789 tree stmt = bsi_stmt (si);
2790 stmt_vec_info stmt_info;
2791 bool is_store;
2793 if (vect_print_dump_info (REPORT_DETAILS))
2795 fprintf (vect_dump, "------>vectorizing statement: ");
2796 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2798 stmt_info = vinfo_for_stmt (stmt);
2799 gcc_assert (stmt_info);
2800 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2801 && !STMT_VINFO_LIVE_P (stmt_info))
2803 bsi_next (&si);
2804 continue;
2806 /* FORNOW: Verify that all stmts operate on the same number of
2807 units and no inner unrolling is necessary. */
2808 gcc_assert
2809 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2810 == (unsigned HOST_WIDE_INT) vectorization_factor);
2812 /* -------- vectorize statement ------------ */
2813 if (vect_print_dump_info (REPORT_DETAILS))
2814 fprintf (vect_dump, "transform statement.");
2816 is_store = vect_transform_stmt (stmt, &si);
2817 if (is_store)
2819 /* Free the attached stmt_vec_info and remove the stmt. */
2820 stmt_ann_t ann = stmt_ann (stmt);
2821 free (stmt_info);
2822 set_stmt_info ((tree_ann_t)ann, NULL);
2823 bsi_remove (&si);
2824 continue;
2827 bsi_next (&si);
2828 } /* stmts in BB */
2829 } /* BBs in loop */
2831 slpeel_make_loop_iterate_ntimes (loop, ratio);
2833 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
2834 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
2836 /* The memory tags and pointers in vectorized statements need to
2837 have their SSA forms updated. FIXME, why can't this be delayed
2838 until all the loops have been transformed? */
2839 update_ssa (TODO_update_ssa);
2841 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2842 fprintf (vect_dump, "LOOP VECTORIZED.");