This commit was manufactured by cvs2svn to create branch
[official-gcc.git] / gcc / tree-vect-transform.c
blob5f71256e14ea5bf6b36611d8096311bf712416f3
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 /* Utility function dealing with loop peeling (not peeling itself). */
62 static void vect_generate_tmps_on_preheader
63 (loop_vec_info, tree *, tree *, tree *);
64 static tree vect_build_loop_niters (loop_vec_info);
65 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
66 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
67 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
68 static void vect_update_inits_of_drs (loop_vec_info, tree);
69 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
70 static void vect_do_peeling_for_loop_bound
71 (loop_vec_info, tree *, struct loops *);
74 /* Function vect_get_new_vect_var.
76 Returns a name for a new variable. The current naming scheme appends the
77 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
78 the name of vectorizer generated variables, and appends that to NAME if
79 provided. */
81 static tree
82 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
84 const char *prefix;
85 int prefix_len;
86 tree new_vect_var;
88 if (var_kind == vect_simple_var)
89 prefix = "vect_";
90 else
91 prefix = "vect_p";
93 prefix_len = strlen (prefix);
95 if (name)
96 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
97 else
98 new_vect_var = create_tmp_var (type, prefix);
100 return new_vect_var;
104 /* Function vect_create_index_for_vector_ref.
106 Create (and return) an index variable, along with it's update chain in the
107 loop. This variable will be used to access a memory location in a vector
108 operation.
110 Input:
111 LOOP: The loop being vectorized.
112 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
113 function can be added here, or in the loop pre-header.
115 Output:
116 Return an index that will be used to index a vector array. It is expected
117 that a pointer to the first vector will be used as the base address for the
118 indexed reference.
120 FORNOW: we are not trying to be efficient, just creating a new index each
121 time from scratch. At this time all vector references could use the same
122 index.
124 TODO: create only one index to be used by all vector references. Record
125 the index in the LOOP_VINFO the first time this procedure is called and
126 return it on subsequent calls. The increment of this index must be placed
127 just before the conditional expression that ends the single block loop. */
129 static tree
130 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
132 tree init, step;
133 block_stmt_iterator incr_bsi;
134 bool insert_after;
135 tree indx_before_incr, indx_after_incr;
136 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
137 tree incr;
139 /* It is assumed that the base pointer used for vectorized access contains
140 the address of the first vector. Therefore the index used for vectorized
141 access must be initialized to zero and incremented by 1. */
143 init = integer_zero_node;
144 step = integer_one_node;
146 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
147 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
148 &indx_before_incr, &indx_after_incr);
149 incr = bsi_stmt (incr_bsi);
150 get_stmt_operands (incr);
151 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
153 return indx_before_incr;
157 /* Function vect_create_addr_base_for_vector_ref.
159 Create an expression that computes the address of the first memory location
160 that will be accessed for a data reference.
162 Input:
163 STMT: The statement containing the data reference.
164 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
165 OFFSET: Optional. If supplied, it is be added to the initial address.
167 Output:
168 1. Return an SSA_NAME whose value is the address of the memory location of
169 the first vector of the data reference.
170 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
171 these statement(s) which define the returned SSA_NAME.
173 FORNOW: We are only handling array accesses with step 1. */
175 static tree
176 vect_create_addr_base_for_vector_ref (tree stmt,
177 tree *new_stmt_list,
178 tree offset)
180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
181 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
182 tree data_ref_base =
183 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
184 tree base_name = build_fold_indirect_ref (data_ref_base);
185 tree ref = DR_REF (dr);
186 tree scalar_type = TREE_TYPE (ref);
187 tree scalar_ptr_type = build_pointer_type (scalar_type);
188 tree vec_stmt;
189 tree new_temp;
190 tree addr_base, addr_expr;
191 tree dest, new_stmt;
192 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
194 /* Create base_offset */
195 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
196 add_referenced_tmp_var (dest);
197 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
198 append_to_statement_list_force (new_stmt, new_stmt_list);
200 if (offset)
202 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
203 add_referenced_tmp_var (tmp);
204 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
205 STMT_VINFO_VECT_STEP (stmt_info)));
206 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
207 base_offset, offset));
208 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
209 append_to_statement_list_force (new_stmt, new_stmt_list);
212 /* base + base_offset */
213 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
214 base_offset));
216 /* addr_expr = addr_base */
217 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
218 get_name (base_name));
219 add_referenced_tmp_var (addr_expr);
220 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
221 new_temp = make_ssa_name (addr_expr, vec_stmt);
222 TREE_OPERAND (vec_stmt, 0) = new_temp;
223 append_to_statement_list_force (vec_stmt, new_stmt_list);
225 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
227 fprintf (vect_dump, "created ");
228 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
230 return new_temp;
234 /* Function vect_align_data_ref.
236 Handle mislignment of a memory accesses.
238 FORNOW: Can't handle misaligned accesses.
239 Make sure that the dataref is aligned. */
241 static void
242 vect_align_data_ref (tree stmt)
244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
245 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
247 /* FORNOW: can't handle misaligned accesses;
248 all accesses expected to be aligned. */
249 gcc_assert (aligned_access_p (dr));
253 /* Function vect_create_data_ref_ptr.
255 Create a memory reference expression for vector access, to be used in a
256 vector load/store stmt. The reference is based on a new pointer to vector
257 type (vp).
259 Input:
260 1. STMT: a stmt that references memory. Expected to be of the form
261 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
262 2. BSI: block_stmt_iterator where new stmts can be added.
263 3. OFFSET (optional): an offset to be added to the initial address accessed
264 by the data-ref in STMT.
265 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
266 pointing to the initial address.
268 Output:
269 1. Declare a new ptr to vector_type, and have it point to the base of the
270 data reference (initial addressed accessed by the data reference).
271 For example, for vector of type V8HI, the following code is generated:
273 v8hi *vp;
274 vp = (v8hi *)initial_address;
276 if OFFSET is not supplied:
277 initial_address = &a[init];
278 if OFFSET is supplied:
279 initial_address = &a[init + OFFSET];
281 Return the initial_address in INITIAL_ADDRESS.
283 2. Create a data-reference in the loop based on the new vector pointer vp,
284 and using a new index variable 'idx' as follows:
286 vp' = vp + update
288 where if ONLY_INIT is true:
289 update = zero
290 and otherwise
291 update = idx + vector_type_size
293 Return the pointer vp'.
296 FORNOW: handle only aligned and consecutive accesses. */
298 static tree
299 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
300 tree *initial_address, bool only_init)
302 tree base_name;
303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
305 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
306 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
307 tree vect_ptr_type;
308 tree vect_ptr;
309 tree tag;
310 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
311 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
312 vuse_optype vuses = STMT_VUSE_OPS (stmt);
313 int nvuses, nv_may_defs, nv_must_defs;
314 int i;
315 tree new_temp;
316 tree vec_stmt;
317 tree new_stmt_list = NULL_TREE;
318 tree idx;
319 edge pe = loop_preheader_edge (loop);
320 basic_block new_bb;
321 tree vect_ptr_init;
322 tree vectype_size;
323 tree ptr_update;
324 tree data_ref_ptr;
325 tree type, tmp, size;
327 base_name = build_fold_indirect_ref (unshare_expr (
328 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
330 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
332 tree data_ref_base = base_name;
333 fprintf (vect_dump, "create array_ref of type: ");
334 print_generic_expr (vect_dump, vectype, TDF_SLIM);
335 if (TREE_CODE (data_ref_base) == VAR_DECL)
336 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
337 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
338 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
339 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
340 fprintf (vect_dump, " vectorizing a record based array ref: ");
341 else if (TREE_CODE (data_ref_base) == SSA_NAME)
342 fprintf (vect_dump, " vectorizing a pointer ref: ");
343 print_generic_expr (vect_dump, base_name, TDF_SLIM);
346 /** (1) Create the new vector-pointer variable: **/
348 vect_ptr_type = build_pointer_type (vectype);
349 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
350 get_name (base_name));
351 add_referenced_tmp_var (vect_ptr);
354 /** (2) Handle aliasing information of the new vector-pointer: **/
356 tag = STMT_VINFO_MEMTAG (stmt_info);
357 gcc_assert (tag);
358 get_var_ann (vect_ptr)->type_mem_tag = tag;
360 /* Mark for renaming all aliased variables
361 (i.e, the may-aliases of the type-mem-tag). */
362 nvuses = NUM_VUSES (vuses);
363 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
364 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
365 for (i = 0; i < nvuses; i++)
367 tree use = VUSE_OP (vuses, i);
368 if (TREE_CODE (use) == SSA_NAME)
369 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
371 for (i = 0; i < nv_may_defs; i++)
373 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
374 if (TREE_CODE (def) == SSA_NAME)
375 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
377 for (i = 0; i < nv_must_defs; i++)
379 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
380 if (TREE_CODE (def) == SSA_NAME)
381 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
385 /** (3) Calculate the initial address the vector-pointer, and set
386 the vector-pointer to point to it before the loop: **/
388 /* Create: (&(base[init_val+offset]) in the loop preheader. */
389 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
390 offset);
391 pe = loop_preheader_edge (loop);
392 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
393 gcc_assert (!new_bb);
394 *initial_address = new_temp;
396 /* Create: p = (vectype *) initial_base */
397 vec_stmt = fold_convert (vect_ptr_type, new_temp);
398 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
399 new_temp = make_ssa_name (vect_ptr, vec_stmt);
400 TREE_OPERAND (vec_stmt, 0) = new_temp;
401 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
402 gcc_assert (!new_bb);
403 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
406 /** (4) Handle the updating of the vector-pointer inside the loop: **/
408 if (only_init) /* No update in loop is required. */
409 return vect_ptr_init;
411 idx = vect_create_index_for_vector_ref (loop_vinfo);
413 /* Create: update = idx * vectype_size */
414 tmp = create_tmp_var (integer_type_node, "update");
415 add_referenced_tmp_var (tmp);
416 size = TYPE_SIZE (vect_ptr_type);
417 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
418 ptr_update = create_tmp_var (type, "update");
419 add_referenced_tmp_var (ptr_update);
420 vectype_size = TYPE_SIZE_UNIT (vectype);
421 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
423 new_temp = make_ssa_name (tmp, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
426 vec_stmt = fold_convert (type, new_temp);
427 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
428 new_temp = make_ssa_name (ptr_update, vec_stmt);
429 TREE_OPERAND (vec_stmt, 0) = new_temp;
430 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
432 /* Create: data_ref_ptr = vect_ptr_init + update */
433 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
434 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
435 new_temp = make_ssa_name (vect_ptr, vec_stmt);
436 TREE_OPERAND (vec_stmt, 0) = new_temp;
437 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
438 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
440 return data_ref_ptr;
444 /* Function vect_create_destination_var.
446 Create a new temporary of type VECTYPE. */
448 static tree
449 vect_create_destination_var (tree scalar_dest, tree vectype)
451 tree vec_dest;
452 const char *new_name;
454 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
456 new_name = get_name (scalar_dest);
457 if (!new_name)
458 new_name = "var_";
459 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
460 add_referenced_tmp_var (vec_dest);
462 return vec_dest;
466 /* Function vect_init_vector.
468 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
469 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
470 used in the vectorization of STMT. */
472 static tree
473 vect_init_vector (tree stmt, tree vector_var)
475 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
476 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
478 tree new_var;
479 tree init_stmt;
480 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
481 tree vec_oprnd;
482 edge pe;
483 tree new_temp;
484 basic_block new_bb;
486 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
487 add_referenced_tmp_var (new_var);
489 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
490 new_temp = make_ssa_name (new_var, init_stmt);
491 TREE_OPERAND (init_stmt, 0) = new_temp;
493 pe = loop_preheader_edge (loop);
494 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
495 gcc_assert (!new_bb);
497 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
499 fprintf (vect_dump, "created new init_stmt: ");
500 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
503 vec_oprnd = TREE_OPERAND (init_stmt, 0);
504 return vec_oprnd;
508 /* Function vect_get_vec_def_for_operand.
510 OP is an operand in STMT. This function returns a (vector) def that will be
511 used in the vectorized stmt for STMT.
513 In the case that OP is an SSA_NAME which is defined in the loop, then
514 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
516 In case OP is an invariant or constant, a new stmt that creates a vector def
517 needs to be introduced. */
519 static tree
520 vect_get_vec_def_for_operand (tree op, tree stmt)
522 tree vec_oprnd;
523 tree vec_stmt;
524 tree def_stmt;
525 stmt_vec_info def_stmt_info = NULL;
526 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
527 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
528 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
529 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
530 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
531 basic_block bb;
532 tree vec_inv;
533 tree t = NULL_TREE;
534 tree def;
535 int i;
537 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
539 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
540 print_generic_expr (vect_dump, op, TDF_SLIM);
543 /** ===> Case 1: operand is a constant. **/
545 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
547 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
549 tree vec_cst;
551 /* Build a tree with vector elements. */
552 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
553 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
555 for (i = nunits - 1; i >= 0; --i)
557 t = tree_cons (NULL_TREE, op, t);
559 vec_cst = build_vector (vectype, t);
560 return vect_init_vector (stmt, vec_cst);
563 gcc_assert (TREE_CODE (op) == SSA_NAME);
565 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
567 def_stmt = SSA_NAME_DEF_STMT (op);
568 def_stmt_info = vinfo_for_stmt (def_stmt);
570 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
572 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
573 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
577 /** ==> Case 2.1: operand is defined inside the loop. **/
579 if (def_stmt_info)
581 /* Get the def from the vectorized stmt. */
583 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
584 gcc_assert (vec_stmt);
585 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
586 return vec_oprnd;
590 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
591 it is a reduction/induction. **/
593 bb = bb_for_stmt (def_stmt);
594 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
596 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
597 fprintf (vect_dump, "reduction/induction - unsupported.");
598 internal_error ("no support for reduction/induction"); /* FORNOW */
602 /** ==> Case 2.3: operand is defined outside the loop -
603 it is a loop invariant. */
605 switch (TREE_CODE (def_stmt))
607 case PHI_NODE:
608 def = PHI_RESULT (def_stmt);
609 break;
610 case MODIFY_EXPR:
611 def = TREE_OPERAND (def_stmt, 0);
612 break;
613 case NOP_EXPR:
614 def = TREE_OPERAND (def_stmt, 0);
615 gcc_assert (IS_EMPTY_STMT (def_stmt));
616 def = op;
617 break;
618 default:
619 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
621 fprintf (vect_dump, "unsupported defining stmt: ");
622 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
624 internal_error ("unsupported defining stmt");
627 /* Build a tree with vector elements.
628 Create 'vec_inv = {inv,inv,..,inv}' */
630 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
631 fprintf (vect_dump, "Create vector_inv.");
633 for (i = nunits - 1; i >= 0; --i)
635 t = tree_cons (NULL_TREE, def, t);
638 vec_inv = build_constructor (vectype, t);
639 return vect_init_vector (stmt, vec_inv);
643 /* Function vect_finish_stmt_generation.
645 Insert a new stmt. */
647 static void
648 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
650 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
652 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
654 fprintf (vect_dump, "add new stmt: ");
655 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
658 #ifdef ENABLE_CHECKING
659 /* Make sure bsi points to the stmt that is being vectorized. */
660 gcc_assert (stmt == bsi_stmt (*bsi));
661 #endif
663 #ifdef USE_MAPPED_LOCATION
664 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
665 #else
666 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
667 #endif
671 /* Function vectorizable_assignment.
673 Check if STMT performs an assignment (copy) that can be vectorized.
674 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
675 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
678 bool
679 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
681 tree vec_dest;
682 tree scalar_dest;
683 tree op;
684 tree vec_oprnd;
685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
687 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
688 tree new_temp;
690 /* Is vectorizable assignment? */
692 if (TREE_CODE (stmt) != MODIFY_EXPR)
693 return false;
695 scalar_dest = TREE_OPERAND (stmt, 0);
696 if (TREE_CODE (scalar_dest) != SSA_NAME)
697 return false;
699 op = TREE_OPERAND (stmt, 1);
700 if (!vect_is_simple_use (op, loop_vinfo, NULL))
702 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
703 fprintf (vect_dump, "use not simple.");
704 return false;
707 if (!vec_stmt) /* transformation not required. */
709 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
710 return true;
713 /** Transform. **/
714 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
715 fprintf (vect_dump, "transform assignment.");
717 /* Handle def. */
718 vec_dest = vect_create_destination_var (scalar_dest, vectype);
720 /* Handle use. */
721 op = TREE_OPERAND (stmt, 1);
722 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
724 /* Arguments are ready. create the new vector stmt. */
725 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
726 new_temp = make_ssa_name (vec_dest, *vec_stmt);
727 TREE_OPERAND (*vec_stmt, 0) = new_temp;
728 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
730 return true;
734 /* Function vectorizable_operation.
736 Check if STMT performs a binary or unary operation that can be vectorized.
737 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
738 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
739 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
741 bool
742 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
744 tree vec_dest;
745 tree scalar_dest;
746 tree operation;
747 tree op0, op1 = NULL;
748 tree vec_oprnd0, vec_oprnd1=NULL;
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
752 int i;
753 enum tree_code code;
754 enum machine_mode vec_mode;
755 tree new_temp;
756 int op_type;
757 tree op;
758 optab optab;
760 /* Is STMT a vectorizable binary/unary operation? */
761 if (TREE_CODE (stmt) != MODIFY_EXPR)
762 return false;
764 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
765 return false;
767 operation = TREE_OPERAND (stmt, 1);
768 code = TREE_CODE (operation);
769 optab = optab_for_tree_code (code, vectype);
771 /* Support only unary or binary operations. */
772 op_type = TREE_CODE_LENGTH (code);
773 if (op_type != unary_op && op_type != binary_op)
775 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
776 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
777 return false;
780 for (i = 0; i < op_type; i++)
782 op = TREE_OPERAND (operation, i);
783 if (!vect_is_simple_use (op, loop_vinfo, NULL))
785 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
786 fprintf (vect_dump, "use not simple.");
787 return false;
791 /* Supportable by target? */
792 if (!optab)
794 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
795 fprintf (vect_dump, "no optab.");
796 return false;
798 vec_mode = TYPE_MODE (vectype);
799 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
801 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
802 fprintf (vect_dump, "op not supported by target.");
803 return false;
806 if (!vec_stmt) /* transformation not required. */
808 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
809 return true;
812 /** Transform. **/
814 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
815 fprintf (vect_dump, "transform binary/unary operation.");
817 /* Handle def. */
818 scalar_dest = TREE_OPERAND (stmt, 0);
819 vec_dest = vect_create_destination_var (scalar_dest, vectype);
821 /* Handle uses. */
822 op0 = TREE_OPERAND (operation, 0);
823 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
825 if (op_type == binary_op)
827 op1 = TREE_OPERAND (operation, 1);
828 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
831 /* Arguments are ready. create the new vector stmt. */
833 if (op_type == binary_op)
834 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
835 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
836 else
837 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
838 build1 (code, vectype, vec_oprnd0));
839 new_temp = make_ssa_name (vec_dest, *vec_stmt);
840 TREE_OPERAND (*vec_stmt, 0) = new_temp;
841 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
843 return true;
847 /* Function vectorizable_store.
849 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
850 can be vectorized.
851 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
852 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
853 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
855 bool
856 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
858 tree scalar_dest;
859 tree data_ref;
860 tree op;
861 tree vec_oprnd1;
862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
863 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
864 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
865 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
866 enum machine_mode vec_mode;
867 tree dummy;
868 enum dr_alignment_support alignment_support_cheme;
870 /* Is vectorizable store? */
872 if (TREE_CODE (stmt) != MODIFY_EXPR)
873 return false;
875 scalar_dest = TREE_OPERAND (stmt, 0);
876 if (TREE_CODE (scalar_dest) != ARRAY_REF
877 && TREE_CODE (scalar_dest) != INDIRECT_REF)
878 return false;
880 op = TREE_OPERAND (stmt, 1);
881 if (!vect_is_simple_use (op, loop_vinfo, NULL))
883 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
884 fprintf (vect_dump, "use not simple.");
885 return false;
888 vec_mode = TYPE_MODE (vectype);
889 /* FORNOW. In some cases can vectorize even if data-type not supported
890 (e.g. - array initialization with 0). */
891 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
892 return false;
894 if (!STMT_VINFO_DATA_REF (stmt_info))
895 return false;
898 if (!vec_stmt) /* transformation not required. */
900 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
901 return true;
904 /** Transform. **/
906 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
907 fprintf (vect_dump, "transform store");
909 alignment_support_cheme = vect_supportable_dr_alignment (dr);
910 gcc_assert (alignment_support_cheme);
911 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
913 /* Handle use - get the vectorized def from the defining stmt. */
914 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
916 /* Handle def. */
917 /* FORNOW: make sure the data reference is aligned. */
918 vect_align_data_ref (stmt);
919 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
920 data_ref = build_fold_indirect_ref (data_ref);
922 /* Arguments are ready. create the new vector stmt. */
923 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
924 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
926 return true;
930 /* vectorizable_load.
932 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
933 can be vectorized.
934 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
935 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
936 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
938 bool
939 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
941 tree scalar_dest;
942 tree vec_dest = NULL;
943 tree data_ref = NULL;
944 tree op;
945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
946 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
948 tree new_temp;
949 int mode;
950 tree init_addr;
951 tree new_stmt;
952 tree dummy;
953 basic_block new_bb;
954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
955 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
956 edge pe = loop_preheader_edge (loop);
957 enum dr_alignment_support alignment_support_cheme;
959 /* Is vectorizable load? */
961 if (TREE_CODE (stmt) != MODIFY_EXPR)
962 return false;
964 scalar_dest = TREE_OPERAND (stmt, 0);
965 if (TREE_CODE (scalar_dest) != SSA_NAME)
966 return false;
968 op = TREE_OPERAND (stmt, 1);
969 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
970 return false;
972 if (!STMT_VINFO_DATA_REF (stmt_info))
973 return false;
975 mode = (int) TYPE_MODE (vectype);
977 /* FORNOW. In some cases can vectorize even if data-type not supported
978 (e.g. - data copies). */
979 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
981 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
982 fprintf (vect_dump, "Aligned load, but unsupported type.");
983 return false;
986 if (!vec_stmt) /* transformation not required. */
988 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
989 return true;
992 /** Transform. **/
994 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
995 fprintf (vect_dump, "transform load.");
997 alignment_support_cheme = vect_supportable_dr_alignment (dr);
998 gcc_assert (alignment_support_cheme);
1000 if (alignment_support_cheme == dr_aligned
1001 || alignment_support_cheme == dr_unaligned_supported)
1003 /* Create:
1004 p = initial_addr;
1005 indx = 0;
1006 loop {
1007 vec_dest = *(p);
1008 indx = indx + 1;
1012 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1013 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1014 if (aligned_access_p (dr))
1015 data_ref = build_fold_indirect_ref (data_ref);
1016 else
1018 int mis = DR_MISALIGNMENT (dr);
1019 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1020 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1021 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1023 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1024 new_temp = make_ssa_name (vec_dest, new_stmt);
1025 TREE_OPERAND (new_stmt, 0) = new_temp;
1026 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1028 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1030 /* Create:
1031 p1 = initial_addr;
1032 msq_init = *(floor(p1))
1033 p2 = initial_addr + VS - 1;
1034 magic = have_builtin ? builtin_result : initial_address;
1035 indx = 0;
1036 loop {
1037 p2' = p2 + indx * vectype_size
1038 lsq = *(floor(p2'))
1039 vec_dest = realign_load (msq, lsq, magic)
1040 indx = indx + 1;
1041 msq = lsq;
1045 tree offset;
1046 tree magic;
1047 tree phi_stmt;
1048 tree msq_init;
1049 tree msq, lsq;
1050 tree dataref_ptr;
1051 tree params;
1053 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1054 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1055 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1056 &init_addr, true);
1057 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1058 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1059 new_temp = make_ssa_name (vec_dest, new_stmt);
1060 TREE_OPERAND (new_stmt, 0) = new_temp;
1061 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1062 gcc_assert (!new_bb);
1063 msq_init = TREE_OPERAND (new_stmt, 0);
1066 /* <2> Create lsq = *(floor(p2')) in the loop */
1067 offset = build_int_cst (integer_type_node,
1068 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1069 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1070 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1071 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1072 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1073 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1074 new_temp = make_ssa_name (vec_dest, new_stmt);
1075 TREE_OPERAND (new_stmt, 0) = new_temp;
1076 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1077 lsq = TREE_OPERAND (new_stmt, 0);
1080 /* <3> */
1081 if (targetm.vectorize.builtin_mask_for_load)
1083 /* Create permutation mask, if required, in loop preheader. */
1084 tree builtin_decl;
1085 params = build_tree_list (NULL_TREE, init_addr);
1086 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1087 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1088 new_stmt = build_function_call_expr (builtin_decl, params);
1089 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1090 new_temp = make_ssa_name (vec_dest, new_stmt);
1091 TREE_OPERAND (new_stmt, 0) = new_temp;
1092 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1093 gcc_assert (!new_bb);
1094 magic = TREE_OPERAND (new_stmt, 0);
1096 /* Since we have just created a CALL_EXPR, we may need to
1097 rename call-clobbered variables. */
1098 mark_call_clobbered_vars_to_rename ();
1100 else
1102 /* Use current address instead of init_addr for reduced reg pressure.
1104 magic = dataref_ptr;
1108 /* <4> Create msq = phi <msq_init, lsq> in loop */
1109 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1110 msq = make_ssa_name (vec_dest, NULL_TREE);
1111 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1112 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1113 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1114 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1117 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1118 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1119 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1120 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1121 new_temp = make_ssa_name (vec_dest, new_stmt);
1122 TREE_OPERAND (new_stmt, 0) = new_temp;
1123 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1125 else
1126 gcc_unreachable ();
1128 *vec_stmt = new_stmt;
1129 return true;
1133 /* Function vect_transform_stmt.
1135 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1137 bool
1138 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1140 bool is_store = false;
1141 tree vec_stmt = NULL_TREE;
1142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1143 bool done;
1145 switch (STMT_VINFO_TYPE (stmt_info))
1147 case op_vec_info_type:
1148 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1149 gcc_assert (done);
1150 break;
1152 case assignment_vec_info_type:
1153 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1154 gcc_assert (done);
1155 break;
1157 case load_vec_info_type:
1158 done = vectorizable_load (stmt, bsi, &vec_stmt);
1159 gcc_assert (done);
1160 break;
1162 case store_vec_info_type:
1163 done = vectorizable_store (stmt, bsi, &vec_stmt);
1164 gcc_assert (done);
1165 is_store = true;
1166 break;
1167 default:
1168 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1169 fprintf (vect_dump, "stmt not supported.");
1170 gcc_unreachable ();
1173 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1175 return is_store;
1179 /* This function builds ni_name = number of iterations loop executes
1180 on the loop preheader. */
1182 static tree
1183 vect_build_loop_niters (loop_vec_info loop_vinfo)
1185 tree ni_name, stmt, var;
1186 edge pe;
1187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1188 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1190 var = create_tmp_var (TREE_TYPE (ni), "niters");
1191 add_referenced_tmp_var (var);
1192 ni_name = force_gimple_operand (ni, &stmt, false, var);
1194 pe = loop_preheader_edge (loop);
1195 if (stmt)
1197 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1198 gcc_assert (!new_bb);
1201 return ni_name;
1205 /* This function generates the following statements:
1207 ni_name = number of iterations loop executes
1208 ratio = ni_name / vf
1209 ratio_mult_vf_name = ratio * vf
1211 and places them at the loop preheader edge. */
1213 static void
1214 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1215 tree *ni_name_ptr,
1216 tree *ratio_mult_vf_name_ptr,
1217 tree *ratio_name_ptr)
1220 edge pe;
1221 basic_block new_bb;
1222 tree stmt, ni_name;
1223 tree var;
1224 tree ratio_name;
1225 tree ratio_mult_vf_name;
1226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1227 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1228 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1229 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1231 pe = loop_preheader_edge (loop);
1233 /* Generate temporary variable that contains
1234 number of iterations loop executes. */
1236 ni_name = vect_build_loop_niters (loop_vinfo);
1238 /* Create: ratio = ni >> log2(vf) */
1240 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1241 add_referenced_tmp_var (var);
1242 ratio_name = make_ssa_name (var, NULL_TREE);
1243 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1244 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1245 SSA_NAME_DEF_STMT (ratio_name) = stmt;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1249 gcc_assert (!new_bb);
1251 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1253 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1254 add_referenced_tmp_var (var);
1255 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1256 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1257 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1258 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1260 pe = loop_preheader_edge (loop);
1261 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1262 gcc_assert (!new_bb);
1264 *ni_name_ptr = ni_name;
1265 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1266 *ratio_name_ptr = ratio_name;
1268 return;
1272 /* Function vect_update_ivs_after_vectorizer.
1274 "Advance" the induction variables of LOOP to the value they should take
1275 after the execution of LOOP. This is currently necessary because the
1276 vectorizer does not handle induction variables that are used after the
1277 loop. Such a situation occurs when the last iterations of LOOP are
1278 peeled, because:
1279 1. We introduced new uses after LOOP for IVs that were not originally used
1280 after LOOP: the IVs of LOOP are now used by an epilog loop.
1281 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1282 times, whereas the loop IVs should be bumped N times.
1284 Input:
1285 - LOOP - a loop that is going to be vectorized. The last few iterations
1286 of LOOP were peeled.
1287 - NITERS - the number of iterations that LOOP executes (before it is
1288 vectorized). i.e, the number of times the ivs should be bumped.
1289 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1290 coming out from LOOP on which there are uses of the LOOP ivs
1291 (this is the path from LOOP->exit to epilog_loop->preheader).
1293 The new definitions of the ivs are placed in LOOP->exit.
1294 The phi args associated with the edge UPDATE_E in the bb
1295 UPDATE_E->dest are updated accordingly.
1297 Assumption 1: Like the rest of the vectorizer, this function assumes
1298 a single loop exit that has a single predecessor.
1300 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1301 organized in the same order.
1303 Assumption 3: The access function of the ivs is simple enough (see
1304 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1306 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1307 coming out of LOOP on which the ivs of LOOP are used (this is the path
1308 that leads to the epilog loop; other paths skip the epilog loop). This
1309 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1310 needs to have its phis updated.
1313 static void
1314 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1315 edge update_e)
1317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1318 basic_block exit_bb = loop->exit_edges[0]->dest;
1319 tree phi, phi1;
1320 basic_block update_bb = update_e->dest;
1322 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1324 /* Make sure there exists a single-predecessor exit bb: */
1325 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
1327 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1328 phi && phi1;
1329 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1331 tree access_fn = NULL;
1332 tree evolution_part;
1333 tree init_expr;
1334 tree step_expr;
1335 tree var, stmt, ni, ni_name;
1336 block_stmt_iterator last_bsi;
1338 /* Skip virtual phi's. */
1339 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1341 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1342 fprintf (vect_dump, "virtual phi. skip.");
1343 continue;
1346 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1347 gcc_assert (access_fn);
1348 evolution_part =
1349 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1350 gcc_assert (evolution_part != NULL_TREE);
1352 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1353 of degree >= 2 or exponential. */
1354 gcc_assert (!tree_is_chrec (evolution_part));
1356 step_expr = evolution_part;
1357 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1358 loop->num));
1360 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1361 build2 (MULT_EXPR, TREE_TYPE (niters),
1362 niters, step_expr), init_expr);
1364 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1365 add_referenced_tmp_var (var);
1367 ni_name = force_gimple_operand (ni, &stmt, false, var);
1369 /* Insert stmt into exit_bb. */
1370 last_bsi = bsi_last (exit_bb);
1371 if (stmt)
1372 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1374 /* Fix phi expressions in the successor bb. */
1375 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
1376 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
1377 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1382 /* Function vect_do_peeling_for_loop_bound
1384 Peel the last iterations of the loop represented by LOOP_VINFO.
1385 The peeled iterations form a new epilog loop. Given that the loop now
1386 iterates NITERS times, the new epilog loop iterates
1387 NITERS % VECTORIZATION_FACTOR times.
1389 The original loop will later be made to iterate
1390 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1392 static void
1393 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1394 struct loops *loops)
1397 tree ni_name, ratio_mult_vf_name;
1398 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1399 struct loop *new_loop;
1400 edge update_e;
1401 #ifdef ENABLE_CHECKING
1402 int loop_num;
1403 #endif
1405 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1406 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
1408 /* Generate the following variables on the preheader of original loop:
1410 ni_name = number of iteration the original loop executes
1411 ratio = ni_name / vf
1412 ratio_mult_vf_name = ratio * vf */
1413 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1414 &ratio_mult_vf_name, ratio);
1416 /* Update loop info. */
1417 loop->pre_header = loop_preheader_edge (loop)->src;
1418 loop->pre_header_edges[0] = loop_preheader_edge (loop);
1420 #ifdef ENABLE_CHECKING
1421 loop_num = loop->num;
1422 #endif
1423 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
1424 ratio_mult_vf_name, ni_name, false);
1425 #ifdef ENABLE_CHECKING
1426 gcc_assert (new_loop);
1427 gcc_assert (loop_num == loop->num);
1428 slpeel_verify_cfg_after_peeling (loop, new_loop);
1429 #endif
1431 /* A guard that controls whether the new_loop is to be executed or skipped
1432 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1433 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1434 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1435 is on the path where the LOOP IVs are used and need to be updated. */
1437 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
1438 update_e = EDGE_PRED (new_loop->pre_header, 0);
1439 else
1440 update_e = EDGE_PRED (new_loop->pre_header, 1);
1442 /* Update IVs of original loop as if they were advanced
1443 by ratio_mult_vf_name steps. */
1444 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1446 /* After peeling we have to reset scalar evolution analyzer. */
1447 scev_reset ();
1449 return;
1453 /* Function vect_gen_niters_for_prolog_loop
1455 Set the number of iterations for the loop represented by LOOP_VINFO
1456 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1457 and the misalignment of DR - the first data reference recorded in
1458 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1459 this loop, the data reference DR will refer to an aligned location.
1461 The following computation is generated:
1463 compute address misalignment in bytes:
1464 addr_mis = addr & (vectype_size - 1)
1466 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1468 (elem_size = element type size; an element is the scalar element
1469 whose type is the inner type of the vectype) */
1471 static tree
1472 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1474 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1475 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1476 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1477 tree var, stmt;
1478 tree iters, iters_name;
1479 edge pe;
1480 basic_block new_bb;
1481 tree dr_stmt = DR_STMT (dr);
1482 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1483 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1484 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1485 tree elem_misalign;
1486 tree byte_misalign;
1487 tree new_stmts = NULL_TREE;
1488 tree start_addr =
1489 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1490 tree ptr_type = TREE_TYPE (start_addr);
1491 tree size = TYPE_SIZE (ptr_type);
1492 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1493 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1494 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1495 tree niters_type = TREE_TYPE (loop_niters);
1496 tree elem_size_log =
1497 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1498 tree vf_tree = build_int_cst (unsigned_type_node, vf);
1500 pe = loop_preheader_edge (loop);
1501 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1502 gcc_assert (!new_bb);
1504 /* Create: byte_misalign = addr & (vectype_size - 1) */
1505 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1507 /* Create: elem_misalign = byte_misalign / element_size */
1508 elem_misalign =
1509 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1511 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1512 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1513 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1514 iters = fold_convert (niters_type, iters);
1516 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1517 /* If the loop bound is known at compile time we already verified that it is
1518 greater than vf; since the misalignment ('iters') is at most vf, there's
1519 no need to generate the MIN_EXPR in this case. */
1520 if (TREE_CODE (loop_niters) != INTEGER_CST)
1521 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1523 var = create_tmp_var (niters_type, "prolog_loop_niters");
1524 add_referenced_tmp_var (var);
1525 iters_name = force_gimple_operand (iters, &stmt, false, var);
1527 /* Insert stmt on loop preheader edge. */
1528 pe = loop_preheader_edge (loop);
1529 if (stmt)
1531 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1532 gcc_assert (!new_bb);
1535 return iters_name;
1539 /* Function vect_update_inits_of_dr
1541 NITERS iterations were peeled from LOOP. DR represents a data reference
1542 in LOOP. This function updates the information recorded in DR to
1543 account for the fact that the first NITERS iterations had already been
1544 executed. Specifically, it updates the OFFSET field of stmt_info. */
1546 static void
1547 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
1549 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1550 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
1552 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1553 STMT_VINFO_VECT_STEP (stmt_info)));
1554 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1555 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1559 /* Function vect_update_inits_of_drs
1561 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1562 This function updates the information recorded for the data references in
1563 the loop to account for the fact that the first NITERS iterations had
1564 already been executed. Specifically, it updates the initial_condition of the
1565 access_function of all the data_references in the loop. */
1567 static void
1568 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1570 unsigned int i;
1571 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1572 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1574 if (vect_dump && (dump_flags & TDF_DETAILS))
1575 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1577 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1579 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1580 vect_update_inits_of_dr (dr, niters);
1583 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1585 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1586 vect_update_inits_of_dr (dr, niters);
1591 /* Function vect_do_peeling_for_alignment
1593 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1594 'niters' is set to the misalignment of one of the data references in the
1595 loop, thereby forcing it to refer to an aligned location at the beginning
1596 of the execution of this loop. The data reference for which we are
1597 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1599 static void
1600 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1602 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1603 tree niters_of_prolog_loop, ni_name;
1604 tree n_iters;
1605 struct loop *new_loop;
1607 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1608 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1610 ni_name = vect_build_loop_niters (loop_vinfo);
1611 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1613 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1614 new_loop =
1615 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
1616 niters_of_prolog_loop, ni_name, true);
1617 #ifdef ENABLE_CHECKING
1618 gcc_assert (new_loop);
1619 slpeel_verify_cfg_after_peeling (new_loop, loop);
1620 #endif
1622 /* Update number of times loop executes. */
1623 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1624 LOOP_VINFO_NITERS (loop_vinfo) =
1625 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
1627 /* Update the init conditions of the access functions of all data refs. */
1628 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1630 /* After peeling we have to reset scalar evolution analyzer. */
1631 scev_reset ();
1633 return;
1637 /* Function vect_transform_loop.
1639 The analysis phase has determined that the loop is vectorizable.
1640 Vectorize the loop - created vectorized stmts to replace the scalar
1641 stmts in the loop, and update the loop exit condition. */
1643 void
1644 vect_transform_loop (loop_vec_info loop_vinfo,
1645 struct loops *loops ATTRIBUTE_UNUSED)
1647 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1648 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1649 int nbbs = loop->num_nodes;
1650 block_stmt_iterator si;
1651 int i;
1652 tree ratio = NULL;
1653 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1655 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1656 fprintf (vect_dump, "=== vec_transform_loop ===");
1659 /* Peel the loop if there are data refs with unknown alignment.
1660 Only one data ref with unknown store is allowed. */
1662 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1663 vect_do_peeling_for_alignment (loop_vinfo, loops);
1665 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1666 compile time constant), or it is a constant that doesn't divide by the
1667 vectorization factor, then an epilog loop needs to be created.
1668 We therefore duplicate the loop: the original loop will be vectorized,
1669 and will compute the first (n/VF) iterations. The second copy of the loop
1670 will remain scalar and will compute the remaining (n%VF) iterations.
1671 (VF is the vectorization factor). */
1673 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1674 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1675 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
1676 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
1677 else
1678 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
1679 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
1681 /* 1) Make sure the loop header has exactly two entries
1682 2) Make sure we have a preheader basic block. */
1684 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1686 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1689 /* FORNOW: the vectorizer supports only loops which body consist
1690 of one basic block (header + empty latch). When the vectorizer will
1691 support more involved loop forms, the order by which the BBs are
1692 traversed need to be reconsidered. */
1694 for (i = 0; i < nbbs; i++)
1696 basic_block bb = bbs[i];
1698 for (si = bsi_start (bb); !bsi_end_p (si);)
1700 tree stmt = bsi_stmt (si);
1701 stmt_vec_info stmt_info;
1702 bool is_store;
1704 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1706 fprintf (vect_dump, "------>vectorizing statement: ");
1707 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1709 stmt_info = vinfo_for_stmt (stmt);
1710 gcc_assert (stmt_info);
1711 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1713 bsi_next (&si);
1714 continue;
1716 #ifdef ENABLE_CHECKING
1717 /* FORNOW: Verify that all stmts operate on the same number of
1718 units and no inner unrolling is necessary. */
1719 gcc_assert
1720 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
1721 == vectorization_factor);
1722 #endif
1723 /* -------- vectorize statement ------------ */
1724 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1725 fprintf (vect_dump, "transform statement.");
1727 is_store = vect_transform_stmt (stmt, &si);
1728 if (is_store)
1730 /* free the attached stmt_vec_info and remove the stmt. */
1731 stmt_ann_t ann = stmt_ann (stmt);
1732 free (stmt_info);
1733 set_stmt_info (ann, NULL);
1734 bsi_remove (&si);
1735 continue;
1738 bsi_next (&si);
1739 } /* stmts in BB */
1740 } /* BBs in loop */
1742 slpeel_make_loop_iterate_ntimes (loop, ratio);
1744 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
1745 fprintf (vect_dump, "LOOP VECTORIZED.");