2005-03-23 Daniel Berlin <dberlin@dberlin.org>
[official-gcc.git] / gcc / tree-vect-transform.c
blob5dd9efecdbc90b85d7c6deb616246be9322534f4
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 /* Utility function dealing with loop peeling (not peeling itself). */
62 static void vect_generate_tmps_on_preheader
63 (loop_vec_info, tree *, tree *, tree *);
64 static tree vect_build_loop_niters (loop_vec_info);
65 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
66 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
67 static void vect_update_init_of_dr (struct data_reference *, tree niters);
68 static void vect_update_inits_of_drs (loop_vec_info, tree);
69 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
70 static void vect_do_peeling_for_loop_bound
71 (loop_vec_info, tree *, struct loops *);
74 /* Function vect_get_new_vect_var.
76 Returns a name for a new variable. The current naming scheme appends the
77 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
78 the name of vectorizer generated variables, and appends that to NAME if
79 provided. */
81 static tree
82 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
84 const char *prefix;
85 tree new_vect_var;
87 if (var_kind == vect_simple_var)
88 prefix = "vect_";
89 else
90 prefix = "vect_p";
92 if (name)
93 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
94 else
95 new_vect_var = create_tmp_var (type, prefix);
97 return new_vect_var;
101 /* Function vect_create_index_for_vector_ref.
103 Create (and return) an index variable, along with it's update chain in the
104 loop. This variable will be used to access a memory location in a vector
105 operation.
107 Input:
108 LOOP: The loop being vectorized.
109 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
110 function can be added here, or in the loop pre-header.
112 Output:
113 Return an index that will be used to index a vector array. It is expected
114 that a pointer to the first vector will be used as the base address for the
115 indexed reference.
117 FORNOW: we are not trying to be efficient, just creating a new index each
118 time from scratch. At this time all vector references could use the same
119 index.
121 TODO: create only one index to be used by all vector references. Record
122 the index in the LOOP_VINFO the first time this procedure is called and
123 return it on subsequent calls. The increment of this index must be placed
124 just before the conditional expression that ends the single block loop. */
126 static tree
127 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
129 tree init, step;
130 block_stmt_iterator incr_bsi;
131 bool insert_after;
132 tree indx_before_incr, indx_after_incr;
133 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
134 tree incr;
136 /* It is assumed that the base pointer used for vectorized access contains
137 the address of the first vector. Therefore the index used for vectorized
138 access must be initialized to zero and incremented by 1. */
140 init = integer_zero_node;
141 step = integer_one_node;
143 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
144 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
145 &indx_before_incr, &indx_after_incr);
146 incr = bsi_stmt (incr_bsi);
147 get_stmt_operands (incr);
148 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
150 return indx_before_incr;
154 /* Function vect_create_addr_base_for_vector_ref.
156 Create an expression that computes the address of the first memory location
157 that will be accessed for a data reference.
159 Input:
160 STMT: The statement containing the data reference.
161 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
162 OFFSET: Optional. If supplied, it is be added to the initial address.
164 Output:
165 1. Return an SSA_NAME whose value is the address of the memory location of
166 the first vector of the data reference.
167 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
168 these statement(s) which define the returned SSA_NAME.
170 FORNOW: We are only handling array accesses with step 1. */
172 static tree
173 vect_create_addr_base_for_vector_ref (tree stmt,
174 tree *new_stmt_list,
175 tree offset)
177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
178 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
179 tree data_ref_base =
180 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
181 tree base_name = build_fold_indirect_ref (data_ref_base);
182 tree ref = DR_REF (dr);
183 tree scalar_type = TREE_TYPE (ref);
184 tree scalar_ptr_type = build_pointer_type (scalar_type);
185 tree vec_stmt;
186 tree new_temp;
187 tree addr_base, addr_expr;
188 tree dest, new_stmt;
189 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
191 /* Create base_offset */
192 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
193 add_referenced_tmp_var (dest);
194 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
195 append_to_statement_list_force (new_stmt, new_stmt_list);
197 if (offset)
199 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
200 add_referenced_tmp_var (tmp);
201 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
202 STMT_VINFO_VECT_STEP (stmt_info)));
203 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
204 base_offset, offset));
205 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
206 append_to_statement_list_force (new_stmt, new_stmt_list);
209 /* base + base_offset */
210 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
211 base_offset));
213 /* addr_expr = addr_base */
214 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
215 get_name (base_name));
216 add_referenced_tmp_var (addr_expr);
217 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
218 new_temp = make_ssa_name (addr_expr, vec_stmt);
219 TREE_OPERAND (vec_stmt, 0) = new_temp;
220 append_to_statement_list_force (vec_stmt, new_stmt_list);
222 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
224 fprintf (vect_dump, "created ");
225 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
227 return new_temp;
231 /* Function vect_align_data_ref.
233 Handle mislignment of a memory accesses.
235 FORNOW: Can't handle misaligned accesses.
236 Make sure that the dataref is aligned. */
238 static void
239 vect_align_data_ref (tree stmt)
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
244 /* FORNOW: can't handle misaligned accesses;
245 all accesses expected to be aligned. */
246 gcc_assert (aligned_access_p (dr));
250 /* Function vect_create_data_ref_ptr.
252 Create a memory reference expression for vector access, to be used in a
253 vector load/store stmt. The reference is based on a new pointer to vector
254 type (vp).
256 Input:
257 1. STMT: a stmt that references memory. Expected to be of the form
258 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
259 2. BSI: block_stmt_iterator where new stmts can be added.
260 3. OFFSET (optional): an offset to be added to the initial address accessed
261 by the data-ref in STMT.
262 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
263 pointing to the initial address.
265 Output:
266 1. Declare a new ptr to vector_type, and have it point to the base of the
267 data reference (initial addressed accessed by the data reference).
268 For example, for vector of type V8HI, the following code is generated:
270 v8hi *vp;
271 vp = (v8hi *)initial_address;
273 if OFFSET is not supplied:
274 initial_address = &a[init];
275 if OFFSET is supplied:
276 initial_address = &a[init + OFFSET];
278 Return the initial_address in INITIAL_ADDRESS.
280 2. Create a data-reference in the loop based on the new vector pointer vp,
281 and using a new index variable 'idx' as follows:
283 vp' = vp + update
285 where if ONLY_INIT is true:
286 update = zero
287 and otherwise
288 update = idx + vector_type_size
290 Return the pointer vp'.
293 FORNOW: handle only aligned and consecutive accesses. */
295 static tree
296 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
297 tree *initial_address, bool only_init)
299 tree base_name;
300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
304 tree vect_ptr_type;
305 tree vect_ptr;
306 tree tag;
307 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
308 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
309 vuse_optype vuses = STMT_VUSE_OPS (stmt);
310 int nvuses, nv_may_defs, nv_must_defs;
311 int i;
312 tree new_temp;
313 tree vec_stmt;
314 tree new_stmt_list = NULL_TREE;
315 tree idx;
316 edge pe = loop_preheader_edge (loop);
317 basic_block new_bb;
318 tree vect_ptr_init;
319 tree vectype_size;
320 tree ptr_update;
321 tree data_ref_ptr;
322 tree type, tmp, size;
324 base_name = build_fold_indirect_ref (unshare_expr (
325 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
327 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
329 tree data_ref_base = base_name;
330 fprintf (vect_dump, "create array_ref of type: ");
331 print_generic_expr (vect_dump, vectype, TDF_SLIM);
332 if (TREE_CODE (data_ref_base) == VAR_DECL)
333 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
334 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
335 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
336 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
337 fprintf (vect_dump, " vectorizing a record based array ref: ");
338 else if (TREE_CODE (data_ref_base) == SSA_NAME)
339 fprintf (vect_dump, " vectorizing a pointer ref: ");
340 print_generic_expr (vect_dump, base_name, TDF_SLIM);
343 /** (1) Create the new vector-pointer variable: **/
345 vect_ptr_type = build_pointer_type (vectype);
346 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
347 get_name (base_name));
348 add_referenced_tmp_var (vect_ptr);
351 /** (2) Handle aliasing information of the new vector-pointer: **/
353 tag = STMT_VINFO_MEMTAG (stmt_info);
354 gcc_assert (tag);
355 get_var_ann (vect_ptr)->type_mem_tag = tag;
356 get_var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
358 /* Mark for renaming all aliased variables
359 (i.e, the may-aliases of the type-mem-tag). */
360 nvuses = NUM_VUSES (vuses);
361 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
362 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
364 for (i = 0; i < nvuses; i++)
366 tree use = VUSE_OP (vuses, i);
367 if (TREE_CODE (use) == SSA_NAME)
368 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
370 for (i = 0; i < nv_may_defs; i++)
372 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
373 if (TREE_CODE (def) == SSA_NAME)
374 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
376 for (i = 0; i < nv_must_defs; i++)
378 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
379 if (TREE_CODE (def) == SSA_NAME)
380 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
384 /** (3) Calculate the initial address the vector-pointer, and set
385 the vector-pointer to point to it before the loop: **/
387 /* Create: (&(base[init_val+offset]) in the loop preheader. */
388 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
389 offset);
390 pe = loop_preheader_edge (loop);
391 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
392 gcc_assert (!new_bb);
393 *initial_address = new_temp;
395 /* Create: p = (vectype *) initial_base */
396 vec_stmt = fold_convert (vect_ptr_type, new_temp);
397 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
398 new_temp = make_ssa_name (vect_ptr, vec_stmt);
399 TREE_OPERAND (vec_stmt, 0) = new_temp;
400 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
401 gcc_assert (!new_bb);
402 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
405 /** (4) Handle the updating of the vector-pointer inside the loop: **/
407 if (only_init) /* No update in loop is required. */
408 return vect_ptr_init;
410 idx = vect_create_index_for_vector_ref (loop_vinfo);
412 /* Create: update = idx * vectype_size */
413 tmp = create_tmp_var (integer_type_node, "update");
414 add_referenced_tmp_var (tmp);
415 size = TYPE_SIZE (vect_ptr_type);
416 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
417 ptr_update = create_tmp_var (type, "update");
418 add_referenced_tmp_var (ptr_update);
419 vectype_size = TYPE_SIZE_UNIT (vectype);
420 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
421 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
422 new_temp = make_ssa_name (tmp, vec_stmt);
423 TREE_OPERAND (vec_stmt, 0) = new_temp;
424 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
425 vec_stmt = fold_convert (type, new_temp);
426 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
427 new_temp = make_ssa_name (ptr_update, vec_stmt);
428 TREE_OPERAND (vec_stmt, 0) = new_temp;
429 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
431 /* Create: data_ref_ptr = vect_ptr_init + update */
432 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
433 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
434 new_temp = make_ssa_name (vect_ptr, vec_stmt);
435 TREE_OPERAND (vec_stmt, 0) = new_temp;
436 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
437 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
439 return data_ref_ptr;
443 /* Function vect_create_destination_var.
445 Create a new temporary of type VECTYPE. */
447 static tree
448 vect_create_destination_var (tree scalar_dest, tree vectype)
450 tree vec_dest;
451 const char *new_name;
453 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
455 new_name = get_name (scalar_dest);
456 if (!new_name)
457 new_name = "var_";
458 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
459 add_referenced_tmp_var (vec_dest);
461 return vec_dest;
465 /* Function vect_init_vector.
467 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
468 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
469 used in the vectorization of STMT. */
471 static tree
472 vect_init_vector (tree stmt, tree vector_var)
474 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
476 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
477 tree new_var;
478 tree init_stmt;
479 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
480 tree vec_oprnd;
481 edge pe;
482 tree new_temp;
483 basic_block new_bb;
485 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
486 add_referenced_tmp_var (new_var);
488 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
489 new_temp = make_ssa_name (new_var, init_stmt);
490 TREE_OPERAND (init_stmt, 0) = new_temp;
492 pe = loop_preheader_edge (loop);
493 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
494 gcc_assert (!new_bb);
496 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
498 fprintf (vect_dump, "created new init_stmt: ");
499 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
502 vec_oprnd = TREE_OPERAND (init_stmt, 0);
503 return vec_oprnd;
507 /* Function vect_get_vec_def_for_operand.
509 OP is an operand in STMT. This function returns a (vector) def that will be
510 used in the vectorized stmt for STMT.
512 In the case that OP is an SSA_NAME which is defined in the loop, then
513 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
515 In case OP is an invariant or constant, a new stmt that creates a vector def
516 needs to be introduced. */
518 static tree
519 vect_get_vec_def_for_operand (tree op, tree stmt)
521 tree vec_oprnd;
522 tree vec_stmt;
523 tree def_stmt;
524 stmt_vec_info def_stmt_info = NULL;
525 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
526 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
527 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
528 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
529 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
530 basic_block bb;
531 tree vec_inv;
532 tree t = NULL_TREE;
533 tree def;
534 int i;
536 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
538 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
539 print_generic_expr (vect_dump, op, TDF_SLIM);
542 /** ===> Case 1: operand is a constant. **/
544 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
546 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
548 tree vec_cst;
550 /* Build a tree with vector elements. */
551 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
552 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
554 for (i = nunits - 1; i >= 0; --i)
556 t = tree_cons (NULL_TREE, op, t);
558 vec_cst = build_vector (vectype, t);
559 return vect_init_vector (stmt, vec_cst);
562 gcc_assert (TREE_CODE (op) == SSA_NAME);
564 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
566 def_stmt = SSA_NAME_DEF_STMT (op);
567 def_stmt_info = vinfo_for_stmt (def_stmt);
569 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
571 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
572 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
576 /** ==> Case 2.1: operand is defined inside the loop. **/
578 if (def_stmt_info)
580 /* Get the def from the vectorized stmt. */
582 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
583 gcc_assert (vec_stmt);
584 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
585 return vec_oprnd;
589 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
590 it is a reduction/induction. **/
592 bb = bb_for_stmt (def_stmt);
593 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
595 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
596 fprintf (vect_dump, "reduction/induction - unsupported.");
597 internal_error ("no support for reduction/induction"); /* FORNOW */
601 /** ==> Case 2.3: operand is defined outside the loop -
602 it is a loop invariant. */
604 switch (TREE_CODE (def_stmt))
606 case PHI_NODE:
607 def = PHI_RESULT (def_stmt);
608 break;
609 case MODIFY_EXPR:
610 def = TREE_OPERAND (def_stmt, 0);
611 break;
612 case NOP_EXPR:
613 def = TREE_OPERAND (def_stmt, 0);
614 gcc_assert (IS_EMPTY_STMT (def_stmt));
615 def = op;
616 break;
617 default:
618 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
620 fprintf (vect_dump, "unsupported defining stmt: ");
621 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
623 internal_error ("unsupported defining stmt");
626 /* Build a tree with vector elements.
627 Create 'vec_inv = {inv,inv,..,inv}' */
629 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
630 fprintf (vect_dump, "Create vector_inv.");
632 for (i = nunits - 1; i >= 0; --i)
634 t = tree_cons (NULL_TREE, def, t);
637 vec_inv = build_constructor (vectype, t);
638 return vect_init_vector (stmt, vec_inv);
642 /* Function vect_finish_stmt_generation.
644 Insert a new stmt. */
646 static void
647 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
649 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
651 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
653 fprintf (vect_dump, "add new stmt: ");
654 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
657 #ifdef ENABLE_CHECKING
658 /* Make sure bsi points to the stmt that is being vectorized. */
659 gcc_assert (stmt == bsi_stmt (*bsi));
660 #endif
662 #ifdef USE_MAPPED_LOCATION
663 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
664 #else
665 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
666 #endif
670 /* Function vectorizable_assignment.
672 Check if STMT performs an assignment (copy) that can be vectorized.
673 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
674 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
675 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
677 bool
678 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
680 tree vec_dest;
681 tree scalar_dest;
682 tree op;
683 tree vec_oprnd;
684 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
686 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
687 tree new_temp;
689 /* Is vectorizable assignment? */
691 if (TREE_CODE (stmt) != MODIFY_EXPR)
692 return false;
694 scalar_dest = TREE_OPERAND (stmt, 0);
695 if (TREE_CODE (scalar_dest) != SSA_NAME)
696 return false;
698 op = TREE_OPERAND (stmt, 1);
699 if (!vect_is_simple_use (op, loop_vinfo, NULL))
701 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
702 fprintf (vect_dump, "use not simple.");
703 return false;
706 if (!vec_stmt) /* transformation not required. */
708 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
709 return true;
712 /** Transform. **/
713 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
714 fprintf (vect_dump, "transform assignment.");
716 /* Handle def. */
717 vec_dest = vect_create_destination_var (scalar_dest, vectype);
719 /* Handle use. */
720 op = TREE_OPERAND (stmt, 1);
721 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
723 /* Arguments are ready. create the new vector stmt. */
724 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
725 new_temp = make_ssa_name (vec_dest, *vec_stmt);
726 TREE_OPERAND (*vec_stmt, 0) = new_temp;
727 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
729 return true;
733 /* Function vectorizable_operation.
735 Check if STMT performs a binary or unary operation that can be vectorized.
736 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
737 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
738 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
740 bool
741 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
743 tree vec_dest;
744 tree scalar_dest;
745 tree operation;
746 tree op0, op1 = NULL;
747 tree vec_oprnd0, vec_oprnd1=NULL;
748 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
751 int i;
752 enum tree_code code;
753 enum machine_mode vec_mode;
754 tree new_temp;
755 int op_type;
756 tree op;
757 optab optab;
759 /* Is STMT a vectorizable binary/unary operation? */
760 if (TREE_CODE (stmt) != MODIFY_EXPR)
761 return false;
763 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
764 return false;
766 operation = TREE_OPERAND (stmt, 1);
767 code = TREE_CODE (operation);
768 optab = optab_for_tree_code (code, vectype);
770 /* Support only unary or binary operations. */
771 op_type = TREE_CODE_LENGTH (code);
772 if (op_type != unary_op && op_type != binary_op)
774 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
775 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
776 return false;
779 for (i = 0; i < op_type; i++)
781 op = TREE_OPERAND (operation, i);
782 if (!vect_is_simple_use (op, loop_vinfo, NULL))
784 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
785 fprintf (vect_dump, "use not simple.");
786 return false;
790 /* Supportable by target? */
791 if (!optab)
793 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
794 fprintf (vect_dump, "no optab.");
795 return false;
797 vec_mode = TYPE_MODE (vectype);
798 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
800 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
801 fprintf (vect_dump, "op not supported by target.");
802 return false;
805 if (!vec_stmt) /* transformation not required. */
807 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
808 return true;
811 /** Transform. **/
813 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
814 fprintf (vect_dump, "transform binary/unary operation.");
816 /* Handle def. */
817 scalar_dest = TREE_OPERAND (stmt, 0);
818 vec_dest = vect_create_destination_var (scalar_dest, vectype);
820 /* Handle uses. */
821 op0 = TREE_OPERAND (operation, 0);
822 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
824 if (op_type == binary_op)
826 op1 = TREE_OPERAND (operation, 1);
827 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
830 /* Arguments are ready. create the new vector stmt. */
832 if (op_type == binary_op)
833 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
834 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
835 else
836 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
837 build1 (code, vectype, vec_oprnd0));
838 new_temp = make_ssa_name (vec_dest, *vec_stmt);
839 TREE_OPERAND (*vec_stmt, 0) = new_temp;
840 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
842 return true;
846 /* Function vectorizable_store.
848 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
849 can be vectorized.
850 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
851 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
852 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
854 bool
855 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
857 tree scalar_dest;
858 tree data_ref;
859 tree op;
860 tree vec_oprnd1;
861 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
862 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
863 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
864 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
865 enum machine_mode vec_mode;
866 tree dummy;
867 enum dr_alignment_support alignment_support_cheme;
869 /* Is vectorizable store? */
871 if (TREE_CODE (stmt) != MODIFY_EXPR)
872 return false;
874 scalar_dest = TREE_OPERAND (stmt, 0);
875 if (TREE_CODE (scalar_dest) != ARRAY_REF
876 && TREE_CODE (scalar_dest) != INDIRECT_REF)
877 return false;
879 op = TREE_OPERAND (stmt, 1);
880 if (!vect_is_simple_use (op, loop_vinfo, NULL))
882 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
883 fprintf (vect_dump, "use not simple.");
884 return false;
887 vec_mode = TYPE_MODE (vectype);
888 /* FORNOW. In some cases can vectorize even if data-type not supported
889 (e.g. - array initialization with 0). */
890 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
891 return false;
893 if (!STMT_VINFO_DATA_REF (stmt_info))
894 return false;
897 if (!vec_stmt) /* transformation not required. */
899 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
900 return true;
903 /** Transform. **/
905 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
906 fprintf (vect_dump, "transform store");
908 alignment_support_cheme = vect_supportable_dr_alignment (dr);
909 gcc_assert (alignment_support_cheme);
910 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
912 /* Handle use - get the vectorized def from the defining stmt. */
913 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
915 /* Handle def. */
916 /* FORNOW: make sure the data reference is aligned. */
917 vect_align_data_ref (stmt);
918 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
919 data_ref = build_fold_indirect_ref (data_ref);
921 /* Arguments are ready. create the new vector stmt. */
922 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
923 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
925 return true;
929 /* vectorizable_load.
931 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
932 can be vectorized.
933 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
934 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
935 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
937 bool
938 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
940 tree scalar_dest;
941 tree vec_dest = NULL;
942 tree data_ref = NULL;
943 tree op;
944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
945 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
946 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
947 tree new_temp;
948 int mode;
949 tree init_addr;
950 tree new_stmt;
951 tree dummy;
952 basic_block new_bb;
953 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
954 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
955 edge pe = loop_preheader_edge (loop);
956 enum dr_alignment_support alignment_support_cheme;
958 /* Is vectorizable load? */
960 if (TREE_CODE (stmt) != MODIFY_EXPR)
961 return false;
963 scalar_dest = TREE_OPERAND (stmt, 0);
964 if (TREE_CODE (scalar_dest) != SSA_NAME)
965 return false;
967 op = TREE_OPERAND (stmt, 1);
968 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
969 return false;
971 if (!STMT_VINFO_DATA_REF (stmt_info))
972 return false;
974 mode = (int) TYPE_MODE (vectype);
976 /* FORNOW. In some cases can vectorize even if data-type not supported
977 (e.g. - data copies). */
978 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
980 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
981 fprintf (vect_dump, "Aligned load, but unsupported type.");
982 return false;
985 if (!vec_stmt) /* transformation not required. */
987 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
988 return true;
991 /** Transform. **/
993 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
994 fprintf (vect_dump, "transform load.");
996 alignment_support_cheme = vect_supportable_dr_alignment (dr);
997 gcc_assert (alignment_support_cheme);
999 if (alignment_support_cheme == dr_aligned
1000 || alignment_support_cheme == dr_unaligned_supported)
1002 /* Create:
1003 p = initial_addr;
1004 indx = 0;
1005 loop {
1006 vec_dest = *(p);
1007 indx = indx + 1;
1011 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1012 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1013 if (aligned_access_p (dr))
1014 data_ref = build_fold_indirect_ref (data_ref);
1015 else
1017 int mis = DR_MISALIGNMENT (dr);
1018 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1019 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1020 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1022 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1023 new_temp = make_ssa_name (vec_dest, new_stmt);
1024 TREE_OPERAND (new_stmt, 0) = new_temp;
1025 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1027 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1029 /* Create:
1030 p1 = initial_addr;
1031 msq_init = *(floor(p1))
1032 p2 = initial_addr + VS - 1;
1033 magic = have_builtin ? builtin_result : initial_address;
1034 indx = 0;
1035 loop {
1036 p2' = p2 + indx * vectype_size
1037 lsq = *(floor(p2'))
1038 vec_dest = realign_load (msq, lsq, magic)
1039 indx = indx + 1;
1040 msq = lsq;
1044 tree offset;
1045 tree magic;
1046 tree phi_stmt;
1047 tree msq_init;
1048 tree msq, lsq;
1049 tree dataref_ptr;
1050 tree params;
1052 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1053 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1054 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1055 &init_addr, true);
1056 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1057 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1058 new_temp = make_ssa_name (vec_dest, new_stmt);
1059 TREE_OPERAND (new_stmt, 0) = new_temp;
1060 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1061 gcc_assert (!new_bb);
1062 msq_init = TREE_OPERAND (new_stmt, 0);
1065 /* <2> Create lsq = *(floor(p2')) in the loop */
1066 offset = build_int_cst (integer_type_node,
1067 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1068 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1069 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1070 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1071 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1072 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1073 new_temp = make_ssa_name (vec_dest, new_stmt);
1074 TREE_OPERAND (new_stmt, 0) = new_temp;
1075 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1076 lsq = TREE_OPERAND (new_stmt, 0);
1079 /* <3> */
1080 if (targetm.vectorize.builtin_mask_for_load)
1082 /* Create permutation mask, if required, in loop preheader. */
1083 tree builtin_decl;
1084 params = build_tree_list (NULL_TREE, init_addr);
1085 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1086 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1087 new_stmt = build_function_call_expr (builtin_decl, params);
1088 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1089 new_temp = make_ssa_name (vec_dest, new_stmt);
1090 TREE_OPERAND (new_stmt, 0) = new_temp;
1091 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1092 gcc_assert (!new_bb);
1093 magic = TREE_OPERAND (new_stmt, 0);
1095 /* Since we have just created a CALL_EXPR, we may need to
1096 rename call-clobbered variables. */
1097 mark_call_clobbered_vars_to_rename ();
1099 else
1101 /* Use current address instead of init_addr for reduced reg pressure.
1103 magic = dataref_ptr;
1107 /* <4> Create msq = phi <msq_init, lsq> in loop */
1108 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1109 msq = make_ssa_name (vec_dest, NULL_TREE);
1110 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1111 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1112 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1113 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1116 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1117 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1118 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1119 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1120 new_temp = make_ssa_name (vec_dest, new_stmt);
1121 TREE_OPERAND (new_stmt, 0) = new_temp;
1122 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1124 else
1125 gcc_unreachable ();
1127 *vec_stmt = new_stmt;
1128 return true;
1132 /* Function vect_transform_stmt.
1134 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1136 bool
1137 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1139 bool is_store = false;
1140 tree vec_stmt = NULL_TREE;
1141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1142 bool done;
1144 switch (STMT_VINFO_TYPE (stmt_info))
1146 case op_vec_info_type:
1147 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1148 gcc_assert (done);
1149 break;
1151 case assignment_vec_info_type:
1152 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1153 gcc_assert (done);
1154 break;
1156 case load_vec_info_type:
1157 done = vectorizable_load (stmt, bsi, &vec_stmt);
1158 gcc_assert (done);
1159 break;
1161 case store_vec_info_type:
1162 done = vectorizable_store (stmt, bsi, &vec_stmt);
1163 gcc_assert (done);
1164 is_store = true;
1165 break;
1166 default:
1167 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1168 fprintf (vect_dump, "stmt not supported.");
1169 gcc_unreachable ();
1172 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1174 return is_store;
1178 /* This function builds ni_name = number of iterations loop executes
1179 on the loop preheader. */
1181 static tree
1182 vect_build_loop_niters (loop_vec_info loop_vinfo)
1184 tree ni_name, stmt, var;
1185 edge pe;
1186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1187 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1189 var = create_tmp_var (TREE_TYPE (ni), "niters");
1190 add_referenced_tmp_var (var);
1191 ni_name = force_gimple_operand (ni, &stmt, false, var);
1193 pe = loop_preheader_edge (loop);
1194 if (stmt)
1196 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1197 gcc_assert (!new_bb);
1200 return ni_name;
1204 /* This function generates the following statements:
1206 ni_name = number of iterations loop executes
1207 ratio = ni_name / vf
1208 ratio_mult_vf_name = ratio * vf
1210 and places them at the loop preheader edge. */
1212 static void
1213 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1214 tree *ni_name_ptr,
1215 tree *ratio_mult_vf_name_ptr,
1216 tree *ratio_name_ptr)
1219 edge pe;
1220 basic_block new_bb;
1221 tree stmt, ni_name;
1222 tree var;
1223 tree ratio_name;
1224 tree ratio_mult_vf_name;
1225 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1226 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1227 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1228 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1230 pe = loop_preheader_edge (loop);
1232 /* Generate temporary variable that contains
1233 number of iterations loop executes. */
1235 ni_name = vect_build_loop_niters (loop_vinfo);
1237 /* Create: ratio = ni >> log2(vf) */
1239 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1240 add_referenced_tmp_var (var);
1241 ratio_name = make_ssa_name (var, NULL_TREE);
1242 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1243 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1244 SSA_NAME_DEF_STMT (ratio_name) = stmt;
1246 pe = loop_preheader_edge (loop);
1247 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1248 gcc_assert (!new_bb);
1250 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1252 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1253 add_referenced_tmp_var (var);
1254 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1255 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1256 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1257 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1259 pe = loop_preheader_edge (loop);
1260 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1261 gcc_assert (!new_bb);
1263 *ni_name_ptr = ni_name;
1264 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1265 *ratio_name_ptr = ratio_name;
1267 return;
1271 /* Function vect_update_ivs_after_vectorizer.
1273 "Advance" the induction variables of LOOP to the value they should take
1274 after the execution of LOOP. This is currently necessary because the
1275 vectorizer does not handle induction variables that are used after the
1276 loop. Such a situation occurs when the last iterations of LOOP are
1277 peeled, because:
1278 1. We introduced new uses after LOOP for IVs that were not originally used
1279 after LOOP: the IVs of LOOP are now used by an epilog loop.
1280 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1281 times, whereas the loop IVs should be bumped N times.
1283 Input:
1284 - LOOP - a loop that is going to be vectorized. The last few iterations
1285 of LOOP were peeled.
1286 - NITERS - the number of iterations that LOOP executes (before it is
1287 vectorized). i.e, the number of times the ivs should be bumped.
1288 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1289 coming out from LOOP on which there are uses of the LOOP ivs
1290 (this is the path from LOOP->exit to epilog_loop->preheader).
1292 The new definitions of the ivs are placed in LOOP->exit.
1293 The phi args associated with the edge UPDATE_E in the bb
1294 UPDATE_E->dest are updated accordingly.
1296 Assumption 1: Like the rest of the vectorizer, this function assumes
1297 a single loop exit that has a single predecessor.
1299 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1300 organized in the same order.
1302 Assumption 3: The access function of the ivs is simple enough (see
1303 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1305 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1306 coming out of LOOP on which the ivs of LOOP are used (this is the path
1307 that leads to the epilog loop; other paths skip the epilog loop). This
1308 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1309 needs to have its phis updated.
1312 static void
1313 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1314 edge update_e)
1316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1317 basic_block exit_bb = loop->single_exit->dest;
1318 tree phi, phi1;
1319 basic_block update_bb = update_e->dest;
1321 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1323 /* Make sure there exists a single-predecessor exit bb: */
1324 gcc_assert (single_pred_p (exit_bb));
1326 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1327 phi && phi1;
1328 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1330 tree access_fn = NULL;
1331 tree evolution_part;
1332 tree init_expr;
1333 tree step_expr;
1334 tree var, stmt, ni, ni_name;
1335 block_stmt_iterator last_bsi;
1337 /* Skip virtual phi's. */
1338 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1340 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1341 fprintf (vect_dump, "virtual phi. skip.");
1342 continue;
1345 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1346 gcc_assert (access_fn);
1347 evolution_part =
1348 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1349 gcc_assert (evolution_part != NULL_TREE);
1351 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1352 of degree >= 2 or exponential. */
1353 gcc_assert (!tree_is_chrec (evolution_part));
1355 step_expr = evolution_part;
1356 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1357 loop->num));
1359 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1360 build2 (MULT_EXPR, TREE_TYPE (niters),
1361 niters, step_expr), init_expr);
1363 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1364 add_referenced_tmp_var (var);
1366 ni_name = force_gimple_operand (ni, &stmt, false, var);
1368 /* Insert stmt into exit_bb. */
1369 last_bsi = bsi_last (exit_bb);
1370 if (stmt)
1371 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1373 /* Fix phi expressions in the successor bb. */
1374 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
1375 PHI_ARG_DEF_FROM_EDGE (phi, single_succ_edge (loop->latch)));
1376 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1381 /* Function vect_do_peeling_for_loop_bound
1383 Peel the last iterations of the loop represented by LOOP_VINFO.
1384 The peeled iterations form a new epilog loop. Given that the loop now
1385 iterates NITERS times, the new epilog loop iterates
1386 NITERS % VECTORIZATION_FACTOR times.
1388 The original loop will later be made to iterate
1389 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1391 static void
1392 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1393 struct loops *loops)
1396 tree ni_name, ratio_mult_vf_name;
1397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1398 struct loop *new_loop;
1399 edge update_e;
1400 basic_block preheader;
1401 #ifdef ENABLE_CHECKING
1402 int loop_num;
1403 #endif
1405 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1406 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
1408 /* Generate the following variables on the preheader of original loop:
1410 ni_name = number of iteration the original loop executes
1411 ratio = ni_name / vf
1412 ratio_mult_vf_name = ratio * vf */
1413 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1414 &ratio_mult_vf_name, ratio);
1416 #ifdef ENABLE_CHECKING
1417 loop_num = loop->num;
1418 #endif
1419 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
1420 ratio_mult_vf_name, ni_name, false);
1421 #ifdef ENABLE_CHECKING
1422 gcc_assert (new_loop);
1423 gcc_assert (loop_num == loop->num);
1424 slpeel_verify_cfg_after_peeling (loop, new_loop);
1425 #endif
1427 /* A guard that controls whether the new_loop is to be executed or skipped
1428 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1429 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1430 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1431 is on the path where the LOOP IVs are used and need to be updated. */
1433 preheader = loop_preheader_edge (new_loop)->src;
1434 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
1435 update_e = EDGE_PRED (preheader, 0);
1436 else
1437 update_e = EDGE_PRED (preheader, 1);
1439 /* Update IVs of original loop as if they were advanced
1440 by ratio_mult_vf_name steps. */
1441 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1443 /* After peeling we have to reset scalar evolution analyzer. */
1444 scev_reset ();
1446 return;
1450 /* Function vect_gen_niters_for_prolog_loop
1452 Set the number of iterations for the loop represented by LOOP_VINFO
1453 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1454 and the misalignment of DR - the data reference recorded in
1455 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1456 this loop, the data reference DR will refer to an aligned location.
1458 The following computation is generated:
1460 If the misalignment of DR is known at compile time:
1461 addr_mis = int mis = DR_MISALIGNMENT (dr);
1462 Else, compute address misalignment in bytes:
1463 addr_mis = addr & (vectype_size - 1)
1465 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1467 (elem_size = element type size; an element is the scalar element
1468 whose type is the inner type of the vectype) */
1470 static tree
1471 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1473 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1474 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1475 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1476 tree var, stmt;
1477 tree iters, iters_name;
1478 edge pe;
1479 basic_block new_bb;
1480 tree dr_stmt = DR_STMT (dr);
1481 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1482 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1483 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1484 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1485 tree niters_type = TREE_TYPE (loop_niters);
1487 pe = loop_preheader_edge (loop);
1489 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1491 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1492 int element_size = vectype_align/vf;
1493 int elem_misalign = byte_misalign / element_size;
1495 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1496 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1497 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
1499 else
1501 tree new_stmts = NULL_TREE;
1502 tree start_addr =
1503 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1504 tree ptr_type = TREE_TYPE (start_addr);
1505 tree size = TYPE_SIZE (ptr_type);
1506 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1507 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1508 tree elem_size_log =
1509 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1510 tree vf_tree = build_int_cst (unsigned_type_node, vf);
1511 tree byte_misalign;
1512 tree elem_misalign;
1514 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1515 gcc_assert (!new_bb);
1517 /* Create: byte_misalign = addr & (vectype_size - 1) */
1518 byte_misalign =
1519 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1521 /* Create: elem_misalign = byte_misalign / element_size */
1522 elem_misalign =
1523 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1525 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1526 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1527 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1528 iters = fold_convert (niters_type, iters);
1531 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1532 /* If the loop bound is known at compile time we already verified that it is
1533 greater than vf; since the misalignment ('iters') is at most vf, there's
1534 no need to generate the MIN_EXPR in this case. */
1535 if (TREE_CODE (loop_niters) != INTEGER_CST)
1536 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1538 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1540 fprintf (vect_dump, "niters for prolog loop: ");
1541 print_generic_expr (vect_dump, iters, TDF_SLIM);
1544 var = create_tmp_var (niters_type, "prolog_loop_niters");
1545 add_referenced_tmp_var (var);
1546 iters_name = force_gimple_operand (iters, &stmt, false, var);
1548 /* Insert stmt on loop preheader edge. */
1549 if (stmt)
1551 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1552 gcc_assert (!new_bb);
1555 return iters_name;
1559 /* Function vect_update_init_of_dr
1561 NITERS iterations were peeled from LOOP. DR represents a data reference
1562 in LOOP. This function updates the information recorded in DR to
1563 account for the fact that the first NITERS iterations had already been
1564 executed. Specifically, it updates the OFFSET field of stmt_info. */
1566 static void
1567 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1569 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1570 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
1572 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1573 STMT_VINFO_VECT_STEP (stmt_info)));
1574 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1575 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1579 /* Function vect_update_inits_of_drs
1581 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1582 This function updates the information recorded for the data references in
1583 the loop to account for the fact that the first NITERS iterations had
1584 already been executed. Specifically, it updates the initial_condition of the
1585 access_function of all the data_references in the loop. */
1587 static void
1588 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1590 unsigned int i;
1591 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1592 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1594 if (vect_dump && (dump_flags & TDF_DETAILS))
1595 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1597 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1599 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1600 vect_update_init_of_dr (dr, niters);
1603 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1605 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1606 vect_update_init_of_dr (dr, niters);
1611 /* Function vect_do_peeling_for_alignment
1613 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1614 'niters' is set to the misalignment of one of the data references in the
1615 loop, thereby forcing it to refer to an aligned location at the beginning
1616 of the execution of this loop. The data reference for which we are
1617 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1619 static void
1620 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1622 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1623 tree niters_of_prolog_loop, ni_name;
1624 tree n_iters;
1625 struct loop *new_loop;
1627 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1628 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1630 ni_name = vect_build_loop_niters (loop_vinfo);
1631 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1633 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1634 new_loop =
1635 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
1636 niters_of_prolog_loop, ni_name, true);
1637 #ifdef ENABLE_CHECKING
1638 gcc_assert (new_loop);
1639 slpeel_verify_cfg_after_peeling (new_loop, loop);
1640 #endif
1642 /* Update number of times loop executes. */
1643 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1644 LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
1645 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
1647 /* Update the init conditions of the access functions of all data refs. */
1648 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1650 /* After peeling we have to reset scalar evolution analyzer. */
1651 scev_reset ();
1653 return;
1657 /* Function vect_transform_loop.
1659 The analysis phase has determined that the loop is vectorizable.
1660 Vectorize the loop - created vectorized stmts to replace the scalar
1661 stmts in the loop, and update the loop exit condition. */
1663 void
1664 vect_transform_loop (loop_vec_info loop_vinfo,
1665 struct loops *loops ATTRIBUTE_UNUSED)
1667 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1668 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1669 int nbbs = loop->num_nodes;
1670 block_stmt_iterator si;
1671 int i;
1672 tree ratio = NULL;
1673 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1675 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1676 fprintf (vect_dump, "=== vec_transform_loop ===");
1679 /* Peel the loop if there are data refs with unknown alignment.
1680 Only one data ref with unknown store is allowed. */
1682 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1683 vect_do_peeling_for_alignment (loop_vinfo, loops);
1685 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1686 compile time constant), or it is a constant that doesn't divide by the
1687 vectorization factor, then an epilog loop needs to be created.
1688 We therefore duplicate the loop: the original loop will be vectorized,
1689 and will compute the first (n/VF) iterations. The second copy of the loop
1690 will remain scalar and will compute the remaining (n%VF) iterations.
1691 (VF is the vectorization factor). */
1693 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1694 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1695 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
1696 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
1697 else
1698 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
1699 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
1701 /* 1) Make sure the loop header has exactly two entries
1702 2) Make sure we have a preheader basic block. */
1704 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1706 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1709 /* FORNOW: the vectorizer supports only loops which body consist
1710 of one basic block (header + empty latch). When the vectorizer will
1711 support more involved loop forms, the order by which the BBs are
1712 traversed need to be reconsidered. */
1714 for (i = 0; i < nbbs; i++)
1716 basic_block bb = bbs[i];
1718 for (si = bsi_start (bb); !bsi_end_p (si);)
1720 tree stmt = bsi_stmt (si);
1721 stmt_vec_info stmt_info;
1722 bool is_store;
1724 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1726 fprintf (vect_dump, "------>vectorizing statement: ");
1727 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1729 stmt_info = vinfo_for_stmt (stmt);
1730 gcc_assert (stmt_info);
1731 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1733 bsi_next (&si);
1734 continue;
1736 #ifdef ENABLE_CHECKING
1737 /* FORNOW: Verify that all stmts operate on the same number of
1738 units and no inner unrolling is necessary. */
1739 gcc_assert
1740 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
1741 == vectorization_factor);
1742 #endif
1743 /* -------- vectorize statement ------------ */
1744 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1745 fprintf (vect_dump, "transform statement.");
1747 is_store = vect_transform_stmt (stmt, &si);
1748 if (is_store)
1750 /* free the attached stmt_vec_info and remove the stmt. */
1751 stmt_ann_t ann = stmt_ann (stmt);
1752 free (stmt_info);
1753 set_stmt_info (ann, NULL);
1754 bsi_remove (&si);
1755 continue;
1758 bsi_next (&si);
1759 } /* stmts in BB */
1760 } /* BBs in loop */
1762 slpeel_make_loop_iterate_ntimes (loop, ratio);
1764 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
1765 fprintf (vect_dump, "LOOP VECTORIZED.");