* tree-cfg.c (make_goto_expr_edges): Don't use error_mark_node.
[official-gcc.git] / gcc / tree-vect-transform.c
blob6cd6a1295ddaa95001ffd25df0ffa7fbd0ec2e87
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 /* Utility function dealing with loop peeling (not peeling itself). */
62 static void vect_generate_tmps_on_preheader
63 (loop_vec_info, tree *, tree *, tree *);
64 static tree vect_build_loop_niters (loop_vec_info);
65 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
66 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
67 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
68 static void vect_update_inits_of_drs (loop_vec_info, tree);
69 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
70 static void vect_do_peeling_for_loop_bound
71 (loop_vec_info, tree *, struct loops *);
74 /* Function vect_get_new_vect_var.
76 Returns a name for a new variable. The current naming scheme appends the
77 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
78 the name of vectorizer generated variables, and appends that to NAME if
79 provided. */
81 static tree
82 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
84 const char *prefix;
85 tree new_vect_var;
87 if (var_kind == vect_simple_var)
88 prefix = "vect_";
89 else
90 prefix = "vect_p";
92 if (name)
93 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
94 else
95 new_vect_var = create_tmp_var (type, prefix);
97 return new_vect_var;
101 /* Function vect_create_index_for_vector_ref.
103 Create (and return) an index variable, along with it's update chain in the
104 loop. This variable will be used to access a memory location in a vector
105 operation.
107 Input:
108 LOOP: The loop being vectorized.
109 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
110 function can be added here, or in the loop pre-header.
112 Output:
113 Return an index that will be used to index a vector array. It is expected
114 that a pointer to the first vector will be used as the base address for the
115 indexed reference.
117 FORNOW: we are not trying to be efficient, just creating a new index each
118 time from scratch. At this time all vector references could use the same
119 index.
121 TODO: create only one index to be used by all vector references. Record
122 the index in the LOOP_VINFO the first time this procedure is called and
123 return it on subsequent calls. The increment of this index must be placed
124 just before the conditional expression that ends the single block loop. */
126 static tree
127 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
129 tree init, step;
130 block_stmt_iterator incr_bsi;
131 bool insert_after;
132 tree indx_before_incr, indx_after_incr;
133 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
134 tree incr;
136 /* It is assumed that the base pointer used for vectorized access contains
137 the address of the first vector. Therefore the index used for vectorized
138 access must be initialized to zero and incremented by 1. */
140 init = integer_zero_node;
141 step = integer_one_node;
143 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
144 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
145 &indx_before_incr, &indx_after_incr);
146 incr = bsi_stmt (incr_bsi);
147 get_stmt_operands (incr);
148 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
150 return indx_before_incr;
154 /* Function vect_create_addr_base_for_vector_ref.
156 Create an expression that computes the address of the first memory location
157 that will be accessed for a data reference.
159 Input:
160 STMT: The statement containing the data reference.
161 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
162 OFFSET: Optional. If supplied, it is be added to the initial address.
164 Output:
165 1. Return an SSA_NAME whose value is the address of the memory location of
166 the first vector of the data reference.
167 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
168 these statement(s) which define the returned SSA_NAME.
170 FORNOW: We are only handling array accesses with step 1. */
172 static tree
173 vect_create_addr_base_for_vector_ref (tree stmt,
174 tree *new_stmt_list,
175 tree offset)
177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
178 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
179 tree data_ref_base =
180 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
181 tree base_name = build_fold_indirect_ref (data_ref_base);
182 tree ref = DR_REF (dr);
183 tree scalar_type = TREE_TYPE (ref);
184 tree scalar_ptr_type = build_pointer_type (scalar_type);
185 tree vec_stmt;
186 tree new_temp;
187 tree addr_base, addr_expr;
188 tree dest, new_stmt;
189 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
191 /* Create base_offset */
192 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
193 add_referenced_tmp_var (dest);
194 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
195 append_to_statement_list_force (new_stmt, new_stmt_list);
197 if (offset)
199 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
200 add_referenced_tmp_var (tmp);
201 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
202 STMT_VINFO_VECT_STEP (stmt_info)));
203 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
204 base_offset, offset));
205 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
206 append_to_statement_list_force (new_stmt, new_stmt_list);
209 /* base + base_offset */
210 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
211 base_offset));
213 /* addr_expr = addr_base */
214 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
215 get_name (base_name));
216 add_referenced_tmp_var (addr_expr);
217 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
218 new_temp = make_ssa_name (addr_expr, vec_stmt);
219 TREE_OPERAND (vec_stmt, 0) = new_temp;
220 append_to_statement_list_force (vec_stmt, new_stmt_list);
222 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
224 fprintf (vect_dump, "created ");
225 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
227 return new_temp;
231 /* Function vect_align_data_ref.
233 Handle mislignment of a memory accesses.
235 FORNOW: Can't handle misaligned accesses.
236 Make sure that the dataref is aligned. */
238 static void
239 vect_align_data_ref (tree stmt)
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
244 /* FORNOW: can't handle misaligned accesses;
245 all accesses expected to be aligned. */
246 gcc_assert (aligned_access_p (dr));
250 /* Function vect_create_data_ref_ptr.
252 Create a memory reference expression for vector access, to be used in a
253 vector load/store stmt. The reference is based on a new pointer to vector
254 type (vp).
256 Input:
257 1. STMT: a stmt that references memory. Expected to be of the form
258 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
259 2. BSI: block_stmt_iterator where new stmts can be added.
260 3. OFFSET (optional): an offset to be added to the initial address accessed
261 by the data-ref in STMT.
262 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
263 pointing to the initial address.
265 Output:
266 1. Declare a new ptr to vector_type, and have it point to the base of the
267 data reference (initial addressed accessed by the data reference).
268 For example, for vector of type V8HI, the following code is generated:
270 v8hi *vp;
271 vp = (v8hi *)initial_address;
273 if OFFSET is not supplied:
274 initial_address = &a[init];
275 if OFFSET is supplied:
276 initial_address = &a[init + OFFSET];
278 Return the initial_address in INITIAL_ADDRESS.
280 2. Create a data-reference in the loop based on the new vector pointer vp,
281 and using a new index variable 'idx' as follows:
283 vp' = vp + update
285 where if ONLY_INIT is true:
286 update = zero
287 and otherwise
288 update = idx + vector_type_size
290 Return the pointer vp'.
293 FORNOW: handle only aligned and consecutive accesses. */
295 static tree
296 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
297 tree *initial_address, bool only_init)
299 tree base_name;
300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
304 tree vect_ptr_type;
305 tree vect_ptr;
306 tree tag;
307 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
308 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
309 vuse_optype vuses = STMT_VUSE_OPS (stmt);
310 int nvuses, nv_may_defs, nv_must_defs;
311 int i;
312 tree new_temp;
313 tree vec_stmt;
314 tree new_stmt_list = NULL_TREE;
315 tree idx;
316 edge pe = loop_preheader_edge (loop);
317 basic_block new_bb;
318 tree vect_ptr_init;
319 tree vectype_size;
320 tree ptr_update;
321 tree data_ref_ptr;
322 tree type, tmp, size;
324 base_name = build_fold_indirect_ref (unshare_expr (
325 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
327 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
329 tree data_ref_base = base_name;
330 fprintf (vect_dump, "create array_ref of type: ");
331 print_generic_expr (vect_dump, vectype, TDF_SLIM);
332 if (TREE_CODE (data_ref_base) == VAR_DECL)
333 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
334 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
335 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
336 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
337 fprintf (vect_dump, " vectorizing a record based array ref: ");
338 else if (TREE_CODE (data_ref_base) == SSA_NAME)
339 fprintf (vect_dump, " vectorizing a pointer ref: ");
340 print_generic_expr (vect_dump, base_name, TDF_SLIM);
343 /** (1) Create the new vector-pointer variable: **/
345 vect_ptr_type = build_pointer_type (vectype);
346 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
347 get_name (base_name));
348 add_referenced_tmp_var (vect_ptr);
351 /** (2) Handle aliasing information of the new vector-pointer: **/
353 tag = STMT_VINFO_MEMTAG (stmt_info);
354 gcc_assert (tag);
355 get_var_ann (vect_ptr)->type_mem_tag = tag;
357 /* Mark for renaming all aliased variables
358 (i.e, the may-aliases of the type-mem-tag). */
359 nvuses = NUM_VUSES (vuses);
360 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
361 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
362 for (i = 0; i < nvuses; i++)
364 tree use = VUSE_OP (vuses, i);
365 if (TREE_CODE (use) == SSA_NAME)
366 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
368 for (i = 0; i < nv_may_defs; i++)
370 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
371 if (TREE_CODE (def) == SSA_NAME)
372 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
374 for (i = 0; i < nv_must_defs; i++)
376 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
377 if (TREE_CODE (def) == SSA_NAME)
378 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
382 /** (3) Calculate the initial address the vector-pointer, and set
383 the vector-pointer to point to it before the loop: **/
385 /* Create: (&(base[init_val+offset]) in the loop preheader. */
386 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
387 offset);
388 pe = loop_preheader_edge (loop);
389 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
390 gcc_assert (!new_bb);
391 *initial_address = new_temp;
393 /* Create: p = (vectype *) initial_base */
394 vec_stmt = fold_convert (vect_ptr_type, new_temp);
395 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
396 new_temp = make_ssa_name (vect_ptr, vec_stmt);
397 TREE_OPERAND (vec_stmt, 0) = new_temp;
398 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
399 gcc_assert (!new_bb);
400 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
403 /** (4) Handle the updating of the vector-pointer inside the loop: **/
405 if (only_init) /* No update in loop is required. */
406 return vect_ptr_init;
408 idx = vect_create_index_for_vector_ref (loop_vinfo);
410 /* Create: update = idx * vectype_size */
411 tmp = create_tmp_var (integer_type_node, "update");
412 add_referenced_tmp_var (tmp);
413 size = TYPE_SIZE (vect_ptr_type);
414 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
415 ptr_update = create_tmp_var (type, "update");
416 add_referenced_tmp_var (ptr_update);
417 vectype_size = TYPE_SIZE_UNIT (vectype);
418 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
419 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
420 new_temp = make_ssa_name (tmp, vec_stmt);
421 TREE_OPERAND (vec_stmt, 0) = new_temp;
422 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
423 vec_stmt = fold_convert (type, new_temp);
424 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
425 new_temp = make_ssa_name (ptr_update, vec_stmt);
426 TREE_OPERAND (vec_stmt, 0) = new_temp;
427 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
429 /* Create: data_ref_ptr = vect_ptr_init + update */
430 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
431 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
432 new_temp = make_ssa_name (vect_ptr, vec_stmt);
433 TREE_OPERAND (vec_stmt, 0) = new_temp;
434 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
435 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
437 return data_ref_ptr;
441 /* Function vect_create_destination_var.
443 Create a new temporary of type VECTYPE. */
445 static tree
446 vect_create_destination_var (tree scalar_dest, tree vectype)
448 tree vec_dest;
449 const char *new_name;
451 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
453 new_name = get_name (scalar_dest);
454 if (!new_name)
455 new_name = "var_";
456 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
457 add_referenced_tmp_var (vec_dest);
459 return vec_dest;
463 /* Function vect_init_vector.
465 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
466 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
467 used in the vectorization of STMT. */
469 static tree
470 vect_init_vector (tree stmt, tree vector_var)
472 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
473 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
474 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
475 tree new_var;
476 tree init_stmt;
477 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
478 tree vec_oprnd;
479 edge pe;
480 tree new_temp;
481 basic_block new_bb;
483 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
484 add_referenced_tmp_var (new_var);
486 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
487 new_temp = make_ssa_name (new_var, init_stmt);
488 TREE_OPERAND (init_stmt, 0) = new_temp;
490 pe = loop_preheader_edge (loop);
491 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
492 gcc_assert (!new_bb);
494 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
496 fprintf (vect_dump, "created new init_stmt: ");
497 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
500 vec_oprnd = TREE_OPERAND (init_stmt, 0);
501 return vec_oprnd;
505 /* Function vect_get_vec_def_for_operand.
507 OP is an operand in STMT. This function returns a (vector) def that will be
508 used in the vectorized stmt for STMT.
510 In the case that OP is an SSA_NAME which is defined in the loop, then
511 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
513 In case OP is an invariant or constant, a new stmt that creates a vector def
514 needs to be introduced. */
516 static tree
517 vect_get_vec_def_for_operand (tree op, tree stmt)
519 tree vec_oprnd;
520 tree vec_stmt;
521 tree def_stmt;
522 stmt_vec_info def_stmt_info = NULL;
523 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
524 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
525 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
526 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
527 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
528 basic_block bb;
529 tree vec_inv;
530 tree t = NULL_TREE;
531 tree def;
532 int i;
534 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
536 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
537 print_generic_expr (vect_dump, op, TDF_SLIM);
540 /** ===> Case 1: operand is a constant. **/
542 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
544 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
546 tree vec_cst;
548 /* Build a tree with vector elements. */
549 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
550 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
552 for (i = nunits - 1; i >= 0; --i)
554 t = tree_cons (NULL_TREE, op, t);
556 vec_cst = build_vector (vectype, t);
557 return vect_init_vector (stmt, vec_cst);
560 gcc_assert (TREE_CODE (op) == SSA_NAME);
562 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
564 def_stmt = SSA_NAME_DEF_STMT (op);
565 def_stmt_info = vinfo_for_stmt (def_stmt);
567 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
569 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
570 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
574 /** ==> Case 2.1: operand is defined inside the loop. **/
576 if (def_stmt_info)
578 /* Get the def from the vectorized stmt. */
580 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
581 gcc_assert (vec_stmt);
582 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
583 return vec_oprnd;
587 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
588 it is a reduction/induction. **/
590 bb = bb_for_stmt (def_stmt);
591 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
593 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
594 fprintf (vect_dump, "reduction/induction - unsupported.");
595 internal_error ("no support for reduction/induction"); /* FORNOW */
599 /** ==> Case 2.3: operand is defined outside the loop -
600 it is a loop invariant. */
602 switch (TREE_CODE (def_stmt))
604 case PHI_NODE:
605 def = PHI_RESULT (def_stmt);
606 break;
607 case MODIFY_EXPR:
608 def = TREE_OPERAND (def_stmt, 0);
609 break;
610 case NOP_EXPR:
611 def = TREE_OPERAND (def_stmt, 0);
612 gcc_assert (IS_EMPTY_STMT (def_stmt));
613 def = op;
614 break;
615 default:
616 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
618 fprintf (vect_dump, "unsupported defining stmt: ");
619 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
621 internal_error ("unsupported defining stmt");
624 /* Build a tree with vector elements.
625 Create 'vec_inv = {inv,inv,..,inv}' */
627 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
628 fprintf (vect_dump, "Create vector_inv.");
630 for (i = nunits - 1; i >= 0; --i)
632 t = tree_cons (NULL_TREE, def, t);
635 vec_inv = build_constructor (vectype, t);
636 return vect_init_vector (stmt, vec_inv);
640 /* Function vect_finish_stmt_generation.
642 Insert a new stmt. */
644 static void
645 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
647 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
649 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
651 fprintf (vect_dump, "add new stmt: ");
652 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
655 #ifdef ENABLE_CHECKING
656 /* Make sure bsi points to the stmt that is being vectorized. */
657 gcc_assert (stmt == bsi_stmt (*bsi));
658 #endif
660 #ifdef USE_MAPPED_LOCATION
661 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
662 #else
663 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
664 #endif
668 /* Function vectorizable_assignment.
670 Check if STMT performs an assignment (copy) that can be vectorized.
671 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
672 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
673 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
675 bool
676 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
678 tree vec_dest;
679 tree scalar_dest;
680 tree op;
681 tree vec_oprnd;
682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
683 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
684 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
685 tree new_temp;
687 /* Is vectorizable assignment? */
689 if (TREE_CODE (stmt) != MODIFY_EXPR)
690 return false;
692 scalar_dest = TREE_OPERAND (stmt, 0);
693 if (TREE_CODE (scalar_dest) != SSA_NAME)
694 return false;
696 op = TREE_OPERAND (stmt, 1);
697 if (!vect_is_simple_use (op, loop_vinfo, NULL))
699 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
700 fprintf (vect_dump, "use not simple.");
701 return false;
704 if (!vec_stmt) /* transformation not required. */
706 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
707 return true;
710 /** Transform. **/
711 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
712 fprintf (vect_dump, "transform assignment.");
714 /* Handle def. */
715 vec_dest = vect_create_destination_var (scalar_dest, vectype);
717 /* Handle use. */
718 op = TREE_OPERAND (stmt, 1);
719 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
721 /* Arguments are ready. create the new vector stmt. */
722 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
723 new_temp = make_ssa_name (vec_dest, *vec_stmt);
724 TREE_OPERAND (*vec_stmt, 0) = new_temp;
725 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
727 return true;
731 /* Function vectorizable_operation.
733 Check if STMT performs a binary or unary operation that can be vectorized.
734 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
735 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
736 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
738 bool
739 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
741 tree vec_dest;
742 tree scalar_dest;
743 tree operation;
744 tree op0, op1 = NULL;
745 tree vec_oprnd0, vec_oprnd1=NULL;
746 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
747 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
749 int i;
750 enum tree_code code;
751 enum machine_mode vec_mode;
752 tree new_temp;
753 int op_type;
754 tree op;
755 optab optab;
757 /* Is STMT a vectorizable binary/unary operation? */
758 if (TREE_CODE (stmt) != MODIFY_EXPR)
759 return false;
761 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
762 return false;
764 operation = TREE_OPERAND (stmt, 1);
765 code = TREE_CODE (operation);
766 optab = optab_for_tree_code (code, vectype);
768 /* Support only unary or binary operations. */
769 op_type = TREE_CODE_LENGTH (code);
770 if (op_type != unary_op && op_type != binary_op)
772 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
773 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
774 return false;
777 for (i = 0; i < op_type; i++)
779 op = TREE_OPERAND (operation, i);
780 if (!vect_is_simple_use (op, loop_vinfo, NULL))
782 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
783 fprintf (vect_dump, "use not simple.");
784 return false;
788 /* Supportable by target? */
789 if (!optab)
791 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
792 fprintf (vect_dump, "no optab.");
793 return false;
795 vec_mode = TYPE_MODE (vectype);
796 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
798 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
799 fprintf (vect_dump, "op not supported by target.");
800 return false;
803 if (!vec_stmt) /* transformation not required. */
805 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
806 return true;
809 /** Transform. **/
811 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
812 fprintf (vect_dump, "transform binary/unary operation.");
814 /* Handle def. */
815 scalar_dest = TREE_OPERAND (stmt, 0);
816 vec_dest = vect_create_destination_var (scalar_dest, vectype);
818 /* Handle uses. */
819 op0 = TREE_OPERAND (operation, 0);
820 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
822 if (op_type == binary_op)
824 op1 = TREE_OPERAND (operation, 1);
825 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
828 /* Arguments are ready. create the new vector stmt. */
830 if (op_type == binary_op)
831 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
832 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
833 else
834 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
835 build1 (code, vectype, vec_oprnd0));
836 new_temp = make_ssa_name (vec_dest, *vec_stmt);
837 TREE_OPERAND (*vec_stmt, 0) = new_temp;
838 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
840 return true;
844 /* Function vectorizable_store.
846 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
847 can be vectorized.
848 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
849 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
850 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
852 bool
853 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
855 tree scalar_dest;
856 tree data_ref;
857 tree op;
858 tree vec_oprnd1;
859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
860 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
862 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
863 enum machine_mode vec_mode;
864 tree dummy;
865 enum dr_alignment_support alignment_support_cheme;
867 /* Is vectorizable store? */
869 if (TREE_CODE (stmt) != MODIFY_EXPR)
870 return false;
872 scalar_dest = TREE_OPERAND (stmt, 0);
873 if (TREE_CODE (scalar_dest) != ARRAY_REF
874 && TREE_CODE (scalar_dest) != INDIRECT_REF)
875 return false;
877 op = TREE_OPERAND (stmt, 1);
878 if (!vect_is_simple_use (op, loop_vinfo, NULL))
880 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
881 fprintf (vect_dump, "use not simple.");
882 return false;
885 vec_mode = TYPE_MODE (vectype);
886 /* FORNOW. In some cases can vectorize even if data-type not supported
887 (e.g. - array initialization with 0). */
888 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
889 return false;
891 if (!STMT_VINFO_DATA_REF (stmt_info))
892 return false;
895 if (!vec_stmt) /* transformation not required. */
897 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
898 return true;
901 /** Transform. **/
903 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
904 fprintf (vect_dump, "transform store");
906 alignment_support_cheme = vect_supportable_dr_alignment (dr);
907 gcc_assert (alignment_support_cheme);
908 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
910 /* Handle use - get the vectorized def from the defining stmt. */
911 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
913 /* Handle def. */
914 /* FORNOW: make sure the data reference is aligned. */
915 vect_align_data_ref (stmt);
916 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
917 data_ref = build_fold_indirect_ref (data_ref);
919 /* Arguments are ready. create the new vector stmt. */
920 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
921 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
923 return true;
927 /* vectorizable_load.
929 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
930 can be vectorized.
931 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
932 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
933 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
935 bool
936 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
938 tree scalar_dest;
939 tree vec_dest = NULL;
940 tree data_ref = NULL;
941 tree op;
942 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
943 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
944 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
945 tree new_temp;
946 int mode;
947 tree init_addr;
948 tree new_stmt;
949 tree dummy;
950 basic_block new_bb;
951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
953 edge pe = loop_preheader_edge (loop);
954 enum dr_alignment_support alignment_support_cheme;
956 /* Is vectorizable load? */
958 if (TREE_CODE (stmt) != MODIFY_EXPR)
959 return false;
961 scalar_dest = TREE_OPERAND (stmt, 0);
962 if (TREE_CODE (scalar_dest) != SSA_NAME)
963 return false;
965 op = TREE_OPERAND (stmt, 1);
966 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
967 return false;
969 if (!STMT_VINFO_DATA_REF (stmt_info))
970 return false;
972 mode = (int) TYPE_MODE (vectype);
974 /* FORNOW. In some cases can vectorize even if data-type not supported
975 (e.g. - data copies). */
976 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
978 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
979 fprintf (vect_dump, "Aligned load, but unsupported type.");
980 return false;
983 if (!vec_stmt) /* transformation not required. */
985 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
986 return true;
989 /** Transform. **/
991 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
992 fprintf (vect_dump, "transform load.");
994 alignment_support_cheme = vect_supportable_dr_alignment (dr);
995 gcc_assert (alignment_support_cheme);
997 if (alignment_support_cheme == dr_aligned
998 || alignment_support_cheme == dr_unaligned_supported)
1000 /* Create:
1001 p = initial_addr;
1002 indx = 0;
1003 loop {
1004 vec_dest = *(p);
1005 indx = indx + 1;
1009 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1010 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1011 if (aligned_access_p (dr))
1012 data_ref = build_fold_indirect_ref (data_ref);
1013 else
1015 int mis = DR_MISALIGNMENT (dr);
1016 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1017 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1018 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1020 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1021 new_temp = make_ssa_name (vec_dest, new_stmt);
1022 TREE_OPERAND (new_stmt, 0) = new_temp;
1023 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1025 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1027 /* Create:
1028 p1 = initial_addr;
1029 msq_init = *(floor(p1))
1030 p2 = initial_addr + VS - 1;
1031 magic = have_builtin ? builtin_result : initial_address;
1032 indx = 0;
1033 loop {
1034 p2' = p2 + indx * vectype_size
1035 lsq = *(floor(p2'))
1036 vec_dest = realign_load (msq, lsq, magic)
1037 indx = indx + 1;
1038 msq = lsq;
1042 tree offset;
1043 tree magic;
1044 tree phi_stmt;
1045 tree msq_init;
1046 tree msq, lsq;
1047 tree dataref_ptr;
1048 tree params;
1050 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1051 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1052 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1053 &init_addr, true);
1054 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1055 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1056 new_temp = make_ssa_name (vec_dest, new_stmt);
1057 TREE_OPERAND (new_stmt, 0) = new_temp;
1058 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1059 gcc_assert (!new_bb);
1060 msq_init = TREE_OPERAND (new_stmt, 0);
1063 /* <2> Create lsq = *(floor(p2')) in the loop */
1064 offset = build_int_cst (integer_type_node,
1065 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1066 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1068 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1069 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1070 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1071 new_temp = make_ssa_name (vec_dest, new_stmt);
1072 TREE_OPERAND (new_stmt, 0) = new_temp;
1073 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1074 lsq = TREE_OPERAND (new_stmt, 0);
1077 /* <3> */
1078 if (targetm.vectorize.builtin_mask_for_load)
1080 /* Create permutation mask, if required, in loop preheader. */
1081 tree builtin_decl;
1082 params = build_tree_list (NULL_TREE, init_addr);
1083 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1084 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1085 new_stmt = build_function_call_expr (builtin_decl, params);
1086 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1087 new_temp = make_ssa_name (vec_dest, new_stmt);
1088 TREE_OPERAND (new_stmt, 0) = new_temp;
1089 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1090 gcc_assert (!new_bb);
1091 magic = TREE_OPERAND (new_stmt, 0);
1093 /* Since we have just created a CALL_EXPR, we may need to
1094 rename call-clobbered variables. */
1095 mark_call_clobbered_vars_to_rename ();
1097 else
1099 /* Use current address instead of init_addr for reduced reg pressure.
1101 magic = dataref_ptr;
1105 /* <4> Create msq = phi <msq_init, lsq> in loop */
1106 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1107 msq = make_ssa_name (vec_dest, NULL_TREE);
1108 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1109 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1110 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1111 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1114 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1115 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1116 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1117 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1118 new_temp = make_ssa_name (vec_dest, new_stmt);
1119 TREE_OPERAND (new_stmt, 0) = new_temp;
1120 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1122 else
1123 gcc_unreachable ();
1125 *vec_stmt = new_stmt;
1126 return true;
1130 /* Function vect_transform_stmt.
1132 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1134 bool
1135 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1137 bool is_store = false;
1138 tree vec_stmt = NULL_TREE;
1139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140 bool done;
1142 switch (STMT_VINFO_TYPE (stmt_info))
1144 case op_vec_info_type:
1145 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1146 gcc_assert (done);
1147 break;
1149 case assignment_vec_info_type:
1150 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1151 gcc_assert (done);
1152 break;
1154 case load_vec_info_type:
1155 done = vectorizable_load (stmt, bsi, &vec_stmt);
1156 gcc_assert (done);
1157 break;
1159 case store_vec_info_type:
1160 done = vectorizable_store (stmt, bsi, &vec_stmt);
1161 gcc_assert (done);
1162 is_store = true;
1163 break;
1164 default:
1165 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1166 fprintf (vect_dump, "stmt not supported.");
1167 gcc_unreachable ();
1170 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1172 return is_store;
1176 /* This function builds ni_name = number of iterations loop executes
1177 on the loop preheader. */
1179 static tree
1180 vect_build_loop_niters (loop_vec_info loop_vinfo)
1182 tree ni_name, stmt, var;
1183 edge pe;
1184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1185 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1187 var = create_tmp_var (TREE_TYPE (ni), "niters");
1188 add_referenced_tmp_var (var);
1189 ni_name = force_gimple_operand (ni, &stmt, false, var);
1191 pe = loop_preheader_edge (loop);
1192 if (stmt)
1194 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1195 gcc_assert (!new_bb);
1198 return ni_name;
1202 /* This function generates the following statements:
1204 ni_name = number of iterations loop executes
1205 ratio = ni_name / vf
1206 ratio_mult_vf_name = ratio * vf
1208 and places them at the loop preheader edge. */
1210 static void
1211 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1212 tree *ni_name_ptr,
1213 tree *ratio_mult_vf_name_ptr,
1214 tree *ratio_name_ptr)
1217 edge pe;
1218 basic_block new_bb;
1219 tree stmt, ni_name;
1220 tree var;
1221 tree ratio_name;
1222 tree ratio_mult_vf_name;
1223 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1224 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1225 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1226 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1228 pe = loop_preheader_edge (loop);
1230 /* Generate temporary variable that contains
1231 number of iterations loop executes. */
1233 ni_name = vect_build_loop_niters (loop_vinfo);
1235 /* Create: ratio = ni >> log2(vf) */
1237 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1238 add_referenced_tmp_var (var);
1239 ratio_name = make_ssa_name (var, NULL_TREE);
1240 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1241 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1242 SSA_NAME_DEF_STMT (ratio_name) = stmt;
1244 pe = loop_preheader_edge (loop);
1245 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1246 gcc_assert (!new_bb);
1248 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1250 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1251 add_referenced_tmp_var (var);
1252 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1253 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1254 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1255 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1257 pe = loop_preheader_edge (loop);
1258 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1259 gcc_assert (!new_bb);
1261 *ni_name_ptr = ni_name;
1262 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1263 *ratio_name_ptr = ratio_name;
1265 return;
1269 /* Function vect_update_ivs_after_vectorizer.
1271 "Advance" the induction variables of LOOP to the value they should take
1272 after the execution of LOOP. This is currently necessary because the
1273 vectorizer does not handle induction variables that are used after the
1274 loop. Such a situation occurs when the last iterations of LOOP are
1275 peeled, because:
1276 1. We introduced new uses after LOOP for IVs that were not originally used
1277 after LOOP: the IVs of LOOP are now used by an epilog loop.
1278 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1279 times, whereas the loop IVs should be bumped N times.
1281 Input:
1282 - LOOP - a loop that is going to be vectorized. The last few iterations
1283 of LOOP were peeled.
1284 - NITERS - the number of iterations that LOOP executes (before it is
1285 vectorized). i.e, the number of times the ivs should be bumped.
1286 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1287 coming out from LOOP on which there are uses of the LOOP ivs
1288 (this is the path from LOOP->exit to epilog_loop->preheader).
1290 The new definitions of the ivs are placed in LOOP->exit.
1291 The phi args associated with the edge UPDATE_E in the bb
1292 UPDATE_E->dest are updated accordingly.
1294 Assumption 1: Like the rest of the vectorizer, this function assumes
1295 a single loop exit that has a single predecessor.
1297 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1298 organized in the same order.
1300 Assumption 3: The access function of the ivs is simple enough (see
1301 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1303 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1304 coming out of LOOP on which the ivs of LOOP are used (this is the path
1305 that leads to the epilog loop; other paths skip the epilog loop). This
1306 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1307 needs to have its phis updated.
1310 static void
1311 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1312 edge update_e)
1314 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1315 basic_block exit_bb = loop->single_exit->dest;
1316 tree phi, phi1;
1317 basic_block update_bb = update_e->dest;
1319 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1321 /* Make sure there exists a single-predecessor exit bb: */
1322 gcc_assert (single_pred_p (exit_bb));
1324 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1325 phi && phi1;
1326 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1328 tree access_fn = NULL;
1329 tree evolution_part;
1330 tree init_expr;
1331 tree step_expr;
1332 tree var, stmt, ni, ni_name;
1333 block_stmt_iterator last_bsi;
1335 /* Skip virtual phi's. */
1336 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1338 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1339 fprintf (vect_dump, "virtual phi. skip.");
1340 continue;
1343 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1344 gcc_assert (access_fn);
1345 evolution_part =
1346 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1347 gcc_assert (evolution_part != NULL_TREE);
1349 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1350 of degree >= 2 or exponential. */
1351 gcc_assert (!tree_is_chrec (evolution_part));
1353 step_expr = evolution_part;
1354 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1355 loop->num));
1357 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1358 build2 (MULT_EXPR, TREE_TYPE (niters),
1359 niters, step_expr), init_expr);
1361 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1362 add_referenced_tmp_var (var);
1364 ni_name = force_gimple_operand (ni, &stmt, false, var);
1366 /* Insert stmt into exit_bb. */
1367 last_bsi = bsi_last (exit_bb);
1368 if (stmt)
1369 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1371 /* Fix phi expressions in the successor bb. */
1372 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
1373 PHI_ARG_DEF_FROM_EDGE (phi, single_succ_edge (loop->latch)));
1374 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1379 /* Function vect_do_peeling_for_loop_bound
1381 Peel the last iterations of the loop represented by LOOP_VINFO.
1382 The peeled iterations form a new epilog loop. Given that the loop now
1383 iterates NITERS times, the new epilog loop iterates
1384 NITERS % VECTORIZATION_FACTOR times.
1386 The original loop will later be made to iterate
1387 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1389 static void
1390 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1391 struct loops *loops)
1394 tree ni_name, ratio_mult_vf_name;
1395 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1396 struct loop *new_loop;
1397 edge update_e;
1398 basic_block preheader;
1399 #ifdef ENABLE_CHECKING
1400 int loop_num;
1401 #endif
1403 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1404 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
1406 /* Generate the following variables on the preheader of original loop:
1408 ni_name = number of iteration the original loop executes
1409 ratio = ni_name / vf
1410 ratio_mult_vf_name = ratio * vf */
1411 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1412 &ratio_mult_vf_name, ratio);
1414 #ifdef ENABLE_CHECKING
1415 loop_num = loop->num;
1416 #endif
1417 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
1418 ratio_mult_vf_name, ni_name, false);
1419 #ifdef ENABLE_CHECKING
1420 gcc_assert (new_loop);
1421 gcc_assert (loop_num == loop->num);
1422 slpeel_verify_cfg_after_peeling (loop, new_loop);
1423 #endif
1425 /* A guard that controls whether the new_loop is to be executed or skipped
1426 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1427 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1428 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1429 is on the path where the LOOP IVs are used and need to be updated. */
1431 preheader = loop_preheader_edge (new_loop)->src;
1432 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
1433 update_e = EDGE_PRED (preheader, 0);
1434 else
1435 update_e = EDGE_PRED (preheader, 1);
1437 /* Update IVs of original loop as if they were advanced
1438 by ratio_mult_vf_name steps. */
1439 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1441 /* After peeling we have to reset scalar evolution analyzer. */
1442 scev_reset ();
1444 return;
1448 /* Function vect_gen_niters_for_prolog_loop
1450 Set the number of iterations for the loop represented by LOOP_VINFO
1451 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1452 and the misalignment of DR - the first data reference recorded in
1453 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1454 this loop, the data reference DR will refer to an aligned location.
1456 The following computation is generated:
1458 compute address misalignment in bytes:
1459 addr_mis = addr & (vectype_size - 1)
1461 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1463 (elem_size = element type size; an element is the scalar element
1464 whose type is the inner type of the vectype) */
1466 static tree
1467 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1469 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1470 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1471 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1472 tree var, stmt;
1473 tree iters, iters_name;
1474 edge pe;
1475 basic_block new_bb;
1476 tree dr_stmt = DR_STMT (dr);
1477 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1478 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1479 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1480 tree elem_misalign;
1481 tree byte_misalign;
1482 tree new_stmts = NULL_TREE;
1483 tree start_addr =
1484 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1485 tree ptr_type = TREE_TYPE (start_addr);
1486 tree size = TYPE_SIZE (ptr_type);
1487 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1488 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1489 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1490 tree niters_type = TREE_TYPE (loop_niters);
1491 tree elem_size_log =
1492 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1493 tree vf_tree = build_int_cst (unsigned_type_node, vf);
1495 pe = loop_preheader_edge (loop);
1496 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1497 gcc_assert (!new_bb);
1499 /* Create: byte_misalign = addr & (vectype_size - 1) */
1500 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1502 /* Create: elem_misalign = byte_misalign / element_size */
1503 elem_misalign =
1504 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1506 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1507 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1508 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1509 iters = fold_convert (niters_type, iters);
1511 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1512 /* If the loop bound is known at compile time we already verified that it is
1513 greater than vf; since the misalignment ('iters') is at most vf, there's
1514 no need to generate the MIN_EXPR in this case. */
1515 if (TREE_CODE (loop_niters) != INTEGER_CST)
1516 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1518 var = create_tmp_var (niters_type, "prolog_loop_niters");
1519 add_referenced_tmp_var (var);
1520 iters_name = force_gimple_operand (iters, &stmt, false, var);
1522 /* Insert stmt on loop preheader edge. */
1523 pe = loop_preheader_edge (loop);
1524 if (stmt)
1526 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1527 gcc_assert (!new_bb);
1530 return iters_name;
1534 /* Function vect_update_inits_of_dr
1536 NITERS iterations were peeled from LOOP. DR represents a data reference
1537 in LOOP. This function updates the information recorded in DR to
1538 account for the fact that the first NITERS iterations had already been
1539 executed. Specifically, it updates the OFFSET field of stmt_info. */
1541 static void
1542 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
1544 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1545 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
1547 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1548 STMT_VINFO_VECT_STEP (stmt_info)));
1549 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1550 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1554 /* Function vect_update_inits_of_drs
1556 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1557 This function updates the information recorded for the data references in
1558 the loop to account for the fact that the first NITERS iterations had
1559 already been executed. Specifically, it updates the initial_condition of the
1560 access_function of all the data_references in the loop. */
1562 static void
1563 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1565 unsigned int i;
1566 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1567 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1569 if (vect_dump && (dump_flags & TDF_DETAILS))
1570 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1572 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1574 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1575 vect_update_inits_of_dr (dr, niters);
1578 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1580 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1581 vect_update_inits_of_dr (dr, niters);
1586 /* Function vect_do_peeling_for_alignment
1588 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1589 'niters' is set to the misalignment of one of the data references in the
1590 loop, thereby forcing it to refer to an aligned location at the beginning
1591 of the execution of this loop. The data reference for which we are
1592 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1594 static void
1595 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598 tree niters_of_prolog_loop, ni_name;
1599 tree n_iters;
1600 struct loop *new_loop;
1602 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1603 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1605 ni_name = vect_build_loop_niters (loop_vinfo);
1606 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1608 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1609 new_loop =
1610 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
1611 niters_of_prolog_loop, ni_name, true);
1612 #ifdef ENABLE_CHECKING
1613 gcc_assert (new_loop);
1614 slpeel_verify_cfg_after_peeling (new_loop, loop);
1615 #endif
1617 /* Update number of times loop executes. */
1618 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1619 LOOP_VINFO_NITERS (loop_vinfo) =
1620 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
1622 /* Update the init conditions of the access functions of all data refs. */
1623 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1625 /* After peeling we have to reset scalar evolution analyzer. */
1626 scev_reset ();
1628 return;
1632 /* Function vect_transform_loop.
1634 The analysis phase has determined that the loop is vectorizable.
1635 Vectorize the loop - created vectorized stmts to replace the scalar
1636 stmts in the loop, and update the loop exit condition. */
1638 void
1639 vect_transform_loop (loop_vec_info loop_vinfo,
1640 struct loops *loops ATTRIBUTE_UNUSED)
1642 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1643 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1644 int nbbs = loop->num_nodes;
1645 block_stmt_iterator si;
1646 int i;
1647 tree ratio = NULL;
1648 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1651 fprintf (vect_dump, "=== vec_transform_loop ===");
1654 /* Peel the loop if there are data refs with unknown alignment.
1655 Only one data ref with unknown store is allowed. */
1657 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1658 vect_do_peeling_for_alignment (loop_vinfo, loops);
1660 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1661 compile time constant), or it is a constant that doesn't divide by the
1662 vectorization factor, then an epilog loop needs to be created.
1663 We therefore duplicate the loop: the original loop will be vectorized,
1664 and will compute the first (n/VF) iterations. The second copy of the loop
1665 will remain scalar and will compute the remaining (n%VF) iterations.
1666 (VF is the vectorization factor). */
1668 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1669 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1670 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
1671 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
1672 else
1673 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
1674 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
1676 /* 1) Make sure the loop header has exactly two entries
1677 2) Make sure we have a preheader basic block. */
1679 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1681 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1684 /* FORNOW: the vectorizer supports only loops which body consist
1685 of one basic block (header + empty latch). When the vectorizer will
1686 support more involved loop forms, the order by which the BBs are
1687 traversed need to be reconsidered. */
1689 for (i = 0; i < nbbs; i++)
1691 basic_block bb = bbs[i];
1693 for (si = bsi_start (bb); !bsi_end_p (si);)
1695 tree stmt = bsi_stmt (si);
1696 stmt_vec_info stmt_info;
1697 bool is_store;
1699 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1701 fprintf (vect_dump, "------>vectorizing statement: ");
1702 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1704 stmt_info = vinfo_for_stmt (stmt);
1705 gcc_assert (stmt_info);
1706 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1708 bsi_next (&si);
1709 continue;
1711 #ifdef ENABLE_CHECKING
1712 /* FORNOW: Verify that all stmts operate on the same number of
1713 units and no inner unrolling is necessary. */
1714 gcc_assert
1715 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
1716 == vectorization_factor);
1717 #endif
1718 /* -------- vectorize statement ------------ */
1719 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1720 fprintf (vect_dump, "transform statement.");
1722 is_store = vect_transform_stmt (stmt, &si);
1723 if (is_store)
1725 /* free the attached stmt_vec_info and remove the stmt. */
1726 stmt_ann_t ann = stmt_ann (stmt);
1727 free (stmt_info);
1728 set_stmt_info (ann, NULL);
1729 bsi_remove (&si);
1730 continue;
1733 bsi_next (&si);
1734 } /* stmts in BB */
1735 } /* BBs in loop */
1737 slpeel_make_loop_iterate_ntimes (loop, ratio);
1739 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
1740 fprintf (vect_dump, "LOOP VECTORIZED.");