* configure.ac (target_header_dir): vfork is a stub under djgpp.
[official-gcc.git] / gcc / tree-vect-transform.c
blob1935a738f71d3465dfe807398c996ddd79609d63
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 /* Utility function dealing with loop peeling (not peeling itself). */
62 static void vect_generate_tmps_on_preheader
63 (loop_vec_info, tree *, tree *, tree *);
64 static tree vect_build_loop_niters (loop_vec_info);
65 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
66 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
67 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
68 static void vect_update_inits_of_drs (loop_vec_info, tree);
69 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
70 static void vect_do_peeling_for_loop_bound
71 (loop_vec_info, tree *, struct loops *);
74 /* Function vect_get_new_vect_var.
76 Returns a name for a new variable. The current naming scheme appends the
77 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
78 the name of vectorizer generated variables, and appends that to NAME if
79 provided. */
81 static tree
82 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
84 const char *prefix;
85 int prefix_len;
86 tree new_vect_var;
88 if (var_kind == vect_simple_var)
89 prefix = "vect_";
90 else
91 prefix = "vect_p";
93 prefix_len = strlen (prefix);
95 if (name)
96 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
97 else
98 new_vect_var = create_tmp_var (type, prefix);
100 return new_vect_var;
104 /* Function vect_create_index_for_vector_ref.
106 Create (and return) an index variable, along with it's update chain in the
107 loop. This variable will be used to access a memory location in a vector
108 operation.
110 Input:
111 LOOP: The loop being vectorized.
112 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
113 function can be added here, or in the loop pre-header.
115 Output:
116 Return an index that will be used to index a vector array. It is expected
117 that a pointer to the first vector will be used as the base address for the
118 indexed reference.
120 FORNOW: we are not trying to be efficient, just creating a new index each
121 time from scratch. At this time all vector references could use the same
122 index.
124 TODO: create only one index to be used by all vector references. Record
125 the index in the LOOP_VINFO the first time this procedure is called and
126 return it on subsequent calls. The increment of this index must be placed
127 just before the conditional expression that ends the single block loop. */
129 static tree
130 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
132 tree init, step;
133 block_stmt_iterator incr_bsi;
134 bool insert_after;
135 tree indx_before_incr, indx_after_incr;
136 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
137 tree incr;
139 /* It is assumed that the base pointer used for vectorized access contains
140 the address of the first vector. Therefore the index used for vectorized
141 access must be initialized to zero and incremented by 1. */
143 init = integer_zero_node;
144 step = integer_one_node;
146 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
147 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
148 &indx_before_incr, &indx_after_incr);
149 incr = bsi_stmt (incr_bsi);
150 get_stmt_operands (incr);
151 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
153 return indx_before_incr;
157 /* Function vect_create_addr_base_for_vector_ref.
159 Create an expression that computes the address of the first memory location
160 that will be accessed for a data reference.
162 Input:
163 STMT: The statement containing the data reference.
164 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
165 OFFSET: Optional. If supplied, it is be added to the initial address.
167 Output:
168 1. Return an SSA_NAME whose value is the address of the memory location of
169 the first vector of the data reference.
170 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
171 these statement(s) which define the returned SSA_NAME.
173 FORNOW: We are only handling array accesses with step 1. */
175 static tree
176 vect_create_addr_base_for_vector_ref (tree stmt,
177 tree *new_stmt_list,
178 tree offset)
180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
181 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
182 tree data_ref_base =
183 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
184 tree base_name = build_fold_indirect_ref (data_ref_base);
185 tree ref = DR_REF (dr);
186 tree scalar_type = TREE_TYPE (ref);
187 tree scalar_ptr_type = build_pointer_type (scalar_type);
188 tree vec_stmt;
189 tree new_temp;
190 tree addr_base, addr_expr;
191 tree dest, new_stmt;
192 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
194 /* Create base_offset */
195 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
196 add_referenced_tmp_var (dest);
197 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
198 append_to_statement_list_force (new_stmt, new_stmt_list);
200 if (offset)
202 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
203 add_referenced_tmp_var (tmp);
204 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
205 STMT_VINFO_VECT_STEP (stmt_info)));
206 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
207 base_offset, offset));
208 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
209 append_to_statement_list_force (new_stmt, new_stmt_list);
212 /* base + base_offset */
213 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
214 base_offset));
216 /* addr_expr = addr_base */
217 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
218 get_name (base_name));
219 add_referenced_tmp_var (addr_expr);
220 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
221 new_temp = make_ssa_name (addr_expr, vec_stmt);
222 TREE_OPERAND (vec_stmt, 0) = new_temp;
223 append_to_statement_list_force (vec_stmt, new_stmt_list);
225 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
227 fprintf (vect_dump, "created ");
228 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
230 return new_temp;
234 /* Function vect_align_data_ref.
236 Handle mislignment of a memory accesses.
238 FORNOW: Can't handle misaligned accesses.
239 Make sure that the dataref is aligned. */
241 static void
242 vect_align_data_ref (tree stmt)
244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
245 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
247 /* FORNOW: can't handle misaligned accesses;
248 all accesses expected to be aligned. */
249 gcc_assert (aligned_access_p (dr));
253 /* Function vect_create_data_ref_ptr.
255 Create a memory reference expression for vector access, to be used in a
256 vector load/store stmt. The reference is based on a new pointer to vector
257 type (vp).
259 Input:
260 1. STMT: a stmt that references memory. Expected to be of the form
261 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
262 2. BSI: block_stmt_iterator where new stmts can be added.
263 3. OFFSET (optional): an offset to be added to the initial address accessed
264 by the data-ref in STMT.
265 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
266 pointing to the initial address.
268 Output:
269 1. Declare a new ptr to vector_type, and have it point to the base of the
270 data reference (initial addressed accessed by the data reference).
271 For example, for vector of type V8HI, the following code is generated:
273 v8hi *vp;
274 vp = (v8hi *)initial_address;
276 if OFFSET is not supplied:
277 initial_address = &a[init];
278 if OFFSET is supplied:
279 initial_address = &a[init + OFFSET];
281 Return the initial_address in INITIAL_ADDRESS.
283 2. Create a data-reference in the loop based on the new vector pointer vp,
284 and using a new index variable 'idx' as follows:
286 vp' = vp + update
288 where if ONLY_INIT is true:
289 update = zero
290 and otherwise
291 update = idx + vector_type_size
293 Return the pointer vp'.
296 FORNOW: handle only aligned and consecutive accesses. */
298 static tree
299 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
300 tree *initial_address, bool only_init)
302 tree base_name;
303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
305 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
306 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
307 tree vect_ptr_type;
308 tree vect_ptr;
309 tree tag;
310 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
311 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
312 vuse_optype vuses = STMT_VUSE_OPS (stmt);
313 int nvuses, nv_may_defs, nv_must_defs;
314 int i;
315 tree new_temp;
316 tree vec_stmt;
317 tree new_stmt_list = NULL_TREE;
318 tree idx;
319 edge pe = loop_preheader_edge (loop);
320 basic_block new_bb;
321 tree vect_ptr_init;
322 tree vectype_size;
323 tree ptr_update;
324 tree data_ref_ptr;
325 tree type, tmp, size;
327 base_name = build_fold_indirect_ref (unshare_expr (
328 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
330 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
332 tree data_ref_base = base_name;
333 fprintf (vect_dump, "create array_ref of type: ");
334 print_generic_expr (vect_dump, vectype, TDF_SLIM);
335 if (TREE_CODE (data_ref_base) == VAR_DECL)
336 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
337 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
338 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
339 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
340 fprintf (vect_dump, " vectorizing a record based array ref: ");
341 else if (TREE_CODE (data_ref_base) == SSA_NAME)
342 fprintf (vect_dump, " vectorizing a pointer ref: ");
343 print_generic_expr (vect_dump, base_name, TDF_SLIM);
346 /** (1) Create the new vector-pointer variable: **/
348 vect_ptr_type = build_pointer_type (vectype);
349 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
350 get_name (base_name));
351 add_referenced_tmp_var (vect_ptr);
354 /** (2) Handle aliasing information of the new vector-pointer: **/
356 tag = STMT_VINFO_MEMTAG (stmt_info);
357 gcc_assert (tag);
358 get_var_ann (vect_ptr)->type_mem_tag = tag;
360 /* Mark for renaming all aliased variables
361 (i.e, the may-aliases of the type-mem-tag). */
362 nvuses = NUM_VUSES (vuses);
363 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
364 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
365 for (i = 0; i < nvuses; i++)
367 tree use = VUSE_OP (vuses, i);
368 if (TREE_CODE (use) == SSA_NAME)
369 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
371 for (i = 0; i < nv_may_defs; i++)
373 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
374 if (TREE_CODE (def) == SSA_NAME)
375 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
377 for (i = 0; i < nv_must_defs; i++)
379 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
380 if (TREE_CODE (def) == SSA_NAME)
381 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
385 /** (3) Calculate the initial address the vector-pointer, and set
386 the vector-pointer to point to it before the loop: **/
388 /* Create: (&(base[init_val+offset]) in the loop preheader. */
389 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
390 offset);
391 pe = loop_preheader_edge (loop);
392 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
393 gcc_assert (!new_bb);
394 *initial_address = new_temp;
396 /* Create: p = (vectype *) initial_base */
397 vec_stmt = fold_convert (vect_ptr_type, new_temp);
398 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
399 new_temp = make_ssa_name (vect_ptr, vec_stmt);
400 TREE_OPERAND (vec_stmt, 0) = new_temp;
401 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
402 gcc_assert (!new_bb);
403 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
406 /** (4) Handle the updating of the vector-pointer inside the loop: **/
408 if (only_init) /* No update in loop is required. */
409 return vect_ptr_init;
411 idx = vect_create_index_for_vector_ref (loop_vinfo);
413 /* Create: update = idx * vectype_size */
414 tmp = create_tmp_var (integer_type_node, "update");
415 add_referenced_tmp_var (tmp);
416 size = TYPE_SIZE (vect_ptr_type);
417 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
418 ptr_update = create_tmp_var (type, "update");
419 add_referenced_tmp_var (ptr_update);
420 vectype_size = TYPE_SIZE_UNIT (vectype);
421 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
423 new_temp = make_ssa_name (tmp, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
426 vec_stmt = fold_convert (type, new_temp);
427 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
428 new_temp = make_ssa_name (ptr_update, vec_stmt);
429 TREE_OPERAND (vec_stmt, 0) = new_temp;
430 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
432 /* Create: data_ref_ptr = vect_ptr_init + update */
433 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
434 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
435 new_temp = make_ssa_name (vect_ptr, vec_stmt);
436 TREE_OPERAND (vec_stmt, 0) = new_temp;
437 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
438 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
440 return data_ref_ptr;
444 /* Function vect_create_destination_var.
446 Create a new temporary of type VECTYPE. */
448 static tree
449 vect_create_destination_var (tree scalar_dest, tree vectype)
451 tree vec_dest;
452 const char *new_name;
454 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
456 new_name = get_name (scalar_dest);
457 if (!new_name)
458 new_name = "var_";
459 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
460 add_referenced_tmp_var (vec_dest);
462 return vec_dest;
466 /* Function vect_init_vector.
468 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
469 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
470 used in the vectorization of STMT. */
472 static tree
473 vect_init_vector (tree stmt, tree vector_var)
475 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
476 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
478 tree new_var;
479 tree init_stmt;
480 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
481 tree vec_oprnd;
482 edge pe;
483 tree new_temp;
484 basic_block new_bb;
486 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
487 add_referenced_tmp_var (new_var);
489 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
490 new_temp = make_ssa_name (new_var, init_stmt);
491 TREE_OPERAND (init_stmt, 0) = new_temp;
493 pe = loop_preheader_edge (loop);
494 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
495 gcc_assert (!new_bb);
497 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
499 fprintf (vect_dump, "created new init_stmt: ");
500 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
503 vec_oprnd = TREE_OPERAND (init_stmt, 0);
504 return vec_oprnd;
508 /* Function vect_get_vec_def_for_operand.
510 OP is an operand in STMT. This function returns a (vector) def that will be
511 used in the vectorized stmt for STMT.
513 In the case that OP is an SSA_NAME which is defined in the loop, then
514 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
516 In case OP is an invariant or constant, a new stmt that creates a vector def
517 needs to be introduced. */
519 static tree
520 vect_get_vec_def_for_operand (tree op, tree stmt)
522 tree vec_oprnd;
523 tree vec_stmt;
524 tree def_stmt;
525 stmt_vec_info def_stmt_info = NULL;
526 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
527 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
528 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
529 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
530 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
531 basic_block bb;
532 tree vec_inv;
533 tree t = NULL_TREE;
534 tree def;
535 int i;
537 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
539 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
540 print_generic_expr (vect_dump, op, TDF_SLIM);
543 /** ===> Case 1: operand is a constant. **/
545 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
547 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
549 tree vec_cst;
551 /* Build a tree with vector elements. */
552 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
553 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
555 for (i = nunits - 1; i >= 0; --i)
557 t = tree_cons (NULL_TREE, op, t);
559 vec_cst = build_vector (vectype, t);
560 return vect_init_vector (stmt, vec_cst);
563 gcc_assert (TREE_CODE (op) == SSA_NAME);
565 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
567 def_stmt = SSA_NAME_DEF_STMT (op);
568 def_stmt_info = vinfo_for_stmt (def_stmt);
570 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
572 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
573 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
577 /** ==> Case 2.1: operand is defined inside the loop. **/
579 if (def_stmt_info)
581 /* Get the def from the vectorized stmt. */
583 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
584 gcc_assert (vec_stmt);
585 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
586 return vec_oprnd;
590 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
591 it is a reduction/induction. **/
593 bb = bb_for_stmt (def_stmt);
594 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
596 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
597 fprintf (vect_dump, "reduction/induction - unsupported.");
598 internal_error ("no support for reduction/induction"); /* FORNOW */
602 /** ==> Case 2.3: operand is defined outside the loop -
603 it is a loop invariant. */
605 switch (TREE_CODE (def_stmt))
607 case PHI_NODE:
608 def = PHI_RESULT (def_stmt);
609 break;
610 case MODIFY_EXPR:
611 def = TREE_OPERAND (def_stmt, 0);
612 break;
613 case NOP_EXPR:
614 def = TREE_OPERAND (def_stmt, 0);
615 gcc_assert (IS_EMPTY_STMT (def_stmt));
616 def = op;
617 break;
618 default:
619 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
621 fprintf (vect_dump, "unsupported defining stmt: ");
622 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
624 internal_error ("unsupported defining stmt");
627 /* Build a tree with vector elements.
628 Create 'vec_inv = {inv,inv,..,inv}' */
630 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
631 fprintf (vect_dump, "Create vector_inv.");
633 for (i = nunits - 1; i >= 0; --i)
635 t = tree_cons (NULL_TREE, def, t);
638 vec_inv = build_constructor (vectype, t);
639 return vect_init_vector (stmt, vec_inv);
643 /* Function vect_finish_stmt_generation.
645 Insert a new stmt. */
647 static void
648 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
650 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
652 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
654 fprintf (vect_dump, "add new stmt: ");
655 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
658 #ifdef ENABLE_CHECKING
659 /* Make sure bsi points to the stmt that is being vectorized. */
660 gcc_assert (stmt == bsi_stmt (*bsi));
661 #endif
663 #ifdef USE_MAPPED_LOCATION
664 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
665 #else
666 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
667 #endif
671 /* Function vectorizable_assignment.
673 Check if STMT performs an assignment (copy) that can be vectorized.
674 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
675 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
678 bool
679 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
681 tree vec_dest;
682 tree scalar_dest;
683 tree op;
684 tree vec_oprnd;
685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
687 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
688 tree new_temp;
690 /* Is vectorizable assignment? */
692 if (TREE_CODE (stmt) != MODIFY_EXPR)
693 return false;
695 scalar_dest = TREE_OPERAND (stmt, 0);
696 if (TREE_CODE (scalar_dest) != SSA_NAME)
697 return false;
699 op = TREE_OPERAND (stmt, 1);
700 if (!vect_is_simple_use (op, loop_vinfo, NULL))
702 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
703 fprintf (vect_dump, "use not simple.");
704 return false;
707 if (!vec_stmt) /* transformation not required. */
709 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
710 return true;
713 /** Transform. **/
714 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
715 fprintf (vect_dump, "transform assignment.");
717 /* Handle def. */
718 vec_dest = vect_create_destination_var (scalar_dest, vectype);
720 /* Handle use. */
721 op = TREE_OPERAND (stmt, 1);
722 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
724 /* Arguments are ready. create the new vector stmt. */
725 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
726 new_temp = make_ssa_name (vec_dest, *vec_stmt);
727 TREE_OPERAND (*vec_stmt, 0) = new_temp;
728 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
730 return true;
734 /* Function vectorizable_operation.
736 Check if STMT performs a binary or unary operation that can be vectorized.
737 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
738 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
739 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
741 bool
742 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
744 tree vec_dest;
745 tree scalar_dest;
746 tree operation;
747 tree op0, op1 = NULL;
748 tree vec_oprnd0, vec_oprnd1=NULL;
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
752 int i;
753 enum tree_code code;
754 enum machine_mode vec_mode;
755 tree new_temp;
756 int op_type;
757 tree op;
758 optab optab;
760 /* Is STMT a vectorizable binary/unary operation? */
761 if (TREE_CODE (stmt) != MODIFY_EXPR)
762 return false;
764 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
765 return false;
767 operation = TREE_OPERAND (stmt, 1);
768 code = TREE_CODE (operation);
769 optab = optab_for_tree_code (code, vectype);
771 /* Support only unary or binary operations. */
772 op_type = TREE_CODE_LENGTH (code);
773 if (op_type != unary_op && op_type != binary_op)
775 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
776 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
777 return false;
780 for (i = 0; i < op_type; i++)
782 op = TREE_OPERAND (operation, i);
783 if (!vect_is_simple_use (op, loop_vinfo, NULL))
785 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
786 fprintf (vect_dump, "use not simple.");
787 return false;
791 /* Supportable by target? */
792 if (!optab)
794 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
795 fprintf (vect_dump, "no optab.");
796 return false;
798 vec_mode = TYPE_MODE (vectype);
799 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
801 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
802 fprintf (vect_dump, "op not supported by target.");
803 return false;
806 if (!vec_stmt) /* transformation not required. */
808 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
809 return true;
812 /** Transform. **/
814 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
815 fprintf (vect_dump, "transform binary/unary operation.");
817 /* Handle def. */
818 scalar_dest = TREE_OPERAND (stmt, 0);
819 vec_dest = vect_create_destination_var (scalar_dest, vectype);
821 /* Handle uses. */
822 op0 = TREE_OPERAND (operation, 0);
823 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
825 if (op_type == binary_op)
827 op1 = TREE_OPERAND (operation, 1);
828 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
831 /* Arguments are ready. create the new vector stmt. */
833 if (op_type == binary_op)
834 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
835 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
836 else
837 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
838 build1 (code, vectype, vec_oprnd0));
839 new_temp = make_ssa_name (vec_dest, *vec_stmt);
840 TREE_OPERAND (*vec_stmt, 0) = new_temp;
841 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
843 return true;
847 /* Function vectorizable_store.
849 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
850 can be vectorized.
851 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
852 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
853 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
855 bool
856 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
858 tree scalar_dest;
859 tree data_ref;
860 tree op;
861 tree vec_oprnd1;
862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
863 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
864 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
865 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
866 enum machine_mode vec_mode;
867 tree dummy;
868 enum dr_alignment_support alignment_support_cheme;
870 /* Is vectorizable store? */
872 if (TREE_CODE (stmt) != MODIFY_EXPR)
873 return false;
875 scalar_dest = TREE_OPERAND (stmt, 0);
876 if (TREE_CODE (scalar_dest) != ARRAY_REF
877 && TREE_CODE (scalar_dest) != INDIRECT_REF)
878 return false;
880 op = TREE_OPERAND (stmt, 1);
881 if (!vect_is_simple_use (op, loop_vinfo, NULL))
883 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
884 fprintf (vect_dump, "use not simple.");
885 return false;
888 vec_mode = TYPE_MODE (vectype);
889 /* FORNOW. In some cases can vectorize even if data-type not supported
890 (e.g. - array initialization with 0). */
891 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
892 return false;
894 if (!STMT_VINFO_DATA_REF (stmt_info))
895 return false;
898 if (!vec_stmt) /* transformation not required. */
900 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
901 return true;
904 /** Transform. **/
906 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
907 fprintf (vect_dump, "transform store");
909 alignment_support_cheme = vect_supportable_dr_alignment (dr);
910 gcc_assert (alignment_support_cheme);
911 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
913 /* Handle use - get the vectorized def from the defining stmt. */
914 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
916 /* Handle def. */
917 /* FORNOW: make sure the data reference is aligned. */
918 vect_align_data_ref (stmt);
919 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
920 data_ref = build_fold_indirect_ref (data_ref);
922 /* Arguments are ready. create the new vector stmt. */
923 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
924 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
926 return true;
930 /* vectorizable_load.
932 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
933 can be vectorized.
934 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
935 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
936 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
938 bool
939 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
941 tree scalar_dest;
942 tree vec_dest = NULL;
943 tree data_ref = NULL;
944 tree op;
945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
946 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
948 tree new_temp;
949 int mode;
950 tree init_addr;
951 tree new_stmt;
952 tree dummy;
953 basic_block new_bb;
954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
955 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
956 edge pe = loop_preheader_edge (loop);
957 enum dr_alignment_support alignment_support_cheme;
959 /* Is vectorizable load? */
961 if (TREE_CODE (stmt) != MODIFY_EXPR)
962 return false;
964 scalar_dest = TREE_OPERAND (stmt, 0);
965 if (TREE_CODE (scalar_dest) != SSA_NAME)
966 return false;
968 op = TREE_OPERAND (stmt, 1);
969 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
970 return false;
972 if (!STMT_VINFO_DATA_REF (stmt_info))
973 return false;
975 mode = (int) TYPE_MODE (vectype);
977 /* FORNOW. In some cases can vectorize even if data-type not supported
978 (e.g. - data copies). */
979 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
981 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
982 fprintf (vect_dump, "Aligned load, but unsupported type.");
983 return false;
986 if (!vec_stmt) /* transformation not required. */
988 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
989 return true;
992 /** Transform. **/
994 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
995 fprintf (vect_dump, "transform load.");
997 alignment_support_cheme = vect_supportable_dr_alignment (dr);
998 gcc_assert (alignment_support_cheme);
1000 if (alignment_support_cheme == dr_aligned
1001 || alignment_support_cheme == dr_unaligned_supported)
1003 /* Create:
1004 p = initial_addr;
1005 indx = 0;
1006 loop {
1007 vec_dest = *(p);
1008 indx = indx + 1;
1012 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1013 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1014 if (aligned_access_p (dr))
1015 data_ref = build_fold_indirect_ref (data_ref);
1016 else
1018 int mis = DR_MISALIGNMENT (dr);
1019 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1020 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1021 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1023 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1024 new_temp = make_ssa_name (vec_dest, new_stmt);
1025 TREE_OPERAND (new_stmt, 0) = new_temp;
1026 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1028 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1030 /* Create:
1031 p1 = initial_addr;
1032 msq_init = *(floor(p1))
1033 p2 = initial_addr + VS - 1;
1034 magic = have_builtin ? builtin_result : initial_address;
1035 indx = 0;
1036 loop {
1037 p2' = p2 + indx * vectype_size
1038 lsq = *(floor(p2'))
1039 vec_dest = realign_load (msq, lsq, magic)
1040 indx = indx + 1;
1041 msq = lsq;
1045 tree offset;
1046 tree magic;
1047 tree phi_stmt;
1048 tree msq_init;
1049 tree msq, lsq;
1050 tree dataref_ptr;
1051 tree params;
1053 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1054 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1055 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1056 &init_addr, true);
1057 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1058 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1059 new_temp = make_ssa_name (vec_dest, new_stmt);
1060 TREE_OPERAND (new_stmt, 0) = new_temp;
1061 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1062 gcc_assert (!new_bb);
1063 msq_init = TREE_OPERAND (new_stmt, 0);
1066 /* <2> Create lsq = *(floor(p2')) in the loop */
1067 offset = build_int_cst (integer_type_node,
1068 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1069 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1070 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1071 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1072 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1073 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1074 new_temp = make_ssa_name (vec_dest, new_stmt);
1075 TREE_OPERAND (new_stmt, 0) = new_temp;
1076 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1077 lsq = TREE_OPERAND (new_stmt, 0);
1080 /* <3> */
1081 if (targetm.vectorize.builtin_mask_for_load)
1083 /* Create permutation mask, if required, in loop preheader. */
1084 tree builtin_decl;
1085 params = build_tree_list (NULL_TREE, init_addr);
1086 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1087 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1088 new_stmt = build_function_call_expr (builtin_decl, params);
1089 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1090 new_temp = make_ssa_name (vec_dest, new_stmt);
1091 TREE_OPERAND (new_stmt, 0) = new_temp;
1092 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1093 gcc_assert (!new_bb);
1094 magic = TREE_OPERAND (new_stmt, 0);
1096 /* Since we have just created a CALL_EXPR, we may need to
1097 rename call-clobbered variables. */
1098 mark_call_clobbered_vars_to_rename ();
1100 else
1102 /* Use current address instead of init_addr for reduced reg pressure.
1104 magic = dataref_ptr;
1108 /* <4> Create msq = phi <msq_init, lsq> in loop */
1109 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1110 msq = make_ssa_name (vec_dest, NULL_TREE);
1111 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1112 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1113 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1114 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1117 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1118 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1119 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1120 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1121 new_temp = make_ssa_name (vec_dest, new_stmt);
1122 TREE_OPERAND (new_stmt, 0) = new_temp;
1123 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1125 else
1126 gcc_unreachable ();
1128 *vec_stmt = new_stmt;
1129 return true;
1133 /* Function vect_transform_stmt.
1135 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1137 bool
1138 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1140 bool is_store = false;
1141 tree vec_stmt = NULL_TREE;
1142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1143 bool done;
1145 switch (STMT_VINFO_TYPE (stmt_info))
1147 case op_vec_info_type:
1148 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1149 gcc_assert (done);
1150 break;
1152 case assignment_vec_info_type:
1153 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1154 gcc_assert (done);
1155 break;
1157 case load_vec_info_type:
1158 done = vectorizable_load (stmt, bsi, &vec_stmt);
1159 gcc_assert (done);
1160 break;
1162 case store_vec_info_type:
1163 done = vectorizable_store (stmt, bsi, &vec_stmt);
1164 gcc_assert (done);
1165 is_store = true;
1166 break;
1167 default:
1168 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1169 fprintf (vect_dump, "stmt not supported.");
1170 gcc_unreachable ();
1173 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1175 return is_store;
1179 /* This function builds ni_name = number of iterations loop executes
1180 on the loop preheader. */
1182 static tree
1183 vect_build_loop_niters (loop_vec_info loop_vinfo)
1185 tree ni_name, stmt, var;
1186 edge pe;
1187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1188 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1190 var = create_tmp_var (TREE_TYPE (ni), "niters");
1191 add_referenced_tmp_var (var);
1192 ni_name = force_gimple_operand (ni, &stmt, false, var);
1194 pe = loop_preheader_edge (loop);
1195 if (stmt)
1197 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1198 gcc_assert (!new_bb);
1201 return ni_name;
1205 /* This function generates the following statements:
1207 ni_name = number of iterations loop executes
1208 ratio = ni_name / vf
1209 ratio_mult_vf_name = ratio * vf
1211 and places them at the loop preheader edge. */
1213 static void
1214 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1215 tree *ni_name_ptr,
1216 tree *ratio_mult_vf_name_ptr,
1217 tree *ratio_name_ptr)
1220 edge pe;
1221 basic_block new_bb;
1222 tree stmt, ni_name;
1223 tree var;
1224 tree ratio_name;
1225 tree ratio_mult_vf_name;
1226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1227 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1228 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1229 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1231 pe = loop_preheader_edge (loop);
1233 /* Generate temporary variable that contains
1234 number of iterations loop executes. */
1236 ni_name = vect_build_loop_niters (loop_vinfo);
1238 /* Create: ratio = ni >> log2(vf) */
1240 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1241 add_referenced_tmp_var (var);
1242 ratio_name = make_ssa_name (var, NULL_TREE);
1243 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1244 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1245 SSA_NAME_DEF_STMT (ratio_name) = stmt;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1249 gcc_assert (!new_bb);
1251 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1253 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1254 add_referenced_tmp_var (var);
1255 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1256 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1257 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1258 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1260 pe = loop_preheader_edge (loop);
1261 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1262 gcc_assert (!new_bb);
1264 *ni_name_ptr = ni_name;
1265 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1266 *ratio_name_ptr = ratio_name;
1268 return;
1272 /* Function vect_update_ivs_after_vectorizer.
1274 "Advance" the induction variables of LOOP to the value they should take
1275 after the execution of LOOP. This is currently necessary because the
1276 vectorizer does not handle induction variables that are used after the
1277 loop. Such a situation occurs when the last iterations of LOOP are
1278 peeled, because:
1279 1. We introduced new uses after LOOP for IVs that were not originally used
1280 after LOOP: the IVs of LOOP are now used by an epilog loop.
1281 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1282 times, whereas the loop IVs should be bumped N times.
1284 Input:
1285 - LOOP - a loop that is going to be vectorized. The last few iterations
1286 of LOOP were peeled.
1287 - NITERS - the number of iterations that LOOP executes (before it is
1288 vectorized). i.e, the number of times the ivs should be bumped.
1289 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1290 coming out from LOOP on which there are uses of the LOOP ivs
1291 (this is the path from LOOP->exit to epilog_loop->preheader).
1293 The new definitions of the ivs are placed in LOOP->exit.
1294 The phi args associated with the edge UPDATE_E in the bb
1295 UPDATE_E->dest are updated accordingly.
1297 Assumption 1: Like the rest of the vectorizer, this function assumes
1298 a single loop exit that has a single predecessor.
1300 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1301 organized in the same order.
1303 Assumption 3: The access function of the ivs is simple enough (see
1304 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1306 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1307 coming out of LOOP on which the ivs of LOOP are used (this is the path
1308 that leads to the epilog loop; other paths skip the epilog loop). This
1309 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1310 needs to have its phis updated.
1313 static void
1314 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1315 edge update_e)
1317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1318 basic_block exit_bb = loop->single_exit->dest;
1319 tree phi, phi1;
1320 basic_block update_bb = update_e->dest;
1322 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1324 /* Make sure there exists a single-predecessor exit bb: */
1325 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
1327 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1328 phi && phi1;
1329 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1331 tree access_fn = NULL;
1332 tree evolution_part;
1333 tree init_expr;
1334 tree step_expr;
1335 tree var, stmt, ni, ni_name;
1336 block_stmt_iterator last_bsi;
1338 /* Skip virtual phi's. */
1339 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1341 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1342 fprintf (vect_dump, "virtual phi. skip.");
1343 continue;
1346 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1347 gcc_assert (access_fn);
1348 evolution_part =
1349 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1350 gcc_assert (evolution_part != NULL_TREE);
1352 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1353 of degree >= 2 or exponential. */
1354 gcc_assert (!tree_is_chrec (evolution_part));
1356 step_expr = evolution_part;
1357 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1358 loop->num));
1360 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1361 build2 (MULT_EXPR, TREE_TYPE (niters),
1362 niters, step_expr), init_expr);
1364 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1365 add_referenced_tmp_var (var);
1367 ni_name = force_gimple_operand (ni, &stmt, false, var);
1369 /* Insert stmt into exit_bb. */
1370 last_bsi = bsi_last (exit_bb);
1371 if (stmt)
1372 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1374 /* Fix phi expressions in the successor bb. */
1375 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
1376 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
1377 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1382 /* Function vect_do_peeling_for_loop_bound
1384 Peel the last iterations of the loop represented by LOOP_VINFO.
1385 The peeled iterations form a new epilog loop. Given that the loop now
1386 iterates NITERS times, the new epilog loop iterates
1387 NITERS % VECTORIZATION_FACTOR times.
1389 The original loop will later be made to iterate
1390 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1392 static void
1393 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1394 struct loops *loops)
1397 tree ni_name, ratio_mult_vf_name;
1398 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1399 struct loop *new_loop;
1400 edge update_e;
1401 basic_block preheader;
1402 #ifdef ENABLE_CHECKING
1403 int loop_num;
1404 #endif
1406 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1407 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
1409 /* Generate the following variables on the preheader of original loop:
1411 ni_name = number of iteration the original loop executes
1412 ratio = ni_name / vf
1413 ratio_mult_vf_name = ratio * vf */
1414 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1415 &ratio_mult_vf_name, ratio);
1417 #ifdef ENABLE_CHECKING
1418 loop_num = loop->num;
1419 #endif
1420 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
1421 ratio_mult_vf_name, ni_name, false);
1422 #ifdef ENABLE_CHECKING
1423 gcc_assert (new_loop);
1424 gcc_assert (loop_num == loop->num);
1425 slpeel_verify_cfg_after_peeling (loop, new_loop);
1426 #endif
1428 /* A guard that controls whether the new_loop is to be executed or skipped
1429 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1430 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1431 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1432 is on the path where the LOOP IVs are used and need to be updated. */
1434 preheader = loop_preheader_edge (new_loop)->src;
1435 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
1436 update_e = EDGE_PRED (preheader, 0);
1437 else
1438 update_e = EDGE_PRED (preheader, 1);
1440 /* Update IVs of original loop as if they were advanced
1441 by ratio_mult_vf_name steps. */
1442 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1444 /* After peeling we have to reset scalar evolution analyzer. */
1445 scev_reset ();
1447 return;
1451 /* Function vect_gen_niters_for_prolog_loop
1453 Set the number of iterations for the loop represented by LOOP_VINFO
1454 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1455 and the misalignment of DR - the first data reference recorded in
1456 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1457 this loop, the data reference DR will refer to an aligned location.
1459 The following computation is generated:
1461 compute address misalignment in bytes:
1462 addr_mis = addr & (vectype_size - 1)
1464 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1466 (elem_size = element type size; an element is the scalar element
1467 whose type is the inner type of the vectype) */
1469 static tree
1470 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1472 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1473 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1474 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1475 tree var, stmt;
1476 tree iters, iters_name;
1477 edge pe;
1478 basic_block new_bb;
1479 tree dr_stmt = DR_STMT (dr);
1480 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1481 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1482 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1483 tree elem_misalign;
1484 tree byte_misalign;
1485 tree new_stmts = NULL_TREE;
1486 tree start_addr =
1487 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1488 tree ptr_type = TREE_TYPE (start_addr);
1489 tree size = TYPE_SIZE (ptr_type);
1490 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1491 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1492 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1493 tree niters_type = TREE_TYPE (loop_niters);
1494 tree elem_size_log =
1495 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1496 tree vf_tree = build_int_cst (unsigned_type_node, vf);
1498 pe = loop_preheader_edge (loop);
1499 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1500 gcc_assert (!new_bb);
1502 /* Create: byte_misalign = addr & (vectype_size - 1) */
1503 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1505 /* Create: elem_misalign = byte_misalign / element_size */
1506 elem_misalign =
1507 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1509 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1510 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1511 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1512 iters = fold_convert (niters_type, iters);
1514 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1515 /* If the loop bound is known at compile time we already verified that it is
1516 greater than vf; since the misalignment ('iters') is at most vf, there's
1517 no need to generate the MIN_EXPR in this case. */
1518 if (TREE_CODE (loop_niters) != INTEGER_CST)
1519 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1521 var = create_tmp_var (niters_type, "prolog_loop_niters");
1522 add_referenced_tmp_var (var);
1523 iters_name = force_gimple_operand (iters, &stmt, false, var);
1525 /* Insert stmt on loop preheader edge. */
1526 pe = loop_preheader_edge (loop);
1527 if (stmt)
1529 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1530 gcc_assert (!new_bb);
1533 return iters_name;
1537 /* Function vect_update_inits_of_dr
1539 NITERS iterations were peeled from LOOP. DR represents a data reference
1540 in LOOP. This function updates the information recorded in DR to
1541 account for the fact that the first NITERS iterations had already been
1542 executed. Specifically, it updates the OFFSET field of stmt_info. */
1544 static void
1545 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
1547 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1548 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
1550 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1551 STMT_VINFO_VECT_STEP (stmt_info)));
1552 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1553 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1557 /* Function vect_update_inits_of_drs
1559 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1560 This function updates the information recorded for the data references in
1561 the loop to account for the fact that the first NITERS iterations had
1562 already been executed. Specifically, it updates the initial_condition of the
1563 access_function of all the data_references in the loop. */
1565 static void
1566 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1568 unsigned int i;
1569 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1570 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1572 if (vect_dump && (dump_flags & TDF_DETAILS))
1573 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1575 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1577 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1578 vect_update_inits_of_dr (dr, niters);
1581 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1583 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1584 vect_update_inits_of_dr (dr, niters);
1589 /* Function vect_do_peeling_for_alignment
1591 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1592 'niters' is set to the misalignment of one of the data references in the
1593 loop, thereby forcing it to refer to an aligned location at the beginning
1594 of the execution of this loop. The data reference for which we are
1595 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1597 static void
1598 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1601 tree niters_of_prolog_loop, ni_name;
1602 tree n_iters;
1603 struct loop *new_loop;
1605 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1606 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1608 ni_name = vect_build_loop_niters (loop_vinfo);
1609 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1611 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1612 new_loop =
1613 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
1614 niters_of_prolog_loop, ni_name, true);
1615 #ifdef ENABLE_CHECKING
1616 gcc_assert (new_loop);
1617 slpeel_verify_cfg_after_peeling (new_loop, loop);
1618 #endif
1620 /* Update number of times loop executes. */
1621 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1622 LOOP_VINFO_NITERS (loop_vinfo) =
1623 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
1625 /* Update the init conditions of the access functions of all data refs. */
1626 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1628 /* After peeling we have to reset scalar evolution analyzer. */
1629 scev_reset ();
1631 return;
1635 /* Function vect_transform_loop.
1637 The analysis phase has determined that the loop is vectorizable.
1638 Vectorize the loop - created vectorized stmts to replace the scalar
1639 stmts in the loop, and update the loop exit condition. */
1641 void
1642 vect_transform_loop (loop_vec_info loop_vinfo,
1643 struct loops *loops ATTRIBUTE_UNUSED)
1645 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1646 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1647 int nbbs = loop->num_nodes;
1648 block_stmt_iterator si;
1649 int i;
1650 tree ratio = NULL;
1651 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1653 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1654 fprintf (vect_dump, "=== vec_transform_loop ===");
1657 /* Peel the loop if there are data refs with unknown alignment.
1658 Only one data ref with unknown store is allowed. */
1660 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1661 vect_do_peeling_for_alignment (loop_vinfo, loops);
1663 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1664 compile time constant), or it is a constant that doesn't divide by the
1665 vectorization factor, then an epilog loop needs to be created.
1666 We therefore duplicate the loop: the original loop will be vectorized,
1667 and will compute the first (n/VF) iterations. The second copy of the loop
1668 will remain scalar and will compute the remaining (n%VF) iterations.
1669 (VF is the vectorization factor). */
1671 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1672 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1673 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
1674 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
1675 else
1676 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
1677 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
1679 /* 1) Make sure the loop header has exactly two entries
1680 2) Make sure we have a preheader basic block. */
1682 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1684 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1687 /* FORNOW: the vectorizer supports only loops which body consist
1688 of one basic block (header + empty latch). When the vectorizer will
1689 support more involved loop forms, the order by which the BBs are
1690 traversed need to be reconsidered. */
1692 for (i = 0; i < nbbs; i++)
1694 basic_block bb = bbs[i];
1696 for (si = bsi_start (bb); !bsi_end_p (si);)
1698 tree stmt = bsi_stmt (si);
1699 stmt_vec_info stmt_info;
1700 bool is_store;
1702 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1704 fprintf (vect_dump, "------>vectorizing statement: ");
1705 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1707 stmt_info = vinfo_for_stmt (stmt);
1708 gcc_assert (stmt_info);
1709 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1711 bsi_next (&si);
1712 continue;
1714 #ifdef ENABLE_CHECKING
1715 /* FORNOW: Verify that all stmts operate on the same number of
1716 units and no inner unrolling is necessary. */
1717 gcc_assert
1718 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
1719 == vectorization_factor);
1720 #endif
1721 /* -------- vectorize statement ------------ */
1722 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1723 fprintf (vect_dump, "transform statement.");
1725 is_store = vect_transform_stmt (stmt, &si);
1726 if (is_store)
1728 /* free the attached stmt_vec_info and remove the stmt. */
1729 stmt_ann_t ann = stmt_ann (stmt);
1730 free (stmt_info);
1731 set_stmt_info (ann, NULL);
1732 bsi_remove (&si);
1733 continue;
1736 bsi_next (&si);
1737 } /* stmts in BB */
1738 } /* BBs in loop */
1740 slpeel_make_loop_iterate_ntimes (loop, ratio);
1742 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
1743 fprintf (vect_dump, "LOOP VECTORIZED.");