os_dep.c: Add FreeBSD/PowerPC bits.
[official-gcc.git] / gcc / tree-vect-transform.c
blob116f01a5fe42312dcaa8a9597a72c42c42f9eb37
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
44 #include "toplev.h"
46 /* Utility functions for the code transformation.
   Forward declarations of file-local (static) helpers.
   NOTE(review): not every definition is visible in this part of the file
   (e.g. vect_transform_stmt, vect_is_simple_cond, update_vuses_to_preheader);
   presumably they are defined further down -- confirm.  */
47 static bool vect_transform_stmt (tree, block_stmt_iterator *);
48 static void vect_align_data_ref (tree);
49 static tree vect_create_destination_var (tree, tree);
50 static tree vect_create_data_ref_ptr
51 (tree, block_stmt_iterator *, tree, tree *, bool);
52 static tree vect_create_index_for_vector_ref (loop_vec_info);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
55 static tree vect_get_vec_def_for_operand (tree, tree);
56 static tree vect_init_vector (tree, tree);
57 static void vect_finish_stmt_generation
58 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
59 static bool vect_is_simple_cond (tree, loop_vec_info);
60 static void update_vuses_to_preheader (tree, struct loop*);
62 /* Utility functions dealing with loop peeling (not peeling itself).
   Forward declarations only; NOTE(review): the definitions are not visible
   in this part of the file.  */
63 static void vect_generate_tmps_on_preheader
64 (loop_vec_info, tree *, tree *, tree *);
65 static tree vect_build_loop_niters (loop_vec_info);
66 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
67 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
68 static void vect_update_init_of_dr (struct data_reference *, tree niters);
69 static void vect_update_inits_of_drs (loop_vec_info, tree);
70 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
71 static void vect_do_peeling_for_loop_bound
72 (loop_vec_info, tree *, struct loops *);
75 /* Function vect_get_new_vect_var.
77 Returns a name for a new variable. The current naming scheme appends the
78 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
79 the name of vectorizer generated variables, and appends that to NAME if
80 provided. */
82 static tree
83 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
85 const char *prefix;
86 tree new_vect_var;
88 if (var_kind == vect_simple_var)
89 prefix = "vect_";
90 else
91 prefix = "vect_p";
93 if (name)
94 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
95 else
96 new_vect_var = create_tmp_var (type, prefix);
98 return new_vect_var;
102 /* Function vect_create_index_for_vector_ref.
104 Create (and return) an index variable, along with it's update chain in the
105 loop. This variable will be used to access a memory location in a vector
106 operation.
108 Input:
109 LOOP: The loop being vectorized.
110 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
111 function can be added here, or in the loop pre-header.
113 Output:
114 Return an index that will be used to index a vector array. It is expected
115 that a pointer to the first vector will be used as the base address for the
116 indexed reference.
118 FORNOW: we are not trying to be efficient, just creating a new index each
119 time from scratch. At this time all vector references could use the same
120 index.
122 TODO: create only one index to be used by all vector references. Record
123 the index in the LOOP_VINFO the first time this procedure is called and
124 return it on subsequent calls. The increment of this index must be placed
125 just before the conditional expression that ends the single block loop. */
127 static tree
128 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
130 tree init, step;
131 block_stmt_iterator incr_bsi;
132 bool insert_after;
133 tree indx_before_incr, indx_after_incr;
134 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
135 tree incr;
137 /* It is assumed that the base pointer used for vectorized access contains
138 the address of the first vector. Therefore the index used for vectorized
139 access must be initialized to zero and incremented by 1. */
141 init = integer_zero_node;
142 step = integer_one_node;
144 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
145 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
146 &indx_before_incr, &indx_after_incr);
147 incr = bsi_stmt (incr_bsi);
148 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
150 return indx_before_incr;
154 /* Function vect_create_addr_base_for_vector_ref.
156 Create an expression that computes the address of the first memory location
157 that will be accessed for a data reference.
159 Input:
160 STMT: The statement containing the data reference.
161 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
162 OFFSET: Optional. If supplied, it is be added to the initial address.
164 Output:
165 1. Return an SSA_NAME whose value is the address of the memory location of
166 the first vector of the data reference.
167 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
168 these statement(s) which define the returned SSA_NAME.
170 FORNOW: We are only handling array accesses with step 1. */
172 static tree
173 vect_create_addr_base_for_vector_ref (tree stmt,
174 tree *new_stmt_list,
175 tree offset)
177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
178 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
179 tree data_ref_base =
180 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
181 tree base_name = build_fold_indirect_ref (data_ref_base);
182 tree ref = DR_REF (dr);
183 tree scalar_type = TREE_TYPE (ref);
184 tree scalar_ptr_type = build_pointer_type (scalar_type);
185 tree vec_stmt;
186 tree new_temp;
187 tree addr_base, addr_expr;
188 tree dest, new_stmt;
189 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
191 /* Create base_offset */
192 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
193 add_referenced_tmp_var (dest);
194 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
195 append_to_statement_list_force (new_stmt, new_stmt_list);
197 if (offset)
199 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
200 add_referenced_tmp_var (tmp);
201 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
202 STMT_VINFO_VECT_STEP (stmt_info)));
203 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
204 base_offset, offset));
205 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
206 append_to_statement_list_force (new_stmt, new_stmt_list);
209 /* base + base_offset */
210 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
211 base_offset));
213 /* addr_expr = addr_base */
214 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
215 get_name (base_name));
216 add_referenced_tmp_var (addr_expr);
217 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
218 new_temp = make_ssa_name (addr_expr, vec_stmt);
219 TREE_OPERAND (vec_stmt, 0) = new_temp;
220 append_to_statement_list_force (vec_stmt, new_stmt_list);
222 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
224 fprintf (vect_dump, "created ");
225 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
227 return new_temp;
231 /* Function vect_align_data_ref.
233 Handle misalignment of a memory accesses.
235 FORNOW: Can't handle misaligned accesses.
236 Make sure that the dataref is aligned. */
238 static void
239 vect_align_data_ref (tree stmt)
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
244 /* FORNOW: can't handle misaligned accesses;
245 all accesses expected to be aligned. */
246 gcc_assert (aligned_access_p (dr));
250 /* Function vect_create_data_ref_ptr.
252 Create a memory reference expression for vector access, to be used in a
253 vector load/store stmt. The reference is based on a new pointer to vector
254 type (vp).
256 Input:
257 1. STMT: a stmt that references memory. Expected to be of the form
258 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
259 2. BSI: block_stmt_iterator where new stmts can be added.
260 3. OFFSET (optional): an offset to be added to the initial address accessed
261 by the data-ref in STMT.
262 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
263 pointing to the initial address.
265 Output:
266 1. Declare a new ptr to vector_type, and have it point to the base of the
267 data reference (initial addressed accessed by the data reference).
268 For example, for vector of type V8HI, the following code is generated:
270 v8hi *vp;
271 vp = (v8hi *)initial_address;
273 if OFFSET is not supplied:
274 initial_address = &a[init];
275 if OFFSET is supplied:
276 initial_address = &a[init + OFFSET];
278 Return the initial_address in INITIAL_ADDRESS.
280 2. Create a data-reference in the loop based on the new vector pointer vp,
281 and using a new index variable 'idx' as follows:
283 vp' = vp + update
285 where if ONLY_INIT is true:
286 update = zero
287 and otherwise
288 update = idx + vector_type_size
290 Return the pointer vp'.
293 FORNOW: handle only aligned and consecutive accesses. */
295 static tree
296 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
297 tree *initial_address, bool only_init)
299 tree base_name;
300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
304 tree vect_ptr_type;
305 tree vect_ptr;
306 tree tag;
307 tree new_temp;
308 tree vec_stmt;
309 tree new_stmt_list = NULL_TREE;
310 tree idx;
311 edge pe = loop_preheader_edge (loop);
312 basic_block new_bb;
313 tree vect_ptr_init;
314 tree vectype_size;
315 tree ptr_update;
316 tree data_ref_ptr;
317 tree type, tmp, size;
319 base_name = build_fold_indirect_ref (unshare_expr (
320 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
322 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
324 tree data_ref_base = base_name;
325 fprintf (vect_dump, "create array_ref of type: ");
326 print_generic_expr (vect_dump, vectype, TDF_SLIM);
327 if (TREE_CODE (data_ref_base) == VAR_DECL)
328 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
329 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
330 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
331 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
332 fprintf (vect_dump, " vectorizing a record based array ref: ");
333 else if (TREE_CODE (data_ref_base) == SSA_NAME)
334 fprintf (vect_dump, " vectorizing a pointer ref: ");
335 print_generic_expr (vect_dump, base_name, TDF_SLIM);
338 /** (1) Create the new vector-pointer variable: **/
340 vect_ptr_type = build_pointer_type (vectype);
341 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
342 get_name (base_name));
343 add_referenced_tmp_var (vect_ptr);
346 /** (2) Add aliasing information to the new vector-pointer:
347 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
349 tag = STMT_VINFO_MEMTAG (stmt_info);
350 gcc_assert (tag);
352 /* If tag is a variable (and NOT_A_TAG) than a new type alias
353 tag must be created with tag added to its may alias list. */
354 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
355 new_type_alias (vect_ptr, tag);
356 else
357 var_ann (vect_ptr)->type_mem_tag = tag;
359 var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
361 /** (3) Calculate the initial address the vector-pointer, and set
362 the vector-pointer to point to it before the loop: **/
364 /* Create: (&(base[init_val+offset]) in the loop preheader. */
365 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
366 offset);
367 pe = loop_preheader_edge (loop);
368 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
369 gcc_assert (!new_bb);
370 *initial_address = new_temp;
372 /* Create: p = (vectype *) initial_base */
373 vec_stmt = fold_convert (vect_ptr_type, new_temp);
374 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
375 new_temp = make_ssa_name (vect_ptr, vec_stmt);
376 TREE_OPERAND (vec_stmt, 0) = new_temp;
377 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
378 gcc_assert (!new_bb);
379 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
382 /** (4) Handle the updating of the vector-pointer inside the loop: **/
384 if (only_init) /* No update in loop is required. */
386 /* Copy the points-to information if it exists. */
387 if (STMT_VINFO_PTR_INFO (stmt_info))
388 duplicate_ssa_name_ptr_info (vect_ptr_init,
389 STMT_VINFO_PTR_INFO (stmt_info));
390 return vect_ptr_init;
393 idx = vect_create_index_for_vector_ref (loop_vinfo);
395 /* Create: update = idx * vectype_size */
396 tmp = create_tmp_var (integer_type_node, "update");
397 add_referenced_tmp_var (tmp);
398 size = TYPE_SIZE (vect_ptr_type);
399 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
400 ptr_update = create_tmp_var (type, "update");
401 add_referenced_tmp_var (ptr_update);
402 vectype_size = TYPE_SIZE_UNIT (vectype);
403 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
404 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
405 new_temp = make_ssa_name (tmp, vec_stmt);
406 TREE_OPERAND (vec_stmt, 0) = new_temp;
407 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
408 vec_stmt = fold_convert (type, new_temp);
409 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
410 new_temp = make_ssa_name (ptr_update, vec_stmt);
411 TREE_OPERAND (vec_stmt, 0) = new_temp;
412 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
414 /* Create: data_ref_ptr = vect_ptr_init + update */
415 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
416 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
417 new_temp = make_ssa_name (vect_ptr, vec_stmt);
418 TREE_OPERAND (vec_stmt, 0) = new_temp;
419 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
420 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
422 /* Copy the points-to information if it exists. */
423 if (STMT_VINFO_PTR_INFO (stmt_info))
424 duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
425 return data_ref_ptr;
429 /* Function vect_create_destination_var.
431 Create a new temporary of type VECTYPE. */
433 static tree
434 vect_create_destination_var (tree scalar_dest, tree vectype)
436 tree vec_dest;
437 const char *new_name;
439 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
441 new_name = get_name (scalar_dest);
442 if (!new_name)
443 new_name = "var_";
444 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
445 add_referenced_tmp_var (vec_dest);
447 return vec_dest;
451 /* Function vect_init_vector.
453 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
454 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
455 used in the vectorization of STMT. */
457 static tree
458 vect_init_vector (tree stmt, tree vector_var)
460 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
461 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 tree new_var;
464 tree init_stmt;
465 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
466 tree vec_oprnd;
467 edge pe;
468 tree new_temp;
469 basic_block new_bb;
471 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
472 add_referenced_tmp_var (new_var);
474 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
475 new_temp = make_ssa_name (new_var, init_stmt);
476 TREE_OPERAND (init_stmt, 0) = new_temp;
478 pe = loop_preheader_edge (loop);
479 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
480 gcc_assert (!new_bb);
482 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
484 fprintf (vect_dump, "created new init_stmt: ");
485 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
488 vec_oprnd = TREE_OPERAND (init_stmt, 0);
489 return vec_oprnd;
493 /* Function vect_get_vec_def_for_operand.
495 OP is an operand in STMT. This function returns a (vector) def that will be
496 used in the vectorized stmt for STMT.
498 In the case that OP is an SSA_NAME which is defined in the loop, then
499 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
501 In case OP is an invariant or constant, a new stmt that creates a vector def
502 needs to be introduced. */
504 static tree
505 vect_get_vec_def_for_operand (tree op, tree stmt)
507 tree vec_oprnd;
508 tree vec_stmt;
509 tree def_stmt;
510 stmt_vec_info def_stmt_info = NULL;
511 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
512 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
513 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
514 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
515 tree vec_inv;
516 tree vec_cst;
517 tree t = NULL_TREE;
518 tree def;
519 int i;
520 enum vect_def_type dt;
521 bool is_simple_use;
523 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
525 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
526 print_generic_expr (vect_dump, op, TDF_SLIM);
529 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
530 gcc_assert (is_simple_use);
531 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
533 if (def)
535 fprintf (vect_dump, "def = ");
536 print_generic_expr (vect_dump, def, TDF_SLIM);
538 if (def_stmt)
540 fprintf (vect_dump, " def_stmt = ");
541 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
545 /* FORNOW */
546 gcc_assert (dt != vect_reduction_def);
548 switch (dt)
550 /* Case 1: operand is a constant. */
551 case vect_constant_def:
553 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
554 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
555 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
557 for (i = nunits - 1; i >= 0; --i)
559 t = tree_cons (NULL_TREE, op, t);
561 vec_cst = build_vector (vectype, t);
562 return vect_init_vector (stmt, vec_cst);
565 /* Case 2: operand is defined outside the loop - loop invariant. */
566 case vect_invariant_def:
568 /* Create 'vec_inv = {inv,inv,..,inv}' */
569 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
570 fprintf (vect_dump, "Create vector_inv.");
572 for (i = nunits - 1; i >= 0; --i)
574 t = tree_cons (NULL_TREE, def, t);
577 vec_inv = build_constructor (vectype, t);
578 return vect_init_vector (stmt, vec_inv);
581 /* Case 3: operand is defined inside the loop. */
582 case vect_loop_def:
584 /* Get the def from the vectorized stmt. */
585 def_stmt_info = vinfo_for_stmt (def_stmt);
586 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
587 gcc_assert (vec_stmt);
588 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
589 return vec_oprnd;
592 /* Case 4: operand is defined by loop-header phi - induction. */
593 case vect_induction_def:
595 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
596 fprintf (vect_dump, "induction - unsupported.");
597 internal_error ("no support for induction"); /* FORNOW */
600 default:
601 gcc_unreachable ();
606 /* Function vect_finish_stmt_generation.
608 Insert a new stmt. */
610 static void
611 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
613 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
615 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
617 fprintf (vect_dump, "add new stmt: ");
618 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
621 #ifdef ENABLE_CHECKING
622 /* Make sure bsi points to the stmt that is being vectorized. */
623 gcc_assert (stmt == bsi_stmt (*bsi));
624 #endif
626 #ifdef USE_MAPPED_LOCATION
627 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
628 #else
629 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
630 #endif
634 /* Function vectorizable_assignment.
636 Check if STMT performs an assignment (copy) that can be vectorized.
637 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
638 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
639 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
641 bool
642 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
644 tree vec_dest;
645 tree scalar_dest;
646 tree op;
647 tree vec_oprnd;
648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
649 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
651 tree new_temp;
652 tree def, def_stmt;
653 enum vect_def_type dt;
655 /* Is vectorizable assignment? */
656 if (!STMT_VINFO_RELEVANT_P (stmt_info))
657 return false;
659 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
661 if (TREE_CODE (stmt) != MODIFY_EXPR)
662 return false;
664 scalar_dest = TREE_OPERAND (stmt, 0);
665 if (TREE_CODE (scalar_dest) != SSA_NAME)
666 return false;
668 op = TREE_OPERAND (stmt, 1);
669 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
671 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
672 fprintf (vect_dump, "use not simple.");
673 return false;
676 if (!vec_stmt) /* transformation not required. */
678 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
679 return true;
682 /** Transform. **/
683 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
684 fprintf (vect_dump, "transform assignment.");
686 /* Handle def. */
687 vec_dest = vect_create_destination_var (scalar_dest, vectype);
689 /* Handle use. */
690 op = TREE_OPERAND (stmt, 1);
691 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
693 /* Arguments are ready. create the new vector stmt. */
694 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
695 new_temp = make_ssa_name (vec_dest, *vec_stmt);
696 TREE_OPERAND (*vec_stmt, 0) = new_temp;
697 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
699 return true;
703 /* Function vect_min_worthwhile_factor.
705 For a loop where we could vectorize the operation indicated by CODE,
706 return the minimum vectorization factor that makes it worthwhile
707 to use generic vectors. */
708 static int
709 vect_min_worthwhile_factor (enum tree_code code)
711 switch (code)
713 case PLUS_EXPR:
714 case MINUS_EXPR:
715 case NEGATE_EXPR:
716 return 4;
718 case BIT_AND_EXPR:
719 case BIT_IOR_EXPR:
720 case BIT_XOR_EXPR:
721 case BIT_NOT_EXPR:
722 return 2;
724 default:
725 return INT_MAX;
730 /* Function vectorizable_operation.
732 Check if STMT performs a binary or unary operation that can be vectorized.
733 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
734 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
735 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
737 bool
738 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
740 tree vec_dest;
741 tree scalar_dest;
742 tree operation;
743 tree op0, op1 = NULL;
744 tree vec_oprnd0, vec_oprnd1=NULL;
745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
748 int i;
749 enum tree_code code;
750 enum machine_mode vec_mode;
751 tree new_temp;
752 int op_type;
753 tree op;
754 optab optab;
755 tree def, def_stmt;
756 enum vect_def_type dt;
758 /* Is STMT a vectorizable binary/unary operation? */
759 if (!STMT_VINFO_RELEVANT_P (stmt_info))
760 return false;
762 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
764 if (STMT_VINFO_LIVE_P (stmt_info))
766 /* FORNOW: not yet supported. */
767 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
768 fprintf (vect_dump, "value used after loop.");
769 return false;
772 if (TREE_CODE (stmt) != MODIFY_EXPR)
773 return false;
775 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
776 return false;
778 operation = TREE_OPERAND (stmt, 1);
779 code = TREE_CODE (operation);
780 optab = optab_for_tree_code (code, vectype);
782 /* Support only unary or binary operations. */
783 op_type = TREE_CODE_LENGTH (code);
784 if (op_type != unary_op && op_type != binary_op)
786 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
787 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
788 return false;
791 for (i = 0; i < op_type; i++)
793 op = TREE_OPERAND (operation, i);
794 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
796 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
797 fprintf (vect_dump, "use not simple.");
798 return false;
802 /* Supportable by target? */
803 if (!optab)
805 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
806 fprintf (vect_dump, "no optab.");
807 return false;
809 vec_mode = TYPE_MODE (vectype);
810 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
812 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
813 fprintf (vect_dump, "op not supported by target.");
814 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
815 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
816 < vect_min_worthwhile_factor (code))
817 return false;
818 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
819 fprintf (vect_dump, "proceeding using word mode.");
822 /* Worthwhile without SIMD support? */
823 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
824 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
825 < vect_min_worthwhile_factor (code))
827 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
828 fprintf (vect_dump, "not worthwhile without SIMD support.");
829 return false;
832 if (!vec_stmt) /* transformation not required. */
834 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
835 return true;
838 /** Transform. **/
840 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
841 fprintf (vect_dump, "transform binary/unary operation.");
843 /* Handle def. */
844 scalar_dest = TREE_OPERAND (stmt, 0);
845 vec_dest = vect_create_destination_var (scalar_dest, vectype);
847 /* Handle uses. */
848 op0 = TREE_OPERAND (operation, 0);
849 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
851 if (op_type == binary_op)
853 op1 = TREE_OPERAND (operation, 1);
854 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
857 /* Arguments are ready. create the new vector stmt. */
859 if (op_type == binary_op)
860 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
861 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
862 else
863 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
864 build1 (code, vectype, vec_oprnd0));
865 new_temp = make_ssa_name (vec_dest, *vec_stmt);
866 TREE_OPERAND (*vec_stmt, 0) = new_temp;
867 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
869 return true;
873 /* Function vectorizable_store.
875 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
876 can be vectorized.
877 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
878 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
879 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
881 bool
882 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
884 tree scalar_dest;
885 tree data_ref;
886 tree op;
887 tree vec_oprnd1;
888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
891 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
892 enum machine_mode vec_mode;
893 tree dummy;
894 enum dr_alignment_support alignment_support_cheme;
895 ssa_op_iter iter;
896 tree def, def_stmt;
897 enum vect_def_type dt;
899 /* Is vectorizable store? */
901 if (TREE_CODE (stmt) != MODIFY_EXPR)
902 return false;
904 scalar_dest = TREE_OPERAND (stmt, 0);
905 if (TREE_CODE (scalar_dest) != ARRAY_REF
906 && TREE_CODE (scalar_dest) != INDIRECT_REF)
907 return false;
909 op = TREE_OPERAND (stmt, 1);
910 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
912 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
913 fprintf (vect_dump, "use not simple.");
914 return false;
917 vec_mode = TYPE_MODE (vectype);
918 /* FORNOW. In some cases can vectorize even if data-type not supported
919 (e.g. - array initialization with 0). */
920 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
921 return false;
923 if (!STMT_VINFO_DATA_REF (stmt_info))
924 return false;
927 if (!vec_stmt) /* transformation not required. */
929 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
930 return true;
933 /** Transform. **/
935 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
936 fprintf (vect_dump, "transform store");
938 alignment_support_cheme = vect_supportable_dr_alignment (dr);
939 gcc_assert (alignment_support_cheme);
940 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
942 /* Handle use - get the vectorized def from the defining stmt. */
943 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
945 /* Handle def. */
946 /* FORNOW: make sure the data reference is aligned. */
947 vect_align_data_ref (stmt);
948 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
949 data_ref = build_fold_indirect_ref (data_ref);
951 /* Arguments are ready. create the new vector stmt. */
952 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
953 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
955 /* Copy the V_MAY_DEFS representing the aliasing of the original array
956 element's definition to the vector's definition then update the
957 defining statement. The original is being deleted so the same
958 SSA_NAMEs can be used. */
959 copy_virtual_operands (*vec_stmt, stmt);
961 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
963 SSA_NAME_DEF_STMT (def) = *vec_stmt;
965 /* If this virtual def has a use outside the loop and a loop peel is
966 performed then the def may be renamed by the peel. Mark it for
967 renaming so the later use will also be renamed. */
968 mark_sym_for_renaming (SSA_NAME_VAR (def));
971 return true;
975 /* vectorizable_load.
977 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
978 can be vectorized.
979 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
980 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
981 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
983 bool
984 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
986 tree scalar_dest;
987 tree vec_dest = NULL;
988 tree data_ref = NULL;
989 tree op;
990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
991 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
993 tree new_temp;
994 int mode;
995 tree init_addr;
996 tree new_stmt;
997 tree dummy;
998 basic_block new_bb;
999 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1000 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1001 edge pe = loop_preheader_edge (loop);
1002 enum dr_alignment_support alignment_support_cheme;
1004 /* Is vectorizable load? */
1005 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1006 return false;
1008 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1010 if (STMT_VINFO_LIVE_P (stmt_info))
1012 /* FORNOW: not yet supported. */
1013 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1014 fprintf (vect_dump, "value used after loop.");
1015 return false;
1018 if (TREE_CODE (stmt) != MODIFY_EXPR)
1019 return false;
1021 scalar_dest = TREE_OPERAND (stmt, 0);
1022 if (TREE_CODE (scalar_dest) != SSA_NAME)
1023 return false;
1025 op = TREE_OPERAND (stmt, 1);
1026 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1027 return false;
1029 if (!STMT_VINFO_DATA_REF (stmt_info))
1030 return false;
1032 mode = (int) TYPE_MODE (vectype);
1034 /* FORNOW. In some cases can vectorize even if data-type not supported
1035 (e.g. - data copies). */
1036 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1038 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1039 fprintf (vect_dump, "Aligned load, but unsupported type.");
1040 return false;
1043 if (!vec_stmt) /* transformation not required. */
1045 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1046 return true;
1049 /** Transform. **/
1051 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1052 fprintf (vect_dump, "transform load.");
1054 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1055 gcc_assert (alignment_support_cheme);
1057 if (alignment_support_cheme == dr_aligned
1058 || alignment_support_cheme == dr_unaligned_supported)
1060 /* Create:
1061 p = initial_addr;
1062 indx = 0;
1063 loop {
1064 vec_dest = *(p);
1065 indx = indx + 1;
1069 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1070 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1071 if (aligned_access_p (dr))
1072 data_ref = build_fold_indirect_ref (data_ref);
1073 else
1075 int mis = DR_MISALIGNMENT (dr);
1076 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1077 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1078 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1080 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1081 new_temp = make_ssa_name (vec_dest, new_stmt);
1082 TREE_OPERAND (new_stmt, 0) = new_temp;
1083 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1084 copy_virtual_operands (new_stmt, stmt);
1086 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1088 /* Create:
1089 p1 = initial_addr;
1090 msq_init = *(floor(p1))
1091 p2 = initial_addr + VS - 1;
1092 magic = have_builtin ? builtin_result : initial_address;
1093 indx = 0;
1094 loop {
1095 p2' = p2 + indx * vectype_size
1096 lsq = *(floor(p2'))
1097 vec_dest = realign_load (msq, lsq, magic)
1098 indx = indx + 1;
1099 msq = lsq;
1103 tree offset;
1104 tree magic;
1105 tree phi_stmt;
1106 tree msq_init;
1107 tree msq, lsq;
1108 tree dataref_ptr;
1109 tree params;
1111 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1112 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1113 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1114 &init_addr, true);
1115 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1116 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1117 new_temp = make_ssa_name (vec_dest, new_stmt);
1118 TREE_OPERAND (new_stmt, 0) = new_temp;
1119 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1120 gcc_assert (!new_bb);
1121 msq_init = TREE_OPERAND (new_stmt, 0);
1122 copy_virtual_operands (new_stmt, stmt);
1123 update_vuses_to_preheader (new_stmt, loop);
1126 /* <2> Create lsq = *(floor(p2')) in the loop */
1127 offset = build_int_cst (integer_type_node,
1128 TYPE_VECTOR_SUBPARTS (vectype));
1129 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1130 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1131 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1132 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1133 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1134 new_temp = make_ssa_name (vec_dest, new_stmt);
1135 TREE_OPERAND (new_stmt, 0) = new_temp;
1136 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1137 lsq = TREE_OPERAND (new_stmt, 0);
1138 copy_virtual_operands (new_stmt, stmt);
1141 /* <3> */
1142 if (targetm.vectorize.builtin_mask_for_load)
1144 /* Create permutation mask, if required, in loop preheader. */
1145 tree builtin_decl;
1146 params = build_tree_list (NULL_TREE, init_addr);
1147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1148 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1149 new_stmt = build_function_call_expr (builtin_decl, params);
1150 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1151 new_temp = make_ssa_name (vec_dest, new_stmt);
1152 TREE_OPERAND (new_stmt, 0) = new_temp;
1153 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1154 gcc_assert (!new_bb);
1155 magic = TREE_OPERAND (new_stmt, 0);
1157 /* The result of the CALL_EXPR to this builtin is determined from
1158 the value of the parameter and no global variables are touched
1159 which makes the builtin a "const" function. Requiring the
1160 builtin to have the "const" attribute makes it unnecessary
1161 to call mark_call_clobbered_vars_to_rename. */
1162 gcc_assert (TREE_READONLY (builtin_decl));
1164 else
1166 /* Use current address instead of init_addr for reduced reg pressure.
1168 magic = dataref_ptr;
1172 /* <4> Create msq = phi <msq_init, lsq> in loop */
1173 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1174 msq = make_ssa_name (vec_dest, NULL_TREE);
1175 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1176 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1177 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1178 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1181 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1182 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1183 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1184 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1185 new_temp = make_ssa_name (vec_dest, new_stmt);
1186 TREE_OPERAND (new_stmt, 0) = new_temp;
1187 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1189 else
1190 gcc_unreachable ();
1192 *vec_stmt = new_stmt;
1193 return true;
1197 /* Function vectorizable_live_operation.
1199 STMT computes a value that is used outside the loop. Check if
1200 it can be supported. */
1202 bool
1203 vectorizable_live_operation (tree stmt,
1204 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1205 tree *vec_stmt ATTRIBUTE_UNUSED)
1207 tree operation;
1208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1210 int i;
1211 enum tree_code code;
1212 int op_type;
1213 tree op;
1214 tree def, def_stmt;
1215 enum vect_def_type dt;
1217 if (!STMT_VINFO_LIVE_P (stmt_info))
1218 return false;
1220 if (TREE_CODE (stmt) != MODIFY_EXPR)
1221 return false;
1223 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1224 return false;
1226 operation = TREE_OPERAND (stmt, 1);
1227 code = TREE_CODE (operation);
1229 op_type = TREE_CODE_LENGTH (code);
1231 /* FORNOW: support only if all uses are invariant. This means
1232 that the scalar operations can remain in place, unvectorized.
1233 The original last scalar value that they compute will be used. */
1235 for (i = 0; i < op_type; i++)
1237 op = TREE_OPERAND (operation, i);
1238 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1240 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1241 fprintf (vect_dump, "use not simple.");
1242 return false;
1245 if (dt != vect_invariant_def && dt != vect_constant_def)
1246 return false;
1249 /* No transformation is required for the cases we currently support. */
1250 return true;
1254 /* Function vect_is_simple_cond.
1256 Input:
1257 LOOP - the loop that is being vectorized.
1258 COND - Condition that is checked for simple use.
1260 Returns whether a COND can be vectorized. Checks whether
1261 condition operands are supportable using vec_is_simple_use. */
1263 static bool
1264 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1266 tree lhs, rhs;
1267 tree def;
1268 enum vect_def_type dt;
1270 if (!COMPARISON_CLASS_P (cond))
1271 return false;
1273 lhs = TREE_OPERAND (cond, 0);
1274 rhs = TREE_OPERAND (cond, 1);
1276 if (TREE_CODE (lhs) == SSA_NAME)
1278 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1279 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1280 return false;
1282 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1283 return false;
1285 if (TREE_CODE (rhs) == SSA_NAME)
1287 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1288 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1289 return false;
1291 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1292 return false;
1294 return true;
1297 /* vectorizable_condition.
1299 Check if STMT is conditional modify expression that can be vectorized.
1300 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1301 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1302 at BSI.
1304 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1306 bool
1307 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1309 tree scalar_dest = NULL_TREE;
1310 tree vec_dest = NULL_TREE;
1311 tree op = NULL_TREE;
1312 tree cond_expr, then_clause, else_clause;
1313 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1314 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1315 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1316 tree vec_compare, vec_cond_expr;
1317 tree new_temp;
1318 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1319 enum machine_mode vec_mode;
1320 tree def;
1321 enum vect_def_type dt;
1323 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1324 return false;
1326 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1328 if (STMT_VINFO_LIVE_P (stmt_info))
1330 /* FORNOW: not yet supported. */
1331 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1332 fprintf (vect_dump, "value used after loop.");
1333 return false;
1336 if (TREE_CODE (stmt) != MODIFY_EXPR)
1337 return false;
1339 op = TREE_OPERAND (stmt, 1);
1341 if (TREE_CODE (op) != COND_EXPR)
1342 return false;
1344 cond_expr = TREE_OPERAND (op, 0);
1345 then_clause = TREE_OPERAND (op, 1);
1346 else_clause = TREE_OPERAND (op, 2);
1348 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1349 return false;
1351 if (TREE_CODE (then_clause) == SSA_NAME)
1353 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1354 if (!vect_is_simple_use (then_clause, loop_vinfo,
1355 &then_def_stmt, &def, &dt))
1356 return false;
1358 else if (TREE_CODE (then_clause) != INTEGER_CST
1359 && TREE_CODE (then_clause) != REAL_CST)
1360 return false;
1362 if (TREE_CODE (else_clause) == SSA_NAME)
1364 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1365 if (!vect_is_simple_use (else_clause, loop_vinfo,
1366 &else_def_stmt, &def, &dt))
1367 return false;
1369 else if (TREE_CODE (else_clause) != INTEGER_CST
1370 && TREE_CODE (else_clause) != REAL_CST)
1371 return false;
1374 vec_mode = TYPE_MODE (vectype);
1376 if (!vec_stmt)
1378 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
1379 return expand_vec_cond_expr_p (op, vec_mode);
1382 /* Transform */
1384 /* Handle def. */
1385 scalar_dest = TREE_OPERAND (stmt, 0);
1386 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1388 /* Handle cond expr. */
1389 vec_cond_lhs =
1390 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
1391 vec_cond_rhs =
1392 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt);
1393 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt);
1394 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt);
1396 /* Arguments are ready. create the new vector stmt. */
1397 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
1398 vec_cond_lhs, vec_cond_rhs);
1399 vec_cond_expr = build (VEC_COND_EXPR, vectype,
1400 vec_compare, vec_then_clause, vec_else_clause);
1402 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
1403 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1404 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1405 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1407 return true;
1410 /* Function vect_transform_stmt.
1412 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1414 bool
1415 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1417 bool is_store = false;
1418 tree vec_stmt = NULL_TREE;
1419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1420 bool done;
1422 if (STMT_VINFO_RELEVANT_P (stmt_info))
1424 switch (STMT_VINFO_TYPE (stmt_info))
1426 case op_vec_info_type:
1427 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1428 gcc_assert (done);
1429 break;
1431 case assignment_vec_info_type:
1432 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1433 gcc_assert (done);
1434 break;
1436 case load_vec_info_type:
1437 done = vectorizable_load (stmt, bsi, &vec_stmt);
1438 gcc_assert (done);
1439 break;
1441 case store_vec_info_type:
1442 done = vectorizable_store (stmt, bsi, &vec_stmt);
1443 gcc_assert (done);
1444 is_store = true;
1445 break;
1447 case condition_vec_info_type:
1448 done = vectorizable_condition (stmt, bsi, &vec_stmt);
1449 gcc_assert (done);
1450 break;
1452 default:
1453 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1454 fprintf (vect_dump, "stmt not supported.");
1455 gcc_unreachable ();
1458 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1461 if (STMT_VINFO_LIVE_P (stmt_info))
1463 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
1464 gcc_assert (done);
1467 return is_store;
1471 /* This function builds ni_name = number of iterations loop executes
1472 on the loop preheader. */
1474 static tree
1475 vect_build_loop_niters (loop_vec_info loop_vinfo)
1477 tree ni_name, stmt, var;
1478 edge pe;
1479 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1480 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1482 var = create_tmp_var (TREE_TYPE (ni), "niters");
1483 add_referenced_tmp_var (var);
1484 ni_name = force_gimple_operand (ni, &stmt, false, var);
1486 pe = loop_preheader_edge (loop);
1487 if (stmt)
1489 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1490 gcc_assert (!new_bb);
1493 return ni_name;
1497 /* This function generates the following statements:
1499 ni_name = number of iterations loop executes
1500 ratio = ni_name / vf
1501 ratio_mult_vf_name = ratio * vf
1503 and places them at the loop preheader edge. */
1505 static void
1506 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1507 tree *ni_name_ptr,
1508 tree *ratio_mult_vf_name_ptr,
1509 tree *ratio_name_ptr)
1512 edge pe;
1513 basic_block new_bb;
1514 tree stmt, ni_name;
1515 tree var;
1516 tree ratio_name;
1517 tree ratio_mult_vf_name;
1518 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1519 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1520 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1521 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1523 pe = loop_preheader_edge (loop);
1525 /* Generate temporary variable that contains
1526 number of iterations loop executes. */
1528 ni_name = vect_build_loop_niters (loop_vinfo);
1530 /* Create: ratio = ni >> log2(vf) */
1532 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1533 add_referenced_tmp_var (var);
1534 ratio_name = make_ssa_name (var, NULL_TREE);
1535 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1536 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1537 SSA_NAME_DEF_STMT (ratio_name) = stmt;
1539 pe = loop_preheader_edge (loop);
1540 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1541 gcc_assert (!new_bb);
1543 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1545 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1546 add_referenced_tmp_var (var);
1547 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1548 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1549 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1550 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1552 pe = loop_preheader_edge (loop);
1553 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1554 gcc_assert (!new_bb);
1556 *ni_name_ptr = ni_name;
1557 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1558 *ratio_name_ptr = ratio_name;
1560 return;
1564 /* Function update_vuses_to_preheader.
1566 Input:
1567 STMT - a statement with potential VUSEs.
1568 LOOP - the loop whose preheader will contain STMT.
1570 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
1571 appears to be defined in a V_MAY_DEF in another statement in a loop.
1572 One such case is when the VUSE is at the dereference of a __restricted__
1573 pointer in a load and the V_MAY_DEF is at the dereference of a different
1574 __restricted__ pointer in a store. Vectorization may result in
1575 copy_virtual_uses being called to copy the problematic VUSE to a new
1576 statement that is being inserted in the loop preheader. This procedure
1577 is called to change the SSA_NAME in the new statement's VUSE from the
1578 SSA_NAME updated in the loop to the related SSA_NAME available on the
1579 path entering the loop.
1581 When this function is called, we have the following situation:
1583 # vuse <name1>
1584 S1: vload
1585 do {
1586 # name1 = phi < name0 , name2>
1588 # vuse <name1>
1589 S2: vload
1591 # name2 = vdef <name1>
1592 S3: vstore
1594 }while...
1596 Stmt S1 was created in the loop preheader block as part of misaligned-load
1597 handling. This function fixes the name of the vuse of S1 from 'name1' to
1598 'name0'. */
1600 static void
1601 update_vuses_to_preheader (tree stmt, struct loop *loop)
1603 basic_block header_bb = loop->header;
1604 edge preheader_e = loop_preheader_edge (loop);
1605 ssa_op_iter iter;
1606 use_operand_p use_p;
1608 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
1610 tree ssa_name = USE_FROM_PTR (use_p);
1611 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
1612 tree name_var = SSA_NAME_VAR (ssa_name);
1613 basic_block bb = bb_for_stmt (def_stmt);
1615 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
1616 if (!IS_EMPTY_STMT (def_stmt)
1617 && flow_bb_inside_loop_p (loop, bb))
1619 /* If the block containing the statement defining the SSA_NAME
1620 is in the loop then it's necessary to find the definition
1621 outside the loop using the PHI nodes of the header. */
1622 tree phi;
1623 bool updated = false;
1625 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
1627 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
1629 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
1630 updated = true;
1631 break;
1634 gcc_assert (updated);
1640 /* Function vect_update_ivs_after_vectorizer.
1642 "Advance" the induction variables of LOOP to the value they should take
1643 after the execution of LOOP. This is currently necessary because the
1644 vectorizer does not handle induction variables that are used after the
1645 loop. Such a situation occurs when the last iterations of LOOP are
1646 peeled, because:
1647 1. We introduced new uses after LOOP for IVs that were not originally used
1648 after LOOP: the IVs of LOOP are now used by an epilog loop.
1649 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1650 times, whereas the loop IVs should be bumped N times.
1652 Input:
1653 - LOOP - a loop that is going to be vectorized. The last few iterations
1654 of LOOP were peeled.
1655 - NITERS - the number of iterations that LOOP executes (before it is
1656 vectorized). i.e, the number of times the ivs should be bumped.
1657 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1658 coming out from LOOP on which there are uses of the LOOP ivs
1659 (this is the path from LOOP->exit to epilog_loop->preheader).
1661 The new definitions of the ivs are placed in LOOP->exit.
1662 The phi args associated with the edge UPDATE_E in the bb
1663 UPDATE_E->dest are updated accordingly.
1665 Assumption 1: Like the rest of the vectorizer, this function assumes
1666 a single loop exit that has a single predecessor.
1668 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1669 organized in the same order.
1671 Assumption 3: The access function of the ivs is simple enough (see
1672 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1674 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1675 coming out of LOOP on which the ivs of LOOP are used (this is the path
1676 that leads to the epilog loop; other paths skip the epilog loop). This
1677 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1678 needs to have its phis updated.
1681 static void
1682 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1683 edge update_e)
1685 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1686 basic_block exit_bb = loop->single_exit->dest;
1687 tree phi, phi1;
1688 basic_block update_bb = update_e->dest;
1690 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1692 /* Make sure there exists a single-predecessor exit bb: */
1693 gcc_assert (single_pred_p (exit_bb));
1695 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1696 phi && phi1;
1697 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1699 tree access_fn = NULL;
1700 tree evolution_part;
1701 tree init_expr;
1702 tree step_expr;
1703 tree var, stmt, ni, ni_name;
1704 block_stmt_iterator last_bsi;
1706 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1708 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1709 print_generic_expr (vect_dump, phi, TDF_SLIM);
1712 /* Skip virtual phi's. */
1713 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1715 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1716 fprintf (vect_dump, "virtual phi. skip.");
1717 continue;
1720 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1721 gcc_assert (access_fn);
1722 evolution_part =
1723 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1724 gcc_assert (evolution_part != NULL_TREE);
1726 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1727 of degree >= 2 or exponential. */
1728 gcc_assert (!tree_is_chrec (evolution_part));
1730 step_expr = evolution_part;
1731 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1732 loop->num));
1734 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1735 build2 (MULT_EXPR, TREE_TYPE (niters),
1736 niters, step_expr), init_expr);
1738 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1739 add_referenced_tmp_var (var);
1741 ni_name = force_gimple_operand (ni, &stmt, false, var);
1743 /* Insert stmt into exit_bb. */
1744 last_bsi = bsi_last (exit_bb);
1745 if (stmt)
1746 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1748 /* Fix phi expressions in the successor bb. */
1749 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1754 /* Function vect_do_peeling_for_loop_bound
1756 Peel the last iterations of the loop represented by LOOP_VINFO.
1757 The peeled iterations form a new epilog loop. Given that the loop now
1758 iterates NITERS times, the new epilog loop iterates
1759 NITERS % VECTORIZATION_FACTOR times.
1761 The original loop will later be made to iterate
1762 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1764 static void
1765 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1766 struct loops *loops)
1769 tree ni_name, ratio_mult_vf_name;
1770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1771 struct loop *new_loop;
1772 edge update_e;
1773 basic_block preheader;
1774 #ifdef ENABLE_CHECKING
1775 int loop_num;
1776 #endif
1778 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1779 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1781 /* Generate the following variables on the preheader of original loop:
1783 ni_name = number of iteration the original loop executes
1784 ratio = ni_name / vf
1785 ratio_mult_vf_name = ratio * vf */
1786 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1787 &ratio_mult_vf_name, ratio);
1789 #ifdef ENABLE_CHECKING
1790 loop_num = loop->num;
1791 #endif
1792 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
1793 ratio_mult_vf_name, ni_name, false);
1794 #ifdef ENABLE_CHECKING
1795 gcc_assert (new_loop);
1796 gcc_assert (loop_num == loop->num);
1797 slpeel_verify_cfg_after_peeling (loop, new_loop);
1798 #endif
1800 /* A guard that controls whether the new_loop is to be executed or skipped
1801 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1802 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1803 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1804 is on the path where the LOOP IVs are used and need to be updated. */
1806 preheader = loop_preheader_edge (new_loop)->src;
1807 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
1808 update_e = EDGE_PRED (preheader, 0);
1809 else
1810 update_e = EDGE_PRED (preheader, 1);
1812 /* Update IVs of original loop as if they were advanced
1813 by ratio_mult_vf_name steps. */
1814 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1816 /* After peeling we have to reset scalar evolution analyzer. */
1817 scev_reset ();
1819 return;
1823 /* Function vect_gen_niters_for_prolog_loop
1825 Set the number of iterations for the loop represented by LOOP_VINFO
1826 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1827 and the misalignment of DR - the data reference recorded in
1828 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1829 this loop, the data reference DR will refer to an aligned location.
1831 The following computation is generated:
1833 If the misalignment of DR is known at compile time:
1834 addr_mis = int mis = DR_MISALIGNMENT (dr);
1835 Else, compute address misalignment in bytes:
1836 addr_mis = addr & (vectype_size - 1)
1838 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1840 (elem_size = element type size; an element is the scalar element
1841 whose type is the inner type of the vectype) */
1843 static tree
1844 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1846 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1847 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1848 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1849 tree var, stmt;
1850 tree iters, iters_name;
1851 edge pe;
1852 basic_block new_bb;
1853 tree dr_stmt = DR_STMT (dr);
1854 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1855 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1856 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1857 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1858 tree niters_type = TREE_TYPE (loop_niters);
1860 pe = loop_preheader_edge (loop);
1862 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1864 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1865 int element_size = vectype_align/vf;
1866 int elem_misalign = byte_misalign / element_size;
1868 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1869 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1870 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
1872 else
1874 tree new_stmts = NULL_TREE;
1875 tree start_addr =
1876 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1877 tree ptr_type = TREE_TYPE (start_addr);
1878 tree size = TYPE_SIZE (ptr_type);
1879 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1880 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1881 tree elem_size_log =
1882 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1883 tree vf_tree = build_int_cst (unsigned_type_node, vf);
1884 tree byte_misalign;
1885 tree elem_misalign;
1887 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1888 gcc_assert (!new_bb);
1890 /* Create: byte_misalign = addr & (vectype_size - 1) */
1891 byte_misalign =
1892 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1894 /* Create: elem_misalign = byte_misalign / element_size */
1895 elem_misalign =
1896 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1898 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1899 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1900 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1901 iters = fold_convert (niters_type, iters);
1904 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1905 /* If the loop bound is known at compile time we already verified that it is
1906 greater than vf; since the misalignment ('iters') is at most vf, there's
1907 no need to generate the MIN_EXPR in this case. */
1908 if (TREE_CODE (loop_niters) != INTEGER_CST)
1909 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1911 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1913 fprintf (vect_dump, "niters for prolog loop: ");
1914 print_generic_expr (vect_dump, iters, TDF_SLIM);
1917 var = create_tmp_var (niters_type, "prolog_loop_niters");
1918 add_referenced_tmp_var (var);
1919 iters_name = force_gimple_operand (iters, &stmt, false, var);
1921 /* Insert stmt on loop preheader edge. */
1922 if (stmt)
1924 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1925 gcc_assert (!new_bb);
1928 return iters_name;
1932 /* Function vect_update_init_of_dr
1934 NITERS iterations were peeled from LOOP. DR represents a data reference
1935 in LOOP. This function updates the information recorded in DR to
1936 account for the fact that the first NITERS iterations had already been
1937 executed. Specifically, it updates the OFFSET field of stmt_info. */
1939 static void
1940 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1942 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1943 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
1945 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1946 STMT_VINFO_VECT_STEP (stmt_info)));
1947 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1948 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1952 /* Function vect_update_inits_of_drs
1954 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1955 This function updates the information recorded for the data references in
1956 the loop to account for the fact that the first NITERS iterations had
1957 already been executed. Specifically, it updates the initial_condition of the
1958 access_function of all the data_references in the loop. */
1960 static void
1961 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1963 unsigned int i;
1964 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1965 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1967 if (vect_dump && (dump_flags & TDF_DETAILS))
1968 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1970 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1972 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1973 vect_update_init_of_dr (dr, niters);
1976 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1978 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1979 vect_update_init_of_dr (dr, niters);
1984 /* Function vect_do_peeling_for_alignment
1986 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1987 'niters' is set to the misalignment of one of the data references in the
1988 loop, thereby forcing it to refer to an aligned location at the beginning
1989 of the execution of this loop. The data reference for which we are
1990 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1992 static void
1993 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1995 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1996 tree niters_of_prolog_loop, ni_name;
1997 tree n_iters;
1998 struct loop *new_loop;
2000 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2001 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2003 ni_name = vect_build_loop_niters (loop_vinfo);
2004 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2006 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2007 new_loop =
2008 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2009 niters_of_prolog_loop, ni_name, true);
2010 #ifdef ENABLE_CHECKING
2011 gcc_assert (new_loop);
2012 slpeel_verify_cfg_after_peeling (new_loop, loop);
2013 #endif
2015 /* Update number of times loop executes. */
2016 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2017 LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
2018 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
2020 /* Update the init conditions of the access functions of all data refs. */
2021 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2023 /* After peeling we have to reset scalar evolution analyzer. */
2024 scev_reset ();
2026 return;
2030 /* Function vect_transform_loop.
2032 The analysis phase has determined that the loop is vectorizable.
2033 Vectorize the loop - created vectorized stmts to replace the scalar
2034 stmts in the loop, and update the loop exit condition. */
2036 void
2037 vect_transform_loop (loop_vec_info loop_vinfo,
2038 struct loops *loops ATTRIBUTE_UNUSED)
2040 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2041 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2042 int nbbs = loop->num_nodes;
2043 block_stmt_iterator si;
2044 int i;
2045 tree ratio = NULL;
2046 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2048 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2049 fprintf (vect_dump, "=== vec_transform_loop ===");
2052 /* Peel the loop if there are data refs with unknown alignment.
2053 Only one data ref with unknown store is allowed. */
2055 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2056 vect_do_peeling_for_alignment (loop_vinfo, loops);
2058 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2059 compile time constant), or it is a constant that doesn't divide by the
2060 vectorization factor, then an epilog loop needs to be created.
2061 We therefore duplicate the loop: the original loop will be vectorized,
2062 and will compute the first (n/VF) iterations. The second copy of the loop
2063 will remain scalar and will compute the remaining (n%VF) iterations.
2064 (VF is the vectorization factor). */
2066 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2067 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2068 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2069 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2070 else
2071 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2072 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2074 /* 1) Make sure the loop header has exactly two entries
2075 2) Make sure we have a preheader basic block. */
2077 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2079 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2082 /* FORNOW: the vectorizer supports only loops which body consist
2083 of one basic block (header + empty latch). When the vectorizer will
2084 support more involved loop forms, the order by which the BBs are
2085 traversed need to be reconsidered. */
2087 for (i = 0; i < nbbs; i++)
2089 basic_block bb = bbs[i];
2091 for (si = bsi_start (bb); !bsi_end_p (si);)
2093 tree stmt = bsi_stmt (si);
2094 stmt_vec_info stmt_info;
2095 bool is_store;
2097 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2099 fprintf (vect_dump, "------>vectorizing statement: ");
2100 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2102 stmt_info = vinfo_for_stmt (stmt);
2103 gcc_assert (stmt_info);
2104 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2106 bsi_next (&si);
2107 continue;
2109 #ifdef ENABLE_CHECKING
2110 /* FORNOW: Verify that all stmts operate on the same number of
2111 units and no inner unrolling is necessary. */
2112 gcc_assert
2113 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2114 == vectorization_factor);
2115 #endif
2116 /* -------- vectorize statement ------------ */
2117 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2118 fprintf (vect_dump, "transform statement.");
2120 is_store = vect_transform_stmt (stmt, &si);
2121 if (is_store)
2123 /* Free the attached stmt_vec_info and remove the stmt. */
2124 stmt_ann_t ann = stmt_ann (stmt);
2125 free (stmt_info);
2126 set_stmt_info ((tree_ann_t)ann, NULL);
2127 bsi_remove (&si);
2128 continue;
2131 bsi_next (&si);
2132 } /* stmts in BB */
2133 } /* BBs in loop */
2135 slpeel_make_loop_iterate_ntimes (loop, ratio);
2137 /* The memory tags and pointers in vectorized statements need to
2138 have their SSA forms updated. FIXME, why can't this be delayed
2139 until all the loops have been transformed? */
2140 update_ssa (TODO_update_ssa);
2142 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
2143 fprintf (vect_dump, "LOOP VECTORIZED.");