[official-gcc.git] / gcc / tree-vect-transform.c
blob 19097fd8c3e4339b6bc9349ff53a3837fdfaf019
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, tree *, bool);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
65 /* Utility function dealing with loop peeling (not peeling itself). */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74 static void vect_do_peeling_for_loop_bound
75 (loop_vec_info, tree *, struct loops *);
76 static int vect_min_worthwhile_factor (enum tree_code);
79 /* Function vect_get_new_vect_var.
81 Returns a name for a new variable. The current naming scheme appends the
82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83 the name of vectorizer generated variables, and appends that to NAME if
84 provided. */
86 static tree
87 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
89 const char *prefix;
90 tree new_vect_var;
92 switch (var_kind)
94 case vect_simple_var:
95 prefix = "vect_";
96 break;
97 case vect_scalar_var:
98 prefix = "stmp_";
99 break;
100 case vect_pointer_var:
101 prefix = "vect_p";
102 break;
103 default:
104 gcc_unreachable ();
107 if (name)
108 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109 else
110 new_vect_var = create_tmp_var (type, prefix);
112 return new_vect_var;
116 /* Function vect_create_addr_base_for_vector_ref.
118 Create an expression that computes the address of the first memory location
119 that will be accessed for a data reference.
121 Input:
122 STMT: The statement containing the data reference.
123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124 OFFSET: Optional. If supplied, it is be added to the initial address.
126 Output:
127 1. Return an SSA_NAME whose value is the address of the memory location of
128 the first vector of the data reference.
129 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130 these statement(s) which define the returned SSA_NAME.
132 FORNOW: We are only handling array accesses with step 1. */
134 static tree
135 vect_create_addr_base_for_vector_ref (tree stmt,
136 tree *new_stmt_list,
137 tree offset)
139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
140 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
141 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
142 tree base_name = build_fold_indirect_ref (data_ref_base);
143 tree ref = DR_REF (dr);
144 tree scalar_type = TREE_TYPE (ref);
145 tree scalar_ptr_type = build_pointer_type (scalar_type);
146 tree vec_stmt;
147 tree new_temp;
148 tree addr_base, addr_expr;
149 tree dest, new_stmt;
150 tree base_offset = unshare_expr (DR_OFFSET (dr));
151 tree init = unshare_expr (DR_INIT (dr));
153 /* Create base_offset */
154 base_offset = size_binop (PLUS_EXPR, base_offset, init);
155 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
156 add_referenced_var (dest);
157 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
158 append_to_statement_list_force (new_stmt, new_stmt_list);
160 if (offset)
162 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
163 add_referenced_var (tmp);
164 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
165 DR_STEP (dr));
166 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
167 base_offset, offset);
168 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
169 append_to_statement_list_force (new_stmt, new_stmt_list);
172 /* base + base_offset */
173 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
174 base_offset);
176 /* addr_expr = addr_base */
177 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
178 get_name (base_name));
179 add_referenced_var (addr_expr);
180 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
181 new_temp = make_ssa_name (addr_expr, vec_stmt);
182 TREE_OPERAND (vec_stmt, 0) = new_temp;
183 append_to_statement_list_force (vec_stmt, new_stmt_list);
185 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "created ");
188 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
190 return new_temp;
194 /* Function vect_create_data_ref_ptr.
196 Create a new pointer to vector type (vp), that points to the first location
197 accessed in the loop by STMT, along with the def-use update chain to
198 appropriately advance the pointer through the loop iterations. Also set
199 aliasing information for the pointer. This vector pointer is used by the
200 callers to this function to create a memory reference expression for vector
201 load/store access.
203 Input:
204 1. STMT: a stmt that references memory. Expected to be of the form
205 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
206 2. BSI: block_stmt_iterator where new stmts can be added.
207 3. OFFSET (optional): an offset to be added to the initial address accessed
208 by the data-ref in STMT.
209 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
210 pointing to the initial address.
212 Output:
213 1. Declare a new ptr to vector_type, and have it point to the base of the
214 data reference (initial addressed accessed by the data reference).
215 For example, for vector of type V8HI, the following code is generated:
217 v8hi *vp;
218 vp = (v8hi *)initial_address;
220 if OFFSET is not supplied:
221 initial_address = &a[init];
222 if OFFSET is supplied:
223 initial_address = &a[init + OFFSET];
225 Return the initial_address in INITIAL_ADDRESS.
227 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
228 update the pointer in each iteration of the loop.
230 Return the increment stmt that updates the pointer in PTR_INCR.
232 3. Return the pointer. */
234 static tree
235 vect_create_data_ref_ptr (tree stmt,
236 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
237 tree offset, tree *initial_address, tree *ptr_incr,
238 bool only_init)
240 tree base_name;
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
244 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
245 tree vect_ptr_type;
246 tree vect_ptr;
247 tree tag;
248 tree new_temp;
249 tree vec_stmt;
250 tree new_stmt_list = NULL_TREE;
251 edge pe = loop_preheader_edge (loop);
252 basic_block new_bb;
253 tree vect_ptr_init;
254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
256 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
258 if (vect_print_dump_info (REPORT_DETAILS))
260 tree data_ref_base = base_name;
261 fprintf (vect_dump, "create vector-pointer variable to type: ");
262 print_generic_expr (vect_dump, vectype, TDF_SLIM);
263 if (TREE_CODE (data_ref_base) == VAR_DECL)
264 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
265 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
266 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
267 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
268 fprintf (vect_dump, " vectorizing a record based array ref: ");
269 else if (TREE_CODE (data_ref_base) == SSA_NAME)
270 fprintf (vect_dump, " vectorizing a pointer ref: ");
271 print_generic_expr (vect_dump, base_name, TDF_SLIM);
274 /** (1) Create the new vector-pointer variable: **/
276 vect_ptr_type = build_pointer_type (vectype);
277 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
278 get_name (base_name));
279 add_referenced_var (vect_ptr);
282 /** (2) Add aliasing information to the new vector-pointer:
283 (The points-to info (DR_PTR_INFO) may be defined later.) **/
285 tag = DR_MEMTAG (dr);
286 gcc_assert (tag);
288 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
289 tag must be created with tag added to its may alias list. */
290 if (!MTAG_P (tag))
291 new_type_alias (vect_ptr, tag, DR_REF (dr));
292 else
293 var_ann (vect_ptr)->symbol_mem_tag = tag;
295 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
297 /** (3) Calculate the initial address the vector-pointer, and set
298 the vector-pointer to point to it before the loop: **/
300 /* Create: (&(base[init_val+offset]) in the loop preheader. */
301 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
302 offset);
303 pe = loop_preheader_edge (loop);
304 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
305 gcc_assert (!new_bb);
306 *initial_address = new_temp;
308 /* Create: p = (vectype *) initial_base */
309 vec_stmt = fold_convert (vect_ptr_type, new_temp);
310 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
311 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
312 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
313 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
314 gcc_assert (!new_bb);
317 /** (4) Handle the updating of the vector-pointer inside the loop: **/
319 if (only_init) /* No update in loop is required. */
321 /* Copy the points-to information if it exists. */
322 if (DR_PTR_INFO (dr))
323 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
324 return vect_ptr_init;
326 else
328 block_stmt_iterator incr_bsi;
329 bool insert_after;
330 tree indx_before_incr, indx_after_incr;
331 tree incr;
333 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
334 create_iv (vect_ptr_init,
335 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
336 NULL_TREE, loop, &incr_bsi, insert_after,
337 &indx_before_incr, &indx_after_incr);
338 incr = bsi_stmt (incr_bsi);
339 set_stmt_info (stmt_ann (incr),
340 new_stmt_vec_info (incr, loop_vinfo));
342 /* Copy the points-to information if it exists. */
343 if (DR_PTR_INFO (dr))
345 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
346 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
348 merge_alias_info (vect_ptr_init, indx_before_incr);
349 merge_alias_info (vect_ptr_init, indx_after_incr);
350 if (ptr_incr)
351 *ptr_incr = incr;
353 return indx_before_incr;
358 /* Function bump_vector_ptr
360 Increment a pointer (to a vector type) by vector-size. Connect the new
361 increment stmt to the exising def-use update-chain of the pointer.
363 The pointer def-use update-chain before this function:
364 DATAREF_PTR = phi (p_0, p_2)
365 ....
366 PTR_INCR: p_2 = DATAREF_PTR + step
368 The pointer def-use update-chain after this function:
369 DATAREF_PTR = phi (p_0, p_2)
370 ....
371 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
372 ....
373 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
375 Input:
376 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
377 in the loop.
378 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
379 The increment amount across iterations is also expected to be
380 vector_size.
381 BSI - location where the new update stmt is to be placed.
382 STMT - the original scalar memory-access stmt that is being vectorized.
384 Output: Return NEW_DATAREF_PTR as illustrated above.
388 static tree
389 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
390 tree stmt)
392 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
393 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
394 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
395 tree vptr_type = TREE_TYPE (dataref_ptr);
396 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
397 tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
398 tree incr_stmt;
399 ssa_op_iter iter;
400 use_operand_p use_p;
401 tree new_dataref_ptr;
403 incr_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_var,
404 build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
405 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
406 TREE_OPERAND (incr_stmt, 0) = new_dataref_ptr;
407 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
409 /* Update the vector-pointer's cross-iteration increment. */
410 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
412 tree use = USE_FROM_PTR (use_p);
414 if (use == dataref_ptr)
415 SET_USE (use_p, new_dataref_ptr);
416 else
417 gcc_assert (tree_int_cst_compare (use, update) == 0);
420 /* Copy the points-to information if it exists. */
421 if (DR_PTR_INFO (dr))
422 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
423 merge_alias_info (new_dataref_ptr, dataref_ptr);
425 return new_dataref_ptr;
429 /* Function vect_create_destination_var.
431 Create a new temporary of type VECTYPE. */
433 static tree
434 vect_create_destination_var (tree scalar_dest, tree vectype)
436 tree vec_dest;
437 const char *new_name;
438 tree type;
439 enum vect_var_kind kind;
441 kind = vectype ? vect_simple_var : vect_scalar_var;
442 type = vectype ? vectype : TREE_TYPE (scalar_dest);
444 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
446 new_name = get_name (scalar_dest);
447 if (!new_name)
448 new_name = "var_";
449 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
450 add_referenced_var (vec_dest);
452 return vec_dest;
456 /* Function vect_init_vector.
458 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
459 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
460 used in the vectorization of STMT. */
462 static tree
463 vect_init_vector (tree stmt, tree vector_var)
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
468 tree new_var;
469 tree init_stmt;
470 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
471 tree vec_oprnd;
472 edge pe;
473 tree new_temp;
474 basic_block new_bb;
476 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
477 add_referenced_var (new_var);
479 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
480 new_temp = make_ssa_name (new_var, init_stmt);
481 TREE_OPERAND (init_stmt, 0) = new_temp;
483 pe = loop_preheader_edge (loop);
484 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
485 gcc_assert (!new_bb);
487 if (vect_print_dump_info (REPORT_DETAILS))
489 fprintf (vect_dump, "created new init_stmt: ");
490 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
493 vec_oprnd = TREE_OPERAND (init_stmt, 0);
494 return vec_oprnd;
498 /* Function vect_get_vec_def_for_operand.
500 OP is an operand in STMT. This function returns a (vector) def that will be
501 used in the vectorized stmt for STMT.
503 In the case that OP is an SSA_NAME which is defined in the loop, then
504 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
506 In case OP is an invariant or constant, a new stmt that creates a vector def
507 needs to be introduced. */
509 static tree
510 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
512 tree vec_oprnd;
513 tree vec_stmt;
514 tree def_stmt;
515 stmt_vec_info def_stmt_info = NULL;
516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
517 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
518 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
521 tree vec_inv;
522 tree vec_cst;
523 tree t = NULL_TREE;
524 tree def;
525 int i;
526 enum vect_def_type dt;
527 bool is_simple_use;
529 if (vect_print_dump_info (REPORT_DETAILS))
531 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
532 print_generic_expr (vect_dump, op, TDF_SLIM);
535 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
536 gcc_assert (is_simple_use);
537 if (vect_print_dump_info (REPORT_DETAILS))
539 if (def)
541 fprintf (vect_dump, "def = ");
542 print_generic_expr (vect_dump, def, TDF_SLIM);
544 if (def_stmt)
546 fprintf (vect_dump, " def_stmt = ");
547 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
551 switch (dt)
553 /* Case 1: operand is a constant. */
554 case vect_constant_def:
556 if (scalar_def)
557 *scalar_def = op;
559 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
563 for (i = nunits - 1; i >= 0; --i)
565 t = tree_cons (NULL_TREE, op, t);
567 vec_cst = build_vector (vectype, t);
568 return vect_init_vector (stmt, vec_cst);
571 /* Case 2: operand is defined outside the loop - loop invariant. */
572 case vect_invariant_def:
574 if (scalar_def)
575 *scalar_def = def;
577 /* Create 'vec_inv = {inv,inv,..,inv}' */
578 if (vect_print_dump_info (REPORT_DETAILS))
579 fprintf (vect_dump, "Create vector_inv.");
581 for (i = nunits - 1; i >= 0; --i)
583 t = tree_cons (NULL_TREE, def, t);
586 /* FIXME: use build_constructor directly. */
587 vec_inv = build_constructor_from_list (vectype, t);
588 return vect_init_vector (stmt, vec_inv);
591 /* Case 3: operand is defined inside the loop. */
592 case vect_loop_def:
594 if (scalar_def)
595 *scalar_def = def_stmt;
597 /* Get the def from the vectorized stmt. */
598 def_stmt_info = vinfo_for_stmt (def_stmt);
599 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
600 gcc_assert (vec_stmt);
601 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
602 return vec_oprnd;
605 /* Case 4: operand is defined by a loop header phi - reduction */
606 case vect_reduction_def:
608 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
610 /* Get the def before the loop */
611 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
612 return get_initial_def_for_reduction (stmt, op, scalar_def);
615 /* Case 5: operand is defined by loop-header phi - induction. */
616 case vect_induction_def:
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "induction - unsupported.");
620 internal_error ("no support for induction"); /* FORNOW */
623 default:
624 gcc_unreachable ();
629 /* Function vect_get_vec_def_for_stmt_copy
631 Return a vector-def for an operand. This function is used when the
632 vectorized stmt to be created (by the caller to this function) is a "copy"
633 created in case the vectorized result cannot fit in one vector, and several
634 copies of the vector-stmt are required. In this case the vector-def is
635 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
636 of the stmt that defines VEC_OPRND.
637 DT is the type of the vector def VEC_OPRND.
639 Context:
640 In case the vectorization factor (VF) is bigger than the number
641 of elements that can fit in a vectype (nunits), we have to generate
642 more than one vector stmt to vectorize the scalar stmt. This situation
643 arises when there are multiple data-types operated upon in the loop; the
644 smallest data-type determines the VF, and as a result, when vectorizing
645 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
646 vector stmt (each computing a vector of 'nunits' results, and together
647 computing 'VF' results in each iteration). This function is called when
648 vectorizing such a stmt (e.g. vectorizing S2 in the illusration below, in
649 which VF=16 and nuniti=4, so the number of copies required is 4):
651 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
653 S1: x = load VS1.0: vx.0 = memref0 VS1.1
654 VS1.1: vx.1 = memref1 VS1.2
655 VS1.2: vx.2 = memref2 VS1.3
656 VS1.3: vx.3 = memref3
658 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
659 VSnew.1: vz1 = vx.1 + ... VSnew.2
660 VSnew.2: vz2 = vx.2 + ... VSnew.3
661 VSnew.3: vz3 = vx.3 + ...
663 The vectorization of S1 is explained in vectorizable_load.
664 The vectorization of S2:
665 To create the first vector-stmt out of the 4 copies - VSnew.0 -
666 the function 'vect_get_vec_def_for_operand' is called to
667 get the relevant vector-def for each operand of S2. For operand x it
668 returns the vector-def 'vx.0'.
670 To create the remaining copies of the vector-stmt (VSnew.j), this
671 function is called to get the relevant vector-def for each operand. It is
672 obtained from the respective VS1.j stmt, which is recorded in the
673 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
675 For example, to obtain the vector-def 'vx.1' in order to create the
676 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
677 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
678 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
679 and return its def ('vx.1').
680 Overall, to create the above sequence this function will be called 3 times:
681 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
682 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
683 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
685 static tree
686 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
688 tree vec_stmt_for_operand;
689 stmt_vec_info def_stmt_info;
691 if (dt == vect_invariant_def || dt == vect_constant_def)
693 /* Do nothing; can reuse same def. */ ;
694 return vec_oprnd;
697 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
698 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
699 gcc_assert (def_stmt_info);
700 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
701 gcc_assert (vec_stmt_for_operand);
702 vec_oprnd = TREE_OPERAND (vec_stmt_for_operand, 0);
704 return vec_oprnd;
708 /* Function vect_finish_stmt_generation.
710 Insert a new stmt. */
712 static void
713 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
714 block_stmt_iterator *bsi)
716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
719 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
720 set_stmt_info (get_stmt_ann (vec_stmt),
721 new_stmt_vec_info (vec_stmt, loop_vinfo));
723 if (vect_print_dump_info (REPORT_DETAILS))
725 fprintf (vect_dump, "add new stmt: ");
726 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
729 /* Make sure bsi points to the stmt that is being vectorized. */
730 gcc_assert (stmt == bsi_stmt (*bsi));
732 #ifdef USE_MAPPED_LOCATION
733 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
734 #else
735 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
736 #endif
740 #define ADJUST_IN_EPILOG 1
742 /* Function get_initial_def_for_reduction
744 Input:
745 STMT - a stmt that performs a reduction operation in the loop.
746 INIT_VAL - the initial value of the reduction variable
748 Output:
749 SCALAR_DEF - a tree that holds a value to be added to the final result
750 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
751 Return a vector variable, initialized according to the operation that STMT
752 performs. This vector will be used as the initial value of the
753 vector of partial results.
755 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
756 add: [0,0,...,0,0]
757 mult: [1,1,...,1,1]
758 min/max: [init_val,init_val,..,init_val,init_val]
759 bit and/or: [init_val,init_val,..,init_val,init_val]
760 and when necessary (e.g. add/mult case) let the caller know
761 that it needs to adjust the result by init_val.
763 Option2: Initialize the vector as follows:
764 add: [0,0,...,0,init_val]
765 mult: [1,1,...,1,init_val]
766 min/max: [init_val,init_val,...,init_val]
767 bit and/or: [init_val,init_val,...,init_val]
768 and no adjustments are needed.
770 For example, for the following code:
772 s = init_val;
773 for (i=0;i<n;i++)
774 s = s + a[i];
776 STMT is 's = s + a[i]', and the reduction variable is 's'.
777 For a vector of 4 units, we want to return either [0,0,0,init_val],
778 or [0,0,0,0] and let the caller know that it needs to adjust
779 the result at the end by 'init_val'.
781 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
782 TODO: Use some cost-model to estimate which scheme is more profitable.
785 static tree
786 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
789 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
790 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
791 int nelements;
792 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
793 tree type = TREE_TYPE (init_val);
794 tree def;
795 tree vec, t = NULL_TREE;
796 bool need_epilog_adjust;
797 int i;
799 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
801 switch (code)
803 case WIDEN_SUM_EXPR:
804 case DOT_PROD_EXPR:
805 case PLUS_EXPR:
806 if (INTEGRAL_TYPE_P (type))
807 def = build_int_cst (type, 0);
808 else
809 def = build_real (type, dconst0);
811 #ifdef ADJUST_IN_EPILOG
812 /* All the 'nunits' elements are set to 0. The final result will be
813 adjusted by 'init_val' at the loop epilog. */
814 nelements = nunits;
815 need_epilog_adjust = true;
816 #else
817 /* 'nunits - 1' elements are set to 0; The last element is set to
818 'init_val'. No further adjustments at the epilog are needed. */
819 nelements = nunits - 1;
820 need_epilog_adjust = false;
821 #endif
822 break;
824 case MIN_EXPR:
825 case MAX_EXPR:
826 def = init_val;
827 nelements = nunits;
828 need_epilog_adjust = false;
829 break;
831 default:
832 gcc_unreachable ();
835 for (i = nelements - 1; i >= 0; --i)
836 t = tree_cons (NULL_TREE, def, t);
838 if (nelements == nunits - 1)
840 /* Set the last element of the vector. */
841 t = tree_cons (NULL_TREE, init_val, t);
842 nelements += 1;
844 gcc_assert (nelements == nunits);
846 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
847 vec = build_vector (vectype, t);
848 else
849 vec = build_constructor_from_list (vectype, t);
851 if (!need_epilog_adjust)
852 *scalar_def = NULL_TREE;
853 else
854 *scalar_def = init_val;
856 return vect_init_vector (stmt, vec);
860 /* Function vect_create_epilog_for_reduction
862 Create code at the loop-epilog to finalize the result of a reduction
863 computation.
865 VECT_DEF is a vector of partial results.
866 REDUC_CODE is the tree-code for the epilog reduction.
867 STMT is the scalar reduction stmt that is being vectorized.
868 REDUCTION_PHI is the phi-node that carries the reduction computation.
870 This function:
871 1. Creates the reduction def-use cycle: sets the the arguments for
872 REDUCTION_PHI:
873 The loop-entry argument is the vectorized initial-value of the reduction.
874 The loop-latch argument is VECT_DEF - the vector of partial sums.
875 2. "Reduces" the vector of partial results VECT_DEF into a single result,
876 by applying the operation specified by REDUC_CODE if available, or by
877 other means (whole-vector shifts or a scalar loop).
878 The function also creates a new phi node at the loop exit to preserve
879 loop-closed form, as illustrated below.
881 The flow at the entry to this function:
883 loop:
884 vec_def = phi <null, null> # REDUCTION_PHI
885 VECT_DEF = vector_stmt # vectorized form of STMT
886 s_loop = scalar_stmt # (scalar) STMT
887 loop_exit:
888 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
889 use <s_out0>
890 use <s_out0>
892 The above is transformed by this function into:
894 loop:
895 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
896 VECT_DEF = vector_stmt # vectorized form of STMT
897 s_loop = scalar_stmt # (scalar) STMT
898 loop_exit:
899 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
900 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
901 v_out2 = reduce <v_out1>
902 s_out3 = extract_field <v_out2, 0>
903 s_out4 = adjust_result <s_out3>
904 use <s_out4>
905 use <s_out4>
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = TREE_OPERAND (stmt, 1);
  int op_type;

  /* The reduction variable is the last operand of the reduction operation;
     its scalar type determines the vectype used throughout the epilog.  */
  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle ***/

  /* 1.1 set the loop-entry arg of the reduction-phi: */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi: */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }

  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

          ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = loop->single_exit->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
  /* All epilog stmts below are inserted in order at the start of the
     single exit block, via this iterator.  */
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
  scalar_dest = TREE_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
               {
                 Create:  va' = vec_shift <va, offset>
                 Create:  va = vop <va, va'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              /* NOTE: this 'bitpos' intentionally shadows the outer
                 variable of the same name; it is the shift amount here.  */
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build2 (shift_code, vectype, new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build2 (code, vectype, new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          /* Extract element 0 to seed the scalar accumulator.  */
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          TREE_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                                    rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                        build2 (code, scalar_type, new_name, new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          /* Steps 1 and 2 were combined above: NEW_TEMP already holds the
             scalar result (s_out3).  */
          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      /* On big-endian targets the scalar result ends up in the highest
         numbered element; on little-endian in element 0.  */
      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                       TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'scalar_initial_def' is zero).

         Create:
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.6 Replace uses of s_out0 with uses of the final scalar result
         held in NEW_TEMP (s_out4, or s_out3 when no adjustment was
         needed).  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
      SET_USE (use_p, new_temp);
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
      get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */

bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree operation;
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def, def_stmt;
  enum vect_def_type dt;
  tree new_phi;
  tree scalar_type;
  bool is_simple_use;
  tree orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  stmt_vec_info prev_stmt_info;
  tree reduc_def;
  tree new_stmt = NULL_TREE;
  int j;

  gcc_assert (ncopies >= 1);

  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (!STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != binary_op && op_type != ternary_op)
    return false;
  scalar_dest = TREE_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type-1; i++)
    {
      op = TREE_OPERAND (operation, i);
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
      gcc_assert (is_simple_use);
      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
                  dt == vect_constant_def);
    }

  /* The last operand (i == op_type-1 after the loop above) must be the
     reduction variable defined by a loop-header phi.  */
  op = TREE_OPERAND (operation, i);
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;

  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Fall back to emulation via word_mode only when that is still
         expected to be profitable.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */

  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      /* NUM_TREE_CODES signals "no direct reduction" to
         vect_create_epilog_for_reduction, which then falls back to
         shifts or scalar code.  */
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Create the reduction-phi that defines the reduction-operand.  */
  new_phi = create_phi_node (vec_dest, loop->header);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          op = TREE_OPERAND (operation, 0);
          loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
          if (op_type == ternary_op)
            {
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }

          /* Get the vector def for the reduction variable from the phi node */
          reduc_def = PHI_RESULT (new_phi);
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1)  */
          reduc_def = TREE_OPERAND (new_stmt ,0);
        }

      /* Arguments are ready. create the new vector stmt.  */

      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
}
1553 /* Function vectorizable_assignment.
1555 Check if STMT performs an assignment (copy) that can be vectorized.
1556 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1557 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1558 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1560 bool
1561 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1563 tree vec_dest;
1564 tree scalar_dest;
1565 tree op;
1566 tree vec_oprnd;
1567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1570 tree new_temp;
1571 tree def, def_stmt;
1572 enum vect_def_type dt;
1573 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1574 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1576 gcc_assert (ncopies >= 1);
1577 if (ncopies > 1)
1578 return false; /* FORNOW */
1580 /* Is vectorizable assignment? */
1581 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1582 return false;
1584 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1586 if (TREE_CODE (stmt) != MODIFY_EXPR)
1587 return false;
1589 scalar_dest = TREE_OPERAND (stmt, 0);
1590 if (TREE_CODE (scalar_dest) != SSA_NAME)
1591 return false;
1593 op = TREE_OPERAND (stmt, 1);
1594 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1596 if (vect_print_dump_info (REPORT_DETAILS))
1597 fprintf (vect_dump, "use not simple.");
1598 return false;
1601 if (!vec_stmt) /* transformation not required. */
1603 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1604 return true;
1607 /** Transform. **/
1608 if (vect_print_dump_info (REPORT_DETAILS))
1609 fprintf (vect_dump, "transform assignment.");
1611 /* Handle def. */
1612 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1614 /* Handle use. */
1615 op = TREE_OPERAND (stmt, 1);
1616 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1618 /* Arguments are ready. create the new vector stmt. */
1619 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1620 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1621 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1622 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1624 return true;
1628 /* Function vect_min_worthwhile_factor.
1630 For a loop where we could vectorize the operation indicated by CODE,
1631 return the minimum vectorization factor that makes it worthwhile
1632 to use generic vectors. */
1633 static int
1634 vect_min_worthwhile_factor (enum tree_code code)
1636 switch (code)
1638 case PLUS_EXPR:
1639 case MINUS_EXPR:
1640 case NEGATE_EXPR:
1641 return 4;
1643 case BIT_AND_EXPR:
1644 case BIT_IOR_EXPR:
1645 case BIT_XOR_EXPR:
1646 case BIT_NOT_EXPR:
1647 return 2;
1649 default:
1650 return INT_MAX;
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt0, dt1;
  tree new_stmt;
  stmt_vec_info prev_stmt_info;
  int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  int nunits_out;
  tree vectype_out;
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  int j;

  gcc_assert (ncopies >= 1);

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  /* The result type must have the same number of vector elements as the
     input type; type-changing ops are handled elsewhere (e.g. the
     type demotion/promotion routines).  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_out != nunits_in)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  op0 = TREE_OPERAND (operation, 0);
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Emulation in word_mode is only acceptable when it is still
         expected to be profitable (see vect_min_worthwhile_factor).  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
        return false;

      /* Invariant argument is needed for a vector shift
         by a scalar shift operand.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
             || dt1 == vect_constant_def
             || dt1 == vect_invariant_def))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "operand mode requires invariant argument.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below illustrates
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
     4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          if (op_type == binary_op)
            {
              if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
                {
                  /* Vector shl and shr insn patterns can be defined with
                     scalar operand 2 (shift operand).  In this case, use
                     constant or loop invariant op1 directly, without
                     extending it to vector mode first.  */
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        fprintf (vect_dump, "operand 1 using scalar mode.");
                      vec_oprnd1 = op1;
                    }
                }

              /* vec_oprnd1 is still NULL unless the scalar-shift case
                 above already set it.  */
              if (!vec_oprnd1)
                vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
            }
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
        }

      /* Arguments are ready. create the new vector stmt.  */

      if (op_type == binary_op)
        new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
                           build2 (code, vectype, vec_oprnd0, vec_oprnd1));
      else
        new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
                           build1 (code, vectype, vec_oprnd0));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
1924 /* Function vectorizable_type_demotion
1926 Check if STMT performs a binary or unary operation that involves
1927 type demotion, and if it can be vectorized.
1928 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1929 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1930 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1932 bool
1933 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
1934 tree *vec_stmt)
1936 tree vec_dest;
1937 tree scalar_dest;
1938 tree operation;
1939 tree op0;
1940 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
1941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1942 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1943 enum tree_code code;
1944 tree new_temp;
1945 tree def, def_stmt;
1946 enum vect_def_type dt0;
1947 tree new_stmt;
1948 stmt_vec_info prev_stmt_info;
1949 int nunits_in;
1950 int nunits_out;
1951 tree vectype_out;
1952 int ncopies;
1953 int j;
1954 tree expr;
1955 tree vectype_in;
1956 tree scalar_type;
1957 optab optab;
1958 enum machine_mode vec_mode;
1960 /* Is STMT a vectorizable type-demotion operation? */
1962 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1963 return false;
1965 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1967 if (STMT_VINFO_LIVE_P (stmt_info))
1969 /* FORNOW: not yet supported. */
1970 if (vect_print_dump_info (REPORT_DETAILS))
1971 fprintf (vect_dump, "value used after loop.");
1972 return false;
1975 if (TREE_CODE (stmt) != MODIFY_EXPR)
1976 return false;
1978 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1979 return false;
1981 operation = TREE_OPERAND (stmt, 1);
1982 code = TREE_CODE (operation);
1983 if (code != NOP_EXPR && code != CONVERT_EXPR)
1984 return false;
1986 op0 = TREE_OPERAND (operation, 0);
1987 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
1988 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1990 scalar_dest = TREE_OPERAND (stmt, 0);
1991 scalar_type = TREE_TYPE (scalar_dest);
1992 vectype_out = get_vectype_for_scalar_type (scalar_type);
1993 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1994 if (nunits_in != nunits_out / 2) /* FORNOW */
1995 return false;
1997 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1998 gcc_assert (ncopies >= 1);
2000 /* Check the operands of the operation. */
2001 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "use not simple.");
2005 return false;
2008 /* Supportable by target? */
2009 code = VEC_PACK_MOD_EXPR;
2010 optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
2011 if (!optab)
2012 return false;
2014 vec_mode = TYPE_MODE (vectype_in);
2015 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2016 return false;
2018 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2020 if (!vec_stmt) /* transformation not required. */
2022 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2023 return true;
2026 /** Transform. **/
2028 if (vect_print_dump_info (REPORT_DETAILS))
2029 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2030 ncopies);
2032 /* Handle def. */
2033 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2035 /* In case the vectorization factor (VF) is bigger than the number
2036 of elements that we can fit in a vectype (nunits), we have to generate
2037 more than one vector stmt - i.e - we need to "unroll" the
2038 vector stmt by a factor VF/nunits. */
2039 prev_stmt_info = NULL;
2040 for (j = 0; j < ncopies; j++)
2042 /* Handle uses. */
2043 if (j == 0)
2045 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2046 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2047 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
2049 else
2051 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2052 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2055 /* Arguments are ready. Create the new vector stmt. */
2056 expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
2057 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
2058 new_temp = make_ssa_name (vec_dest, new_stmt);
2059 TREE_OPERAND (new_stmt, 0) = new_temp;
2060 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2062 if (j == 0)
2063 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2064 else
2065 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2067 prev_stmt_info = vinfo_for_stmt (new_stmt);
2070 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2071 return true;
2075 /* Function vect_gen_widened_results_half
2077 Create a vector stmt whose code, type, number of arguments, and result
2078 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2079 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2080 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2081 needs to be created (DECL is a function-decl of a target-builtin).
2082 STMT is the original scalar stmt that we are vectorizing. */
2084 static tree
2085 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2086 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2087 tree vec_dest, block_stmt_iterator *bsi,
2088 tree stmt)
2090 tree vec_params;
2091 tree expr;
2092 tree new_stmt;
2093 tree new_temp;
2094 tree sym;
2095 ssa_op_iter iter;
2097 /* Generate half of the widened result: */
2098 if (code == CALL_EXPR)
2100 /* Target specific support */
2101 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2102 if (op_type == binary_op)
2103 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2104 expr = build_function_call_expr (decl, vec_params);
2106 else
2108 /* Generic support */
2109 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2110 if (op_type == binary_op)
2111 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2112 else
2113 expr = build1 (code, vectype, vec_oprnd0);
2115 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
2116 new_temp = make_ssa_name (vec_dest, new_stmt);
2117 TREE_OPERAND (new_stmt, 0) = new_temp;
2118 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2120 if (code == CALL_EXPR)
2122 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2124 if (TREE_CODE (sym) == SSA_NAME)
2125 sym = SSA_NAME_VAR (sym);
2126 mark_sym_for_renaming (sym);
2130 return new_stmt;
2134 /* Function vectorizable_type_promotion
2136 Check if STMT performs a binary or unary operation that involves
2137 type promotion, and if it can be vectorized.
2138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2142 bool
2143 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2144 tree *vec_stmt)
2146 tree vec_dest;
2147 tree scalar_dest;
2148 tree operation;
2149 tree op0, op1 = NULL;
2150 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2152 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2153 enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
2154 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2155 int op_type;
2156 tree def, def_stmt;
2157 enum vect_def_type dt0, dt1;
2158 tree new_stmt;
2159 stmt_vec_info prev_stmt_info;
2160 int nunits_in;
2161 int nunits_out;
2162 tree vectype_out;
2163 int ncopies;
2164 int j;
2165 tree vectype_in;
2167 /* Is STMT a vectorizable type-promotion operation? */
2169 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2170 return false;
2172 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2174 if (STMT_VINFO_LIVE_P (stmt_info))
2176 /* FORNOW: not yet supported. */
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "value used after loop.");
2179 return false;
2182 if (TREE_CODE (stmt) != MODIFY_EXPR)
2183 return false;
2185 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2186 return false;
2188 operation = TREE_OPERAND (stmt, 1);
2189 code = TREE_CODE (operation);
2190 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
2191 return false;
2193 op0 = TREE_OPERAND (operation, 0);
2194 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2195 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2196 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2197 gcc_assert (ncopies >= 1);
2199 scalar_dest = TREE_OPERAND (stmt, 0);
2200 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2201 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2202 if (nunits_out != nunits_in / 2) /* FORNOW */
2203 return false;
2205 /* Check the operands of the operation. */
2206 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "use not simple.");
2210 return false;
2213 op_type = TREE_CODE_LENGTH (code);
2214 if (op_type == binary_op)
2216 op1 = TREE_OPERAND (operation, 1);
2217 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "use not simple.");
2221 return false;
2225 /* Supportable by target? */
2226 if (!supportable_widening_operation (code, stmt, vectype_in,
2227 &decl1, &decl2, &code1, &code2))
2228 return false;
2230 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2232 if (!vec_stmt) /* transformation not required. */
2234 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2235 return true;
2238 /** Transform. **/
2240 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2242 ncopies);
2244 /* Handle def. */
2245 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2247 /* In case the vectorization factor (VF) is bigger than the number
2248 of elements that we can fit in a vectype (nunits), we have to generate
2249 more than one vector stmt - i.e - we need to "unroll" the
2250 vector stmt by a factor VF/nunits. */
2252 prev_stmt_info = NULL;
2253 for (j = 0; j < ncopies; j++)
2255 /* Handle uses. */
2256 if (j == 0)
2258 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2259 if (op_type == binary_op)
2260 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2262 else
2264 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2265 if (op_type == binary_op)
2266 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2269 /* Arguments are ready. Create the new vector stmt. We are creating
2270 two vector defs because the widened result does not fit in one vector.
2271 The vectorized stmt can be expressed as a call to a taregt builtin,
2272 or a using a tree-code. */
2273 /* Generate first half of the widened result: */
2274 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2275 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2276 if (j == 0)
2277 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2278 else
2279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2280 prev_stmt_info = vinfo_for_stmt (new_stmt);
2282 /* Generate second half of the widened result: */
2283 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2284 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2286 prev_stmt_info = vinfo_for_stmt (new_stmt);
2290 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2291 return true;
2295 /* Function vectorizable_store.
2297 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2298 can be vectorized.
2299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2303 bool
2304 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2306 tree scalar_dest;
2307 tree data_ref;
2308 tree op;
2309 tree vec_oprnd = NULL_TREE;
2310 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2311 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2312 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2314 enum machine_mode vec_mode;
2315 tree dummy;
2316 enum dr_alignment_support alignment_support_cheme;
2317 ssa_op_iter iter;
2318 def_operand_p def_p;
2319 tree def, def_stmt;
2320 enum vect_def_type dt;
2321 stmt_vec_info prev_stmt_info;
2322 tree dataref_ptr = NULL_TREE;
2323 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2324 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2325 int j;
2327 gcc_assert (ncopies >= 1);
2329 /* Is vectorizable store? */
2331 if (TREE_CODE (stmt) != MODIFY_EXPR)
2332 return false;
2334 scalar_dest = TREE_OPERAND (stmt, 0);
2335 if (TREE_CODE (scalar_dest) != ARRAY_REF
2336 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2337 return false;
2339 op = TREE_OPERAND (stmt, 1);
2340 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2342 if (vect_print_dump_info (REPORT_DETAILS))
2343 fprintf (vect_dump, "use not simple.");
2344 return false;
2347 vec_mode = TYPE_MODE (vectype);
2348 /* FORNOW. In some cases can vectorize even if data-type not supported
2349 (e.g. - array initialization with 0). */
2350 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2351 return false;
2353 if (!STMT_VINFO_DATA_REF (stmt_info))
2354 return false;
2357 if (!vec_stmt) /* transformation not required. */
2359 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2360 return true;
2363 /** Transform. **/
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2368 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2369 gcc_assert (alignment_support_cheme);
2370 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2372 /* In case the vectorization factor (VF) is bigger than the number
2373 of elements that we can fit in a vectype (nunits), we have to generate
2374 more than one vector stmt - i.e - we need to "unroll" the
2375 vector stmt by a factor VF/nunits. For more details see documentation in
2376 vect_get_vec_def_for_copy_stmt. */
2378 prev_stmt_info = NULL;
2379 for (j = 0; j < ncopies; j++)
2381 tree new_stmt;
2382 tree ptr_incr;
2384 if (j == 0)
2386 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2387 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy,
2388 &ptr_incr, false);
2390 else
2392 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2393 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2396 /* Arguments are ready. create the new vector stmt. */
2397 data_ref = build_fold_indirect_ref (dataref_ptr);
2398 new_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd);
2399 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2401 /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
2402 use outside the loop and a loop peel is performed then the def may be
2403 renamed by the peel. Mark it for renaming so the later use will also
2404 be renamed. */
2405 copy_virtual_operands (new_stmt, stmt);
2406 if (j == 0)
2408 /* The original store is deleted so the same SSA_NAMEs can be used.
2410 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
2412 SSA_NAME_DEF_STMT (def) = new_stmt;
2413 mark_sym_for_renaming (SSA_NAME_VAR (def));
2416 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2418 else
2420 /* Create new names for all the definitions created by COPY and
2421 add replacement mappings for each new name. */
2422 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
2424 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2425 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2428 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2431 prev_stmt_info = vinfo_for_stmt (new_stmt);
2434 return true;
2438 /* Function vect_setup_realignment
2440 This function is called when vectorizing an unaligned load using
2441 the dr_unaligned_software_pipeline scheme.
2442 This function generates the following code at the loop prolog:
2444 p = initial_addr;
2445 msq_init = *(floor(p)); # prolog load
2446 realignment_token = call target_builtin;
2447 loop:
2448 msq = phi (msq_init, ---)
2450 The code above sets up a new (vector) pointer, pointing to the first
2451 location accessed by STMT, and a "floor-aligned" load using that pointer.
2452 It also generates code to compute the "realignment-token" (if the relevant
2453 target hook was defined), and creates a phi-node at the loop-header bb
2454 whose arguments are the result of the prolog-load (created by this
2455 function) and the result of a load that takes place in the loop (to be
2456 created by the caller to this function).
2457 The caller to this function uses the phi-result (msq) to create the
2458 realignment code inside the loop, and sets up the missing phi argument,
2459 as follows:
2461 loop:
2462 msq = phi (msq_init, lsq)
2463 lsq = *(floor(p')); # load in loop
2464 result = realign_load (msq, lsq, realignment_token);
2466 Input:
2467 STMT - (scalar) load stmt to be vectorized. This load accesses
2468 a memory location that may be unaligned.
2469 BSI - place where new code is to be inserted.
2471 Output:
2472 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2473 target hook, if defined.
2474 Return value - the result of the loop-header phi node.
2477 static tree
2478 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
2479 tree *realignment_token)
2481 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2482 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2484 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2485 edge pe = loop_preheader_edge (loop);
2486 tree scalar_dest = TREE_OPERAND (stmt, 0);
2487 tree vec_dest;
2488 tree init_addr;
2489 tree inc;
2490 tree ptr;
2491 tree data_ref;
2492 tree new_stmt;
2493 basic_block new_bb;
2494 tree msq_init;
2495 tree new_temp;
2496 tree phi_stmt;
2497 tree msq;
2499 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
2500 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2501 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true);
2502 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2503 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2504 new_temp = make_ssa_name (vec_dest, new_stmt);
2505 TREE_OPERAND (new_stmt, 0) = new_temp;
2506 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2507 gcc_assert (!new_bb);
2508 msq_init = TREE_OPERAND (new_stmt, 0);
2509 copy_virtual_operands (new_stmt, stmt);
2510 update_vuses_to_preheader (new_stmt, loop);
2512 /* 2. Create permutation mask, if required, in loop preheader. */
2513 if (targetm.vectorize.builtin_mask_for_load)
2515 tree builtin_decl;
2516 tree params = build_tree_list (NULL_TREE, init_addr);
2518 vec_dest = vect_create_destination_var (scalar_dest,
2519 TREE_TYPE (new_stmt));
2520 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2521 new_stmt = build_function_call_expr (builtin_decl, params);
2522 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, new_stmt);
2523 new_temp = make_ssa_name (vec_dest, new_stmt);
2524 TREE_OPERAND (new_stmt, 0) = new_temp;
2525 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2526 gcc_assert (!new_bb);
2527 *realignment_token = TREE_OPERAND (new_stmt, 0);
2529 /* The result of the CALL_EXPR to this builtin is determined from
2530 the value of the parameter and no global variables are touched
2531 which makes the builtin a "const" function. Requiring the
2532 builtin to have the "const" attribute makes it unnecessary
2533 to call mark_call_clobbered. */
2534 gcc_assert (TREE_READONLY (builtin_decl));
2537 /* 3. Create msq = phi <msq_init, lsq> in loop */
2538 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2539 msq = make_ssa_name (vec_dest, NULL_TREE);
2540 phi_stmt = create_phi_node (msq, loop->header);
2541 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2542 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2544 return msq;
2548 /* vectorizable_load.
2550 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2551 can be vectorized.
2552 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2553 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2554 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2556 bool
2557 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2559 tree scalar_dest;
2560 tree vec_dest = NULL;
2561 tree data_ref = NULL;
2562 tree op;
2563 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2564 stmt_vec_info prev_stmt_info;
2565 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2566 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2567 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2569 tree new_temp;
2570 int mode;
2571 tree new_stmt;
2572 tree dummy;
2573 enum dr_alignment_support alignment_support_cheme;
2574 tree dataref_ptr = NULL_TREE;
2575 tree ptr_incr;
2576 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2577 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2578 int j;
2579 tree msq = NULL_TREE, lsq;
2580 tree offset = NULL_TREE;
2581 tree realignment_token = NULL_TREE;
2582 tree phi_stmt = NULL_TREE;
2584 /* Is vectorizable load? */
2585 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2586 return false;
2588 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2590 if (STMT_VINFO_LIVE_P (stmt_info))
2592 /* FORNOW: not yet supported. */
2593 if (vect_print_dump_info (REPORT_DETAILS))
2594 fprintf (vect_dump, "value used after loop.");
2595 return false;
2598 if (TREE_CODE (stmt) != MODIFY_EXPR)
2599 return false;
2601 scalar_dest = TREE_OPERAND (stmt, 0);
2602 if (TREE_CODE (scalar_dest) != SSA_NAME)
2603 return false;
2605 op = TREE_OPERAND (stmt, 1);
2606 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2607 return false;
2609 if (!STMT_VINFO_DATA_REF (stmt_info))
2610 return false;
2612 mode = (int) TYPE_MODE (vectype);
2614 /* FORNOW. In some cases can vectorize even if data-type not supported
2615 (e.g. - data copies). */
2616 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2618 if (vect_print_dump_info (REPORT_DETAILS))
2619 fprintf (vect_dump, "Aligned load, but unsupported type.");
2620 return false;
2623 if (!vec_stmt) /* transformation not required. */
2625 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2626 return true;
2629 /** Transform. **/
2631 if (vect_print_dump_info (REPORT_DETAILS))
2632 fprintf (vect_dump, "transform load.");
2634 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2635 gcc_assert (alignment_support_cheme);
2637 /* In case the vectorization factor (VF) is bigger than the number
2638 of elements that we can fit in a vectype (nunits), we have to generate
2639 more than one vector stmt - i.e - we need to "unroll" the
2640 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2641 from one copy of the vector stmt to the next, in the field
2642 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2643 stages to find the correct vector defs to be used when vectorizing
2644 stmts that use the defs of the current stmt. The example below illustrates
2645 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2646 4 vectorized stmts):
2648 before vectorization:
2649 RELATED_STMT VEC_STMT
2650 S1: x = memref - -
2651 S2: z = x + 1 - -
2653 step 1: vectorize stmt S1:
2654 We first create the vector stmt VS1_0, and, as usual, record a
2655 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
2656 Next, we create the vector stmt VS1_1, and record a pointer to
2657 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
2658 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
2659 stmts and pointers:
2660 RELATED_STMT VEC_STMT
2661 VS1_0: vx0 = memref0 VS1_1 -
2662 VS1_1: vx1 = memref1 VS1_2 -
2663 VS1_2: vx2 = memref2 VS1_3 -
2664 VS1_3: vx3 = memref3 - -
2665 S1: x = load - VS1_0
2666 S2: z = x + 1 - -
2668 See in documentation in vect_get_vec_def_for_stmt_copy for how the
2669 information we recorded in RELATED_STMT field is used to vectorize
2670 stmt S2. */
2672 /* If the data reference is aligned (dr_aligned) or potentially unaligned
2673 on a target that supports unaligned accesses (dr_unaligned_supported)
2674 we generate the following code:
2675 p = initial_addr;
2676 indx = 0;
2677 loop {
2678 p = p + indx * vectype_size;
2679 vec_dest = *(p);
2680 indx = indx + 1;
2683 Otherwise, the data reference is potentially unaligned on a target that
2684 does not support unaligned accesses (dr_unaligned_software_pipeline) -
2685 then generate the following code, in which the data in each iteration is
2686 obtained by two vector loads, one from the previous iteration, and one
2687 from the current iteration:
2688 p1 = initial_addr;
2689 msq_init = *(floor(p1))
2690 p2 = initial_addr + VS - 1;
2691 realignment_token = call target_builtin;
2692 indx = 0;
2693 loop {
2694 p2 = p2 + indx * vectype_size
2695 lsq = *(floor(p2))
2696 vec_dest = realign_load (msq, lsq, realignment_token)
2697 indx = indx + 1;
2698 msq = lsq;
2702 if (alignment_support_cheme == dr_unaligned_software_pipeline)
2704 msq = vect_setup_realignment (stmt, bsi, &realignment_token);
2705 phi_stmt = SSA_NAME_DEF_STMT (msq);
2706 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
2709 prev_stmt_info = NULL;
2710 for (j = 0; j < ncopies; j++)
2712 /* 1. Create the vector pointer update chain. */
2713 if (j == 0)
2714 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset,
2715 &dummy, &ptr_incr, false);
2716 else
2717 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2719 /* 2. Create the vector-load in the loop. */
2720 switch (alignment_support_cheme)
2722 case dr_aligned:
2723 gcc_assert (aligned_access_p (dr));
2724 data_ref = build_fold_indirect_ref (dataref_ptr);
2725 break;
2726 case dr_unaligned_supported:
2728 int mis = DR_MISALIGNMENT (dr);
2729 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2731 gcc_assert (!aligned_access_p (dr));
2732 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2733 data_ref =
2734 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
2735 break;
2737 case dr_unaligned_software_pipeline:
2738 gcc_assert (!aligned_access_p (dr));
2739 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2740 break;
2741 default:
2742 gcc_unreachable ();
2744 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2745 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2746 new_temp = make_ssa_name (vec_dest, new_stmt);
2747 TREE_OPERAND (new_stmt, 0) = new_temp;
2748 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2749 copy_virtual_operands (new_stmt, stmt);
2750 mark_new_vars_to_rename (new_stmt);
2752 /* 3. Handle explicit realignment if necessary/supported. */
2753 if (alignment_support_cheme == dr_unaligned_software_pipeline)
2755 /* Create in loop:
2756 <vec_dest = realign_load (msq, lsq, realignment_token)> */
2757 lsq = TREE_OPERAND (new_stmt, 0);
2758 if (!realignment_token)
2759 realignment_token = dataref_ptr;
2760 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2761 new_stmt =
2762 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
2763 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2764 new_temp = make_ssa_name (vec_dest, new_stmt);
2765 TREE_OPERAND (new_stmt, 0) = new_temp;
2766 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2767 if (j == ncopies - 1)
2768 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2769 msq = lsq;
2772 if (j == 0)
2773 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2774 else
2775 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2776 prev_stmt_info = vinfo_for_stmt (new_stmt);
2779 return true;
2783 /* Function vectorizable_live_operation.
2785 STMT computes a value that is used outside the loop. Check if
2786 it can be supported. */
2788 bool
2789 vectorizable_live_operation (tree stmt,
2790 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2791 tree *vec_stmt ATTRIBUTE_UNUSED)
2793 tree operation;
2794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2796 int i;
2797 enum tree_code code;
2798 int op_type;
2799 tree op;
2800 tree def, def_stmt;
2801 enum vect_def_type dt;
2803 if (!STMT_VINFO_LIVE_P (stmt_info))
2804 return false;
2806 if (TREE_CODE (stmt) != MODIFY_EXPR)
2807 return false;
2809 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2810 return false;
2812 operation = TREE_OPERAND (stmt, 1);
2813 code = TREE_CODE (operation);
2815 op_type = TREE_CODE_LENGTH (code);
2817 /* FORNOW: support only if all uses are invariant. This means
2818 that the scalar operations can remain in place, unvectorized.
2819 The original last scalar value that they compute will be used. */
2821 for (i = 0; i < op_type; i++)
2823 op = TREE_OPERAND (operation, i);
2824 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2826 if (vect_print_dump_info (REPORT_DETAILS))
2827 fprintf (vect_dump, "use not simple.");
2828 return false;
2831 if (dt != vect_invariant_def && dt != vect_constant_def)
2832 return false;
2835 /* No transformation is required for the cases we currently support. */
2836 return true;
2840 /* Function vect_is_simple_cond.
2842 Input:
2843 LOOP - the loop that is being vectorized.
2844 COND - Condition that is checked for simple use.
2846 Returns whether a COND can be vectorized. Checks whether
2847 condition operands are supportable using vec_is_simple_use. */
2849 static bool
2850 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2852 tree lhs, rhs;
2853 tree def;
2854 enum vect_def_type dt;
2856 if (!COMPARISON_CLASS_P (cond))
2857 return false;
2859 lhs = TREE_OPERAND (cond, 0);
2860 rhs = TREE_OPERAND (cond, 1);
2862 if (TREE_CODE (lhs) == SSA_NAME)
2864 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2865 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2866 return false;
2868 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2869 return false;
2871 if (TREE_CODE (rhs) == SSA_NAME)
2873 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2874 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2875 return false;
2877 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
2878 return false;
2880 return true;
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at BSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree def;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* Multiple vector copies per scalar stmt are not handled here yet.  */
  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The stmt must have the shape  lhs = cond ? then : else.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  op = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (op) != COND_EXPR)
    return false;

  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
    return false;

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
    return false;

  /* THEN/ELSE clauses must each be an SSA name with a simple use,
     or an integer/real constant.  */
  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST)
    return false;

  vec_mode = TYPE_MODE (vectype);

  /* Analysis phase: record the stmt kind and ask the target whether a
     VEC_COND_EXPR of this shape is expandable.  */
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (op, vec_mode);
    }

  /* Transform */

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready. create the new vector stmt.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                        vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                          vec_compare, vec_then_clause, vec_else_clause);

  /* Build the vector assignment and give it a fresh SSA destination.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.
   Returns true iff STMT is a store (callers may need to know this to
   remove the scalar store afterwards).  */

bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree orig_stmt_in_pattern;
  bool done;

  /* Dispatch on the stmt kind recorded during the analysis phase.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
        {
        case type_demotion_vec_info_type:
          done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case type_promotion_vec_info_type:
          done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case op_vec_info_type:
          done = vectorizable_operation (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case assignment_vec_info_type:
          done = vectorizable_assignment (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case load_vec_info_type:
          done = vectorizable_load (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case store_vec_info_type:
          done = vectorizable_store (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          is_store = true;
          break;

        case condition_vec_info_type:
          done = vectorizable_condition (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        default:
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }

      gcc_assert (vec_stmt);
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
            {
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);

              /* STMT was inserted by the vectorizer to replace a computation
                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
                 sequence that computed this idiom.  We need to record a pointer
                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
                 detail in the documentation of vect_pattern_recog.  */

              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
            }
        }
    }

  /* Values live after the loop need extra handling (e.g. reduction
     epilogues).  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
        {
        case reduc_vec_info_type:
          done = vectorizable_reduction (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        default:
          done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
          gcc_assert (done);
        }
    }

  return is_store;
}
3106 /* This function builds ni_name = number of iterations loop executes
3107 on the loop preheader. */
3109 static tree
3110 vect_build_loop_niters (loop_vec_info loop_vinfo)
3112 tree ni_name, stmt, var;
3113 edge pe;
3114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3115 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3117 var = create_tmp_var (TREE_TYPE (ni), "niters");
3118 add_referenced_var (var);
3119 ni_name = force_gimple_operand (ni, &stmt, false, var);
3121 pe = loop_preheader_edge (loop);
3122 if (stmt)
3124 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3125 gcc_assert (!new_bb);
3128 return ni_name;
3132 /* This function generates the following statements:
3134 ni_name = number of iterations loop executes
3135 ratio = ni_name / vf
3136 ratio_mult_vf_name = ratio * vf
3138 and places them at the loop preheader edge. */
3140 static void
3141 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3142 tree *ni_name_ptr,
3143 tree *ratio_mult_vf_name_ptr,
3144 tree *ratio_name_ptr)
3147 edge pe;
3148 basic_block new_bb;
3149 tree stmt, ni_name;
3150 tree var;
3151 tree ratio_name;
3152 tree ratio_mult_vf_name;
3153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3154 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3155 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3156 tree log_vf;
3158 pe = loop_preheader_edge (loop);
3160 /* Generate temporary variable that contains
3161 number of iterations loop executes. */
3163 ni_name = vect_build_loop_niters (loop_vinfo);
3164 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
3166 /* Create: ratio = ni >> log2(vf) */
3168 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3169 add_referenced_var (var);
3170 ratio_name = make_ssa_name (var, NULL_TREE);
3171 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3172 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3173 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3175 pe = loop_preheader_edge (loop);
3176 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3177 gcc_assert (!new_bb);
3179 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3181 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3182 add_referenced_var (var);
3183 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3184 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3185 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3186 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3188 pe = loop_preheader_edge (loop);
3189 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3190 gcc_assert (!new_bb);
3192 *ni_name_ptr = ni_name;
3193 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3194 *ratio_name_ptr = ratio_name;
3196 return;
/* Function update_vuses_to_preheader.

   Input:
   STMT - a statement with potential VUSEs.
   LOOP - the loop whose preheader will contain STMT.

   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
   appears to be defined in a V_MAY_DEF in another statement in a loop.
   One such case is when the VUSE is at the dereference of a __restricted__
   pointer in a load and the V_MAY_DEF is at the dereference of a different
   __restricted__ pointer in a store.  Vectorization may result in
   copy_virtual_uses being called to copy the problematic VUSE to a new
   statement that is being inserted in the loop preheader.  This procedure
   is called to change the SSA_NAME in the new statement's VUSE from the
   SSA_NAME updated in the loop to the related SSA_NAME available on the
   path entering the loop.

   When this function is called, we have the following situation:

        # vuse <name1>
        S1: vload
   do {
        # name1 = phi < name0 , name2>

        # vuse <name1>
        S2: vload

        # name2 = vdef <name1>
        S3: vstore

   }while...

   Stmt S1 was created in the loop preheader block as part of misaligned-load
   handling.  This function fixes the name of the vuse of S1 from 'name1' to
   'name0'.  */

static void
update_vuses_to_preheader (tree stmt, struct loop *loop)
{
  basic_block header_bb = loop->header;
  edge preheader_e = loop_preheader_edge (loop);
  ssa_op_iter iter;
  use_operand_p use_p;

  /* Walk only the virtual uses of STMT.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
    {
      tree ssa_name = USE_FROM_PTR (use_p);
      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
      tree name_var = SSA_NAME_VAR (ssa_name);
      basic_block bb = bb_for_stmt (def_stmt);

      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
      if (!IS_EMPTY_STMT (def_stmt)
          && flow_bb_inside_loop_p (loop, bb))
        {
          /* If the block containing the statement defining the SSA_NAME
             is in the loop then it's necessary to find the definition
             outside the loop using the PHI nodes of the header.  */
          tree phi;
          bool updated = false;

          /* Find the header phi for the same base variable and replace
             the use with that phi's argument on the preheader edge.  */
          for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
            {
              if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
                {
                  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
                  updated = true;
                  break;
                }
            }
          /* A matching header phi must exist for any name defined in
             the loop; otherwise the IR is inconsistent.  */
          gcc_assert (updated);
        }
    }
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the update_bb phis in lockstep
     (Assumption 2: they are in the same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
        unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                               loop->num));

      /* Advanced value = init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                   build2 (MULT_EXPR, TREE_TYPE (niters),
                           niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */

static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                   &ratio_mult_vf_name, ratio);

  /* Peeling must not renumber the original loop.  */
  loop_num = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
                                            ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
        whose type is the inner type of the vectype)  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Compile-time known misalignment: the prolog count is a constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Unknown misalignment: generate runtime code that computes the
         prolog count from the address of the first access.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1) */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1) */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
3573 /* Function vect_update_init_of_dr
3575 NITERS iterations were peeled from LOOP. DR represents a data reference
3576 in LOOP. This function updates the information recorded in DR to
3577 account for the fact that the first NITERS iterations had already been
3578 executed. Specifically, it updates the OFFSET field of DR. */
3580 static void
3581 vect_update_init_of_dr (struct data_reference *dr, tree niters)
3583 tree offset = DR_OFFSET (dr);
3585 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
3586 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
3587 DR_OFFSET (dr) = offset;
3591 /* Function vect_update_inits_of_drs
3593 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3594 This function updates the information recorded for the data references in
3595 the loop to account for the fact that the first NITERS iterations had
3596 already been executed. Specifically, it updates the initial_condition of the
3597 access_function of all the data_references in the loop. */
3599 static void
3600 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3602 unsigned int i;
3603 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3604 struct data_reference *dr;
3606 if (vect_dump && (dump_flags & TDF_DETAILS))
3607 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3609 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3610 vect_update_init_of_dr (dr, niters);
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  /* Compute the prolog iteration count from the misalignment of the
     recorded unaligned data reference.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
                                   niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  The vectorized loop now runs
     the original count minus the iterations consumed by the prolog.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
        TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of data references (array element references) whose alignment must be
   checked at runtime.

   Input:
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */

static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference. */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
                                                        &new_stmt_list,
                                                        NULL_TREE);

      /* Any statements needed to compute the base address go first.  */
      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are OR together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
                                    or_tmp_name,
                                    addr_tmp_name));
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
        /* First reference: nothing to OR with yet.  */
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     and_tmp_name,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}
3784 /* Function vect_transform_loop.
3786 The analysis phase has determined that the loop is vectorizable.
3787 Vectorize the loop - created vectorized stmts to replace the scalar
3788 stmts in the loop, and update the loop exit condition. */
3790 void
3791 vect_transform_loop (loop_vec_info loop_vinfo,
3792 struct loops *loops ATTRIBUTE_UNUSED)
3794 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3795 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3796 int nbbs = loop->num_nodes;
3797 block_stmt_iterator si;
3798 int i;
3799 tree ratio = NULL;
3800 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3801 bitmap_iterator bi;
3802 unsigned int j;
3804 if (vect_print_dump_info (REPORT_DETAILS))
3805 fprintf (vect_dump, "=== vec_transform_loop ===");
3807 /* If the loop has data references that may or may not be aligned then
3808 two versions of the loop need to be generated, one which is vectorized
3809 and one which isn't. A test is then generated to control which of the
3810 loops is executed. The test checks for the alignment of all of the
3811 data references that may or may not be aligned. */
3813 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
3815 struct loop *nloop;
3816 tree cond_expr;
3817 tree cond_expr_stmt_list = NULL_TREE;
3818 basic_block condition_bb;
3819 block_stmt_iterator cond_exp_bsi;
3820 basic_block merge_bb;
3821 basic_block new_exit_bb;
3822 edge new_exit_e, e;
3823 tree orig_phi, new_phi, arg;
3825 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
3826 &cond_expr_stmt_list);
3827 initialize_original_copy_tables ();
3828 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
3829 free_original_copy_tables();
3831 /** Loop versioning violates an assumption we try to maintain during
3832 vectorization - that the loop exit block has a single predecessor.
3833 After versioning, the exit block of both loop versions is the same
3834 basic block (i.e. it has two predecessors). Just in order to simplify
3835 following transformations in the vectorizer, we fix this situation
3836 here by adding a new (empty) block on the exit-edge of the loop,
3837 with the proper loop-exit phis to maintain loop-closed-form. **/
3839 merge_bb = loop->single_exit->dest;
3840 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
3841 new_exit_bb = split_edge (loop->single_exit);
3842 add_bb_to_loop (new_exit_bb, loop->outer);
3843 new_exit_e = loop->single_exit;
3844 e = EDGE_SUCC (new_exit_bb, 0);
3846 for (orig_phi = phi_nodes (merge_bb); orig_phi;
3847 orig_phi = PHI_CHAIN (orig_phi))
3849 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
3850 new_exit_bb);
3851 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3852 add_phi_arg (new_phi, arg, new_exit_e);
3853 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
3856 /** end loop-exit-fixes after versioning **/
3858 update_ssa (TODO_update_ssa);
3859 cond_exp_bsi = bsi_last (condition_bb);
3860 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
3863 /* CHECKME: we wouldn't need this if we called update_ssa once
3864 for all loops. */
3865 bitmap_zero (vect_vnames_to_rename);
3867 /* Peel the loop if there are data refs with unknown alignment.
3868 Only one data ref with unknown store is allowed. */
3870 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
3871 vect_do_peeling_for_alignment (loop_vinfo, loops);
3873 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3874 compile time constant), or it is a constant that doesn't divide by the
3875 vectorization factor, then an epilog loop needs to be created.
3876 We therefore duplicate the loop: the original loop will be vectorized,
3877 and will compute the first (n/VF) iterations. The second copy of the loop
3878 will remain scalar and will compute the remaining (n%VF) iterations.
3879 (VF is the vectorization factor). */
3881 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3882 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3883 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3884 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3885 else
3886 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3887 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3889 /* 1) Make sure the loop header has exactly two entries
3890 2) Make sure we have a preheader basic block. */
3892 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3894 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3897 /* FORNOW: the vectorizer supports only loops which body consist
3898 of one basic block (header + empty latch). When the vectorizer will
3899 support more involved loop forms, the order by which the BBs are
3900 traversed need to be reconsidered. */
3902 for (i = 0; i < nbbs; i++)
3904 basic_block bb = bbs[i];
3906 for (si = bsi_start (bb); !bsi_end_p (si);)
3908 tree stmt = bsi_stmt (si);
3909 stmt_vec_info stmt_info;
3910 bool is_store;
3912 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "------>vectorizing statement: ");
3915 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3917 stmt_info = vinfo_for_stmt (stmt);
3918 gcc_assert (stmt_info);
3919 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3920 && !STMT_VINFO_LIVE_P (stmt_info))
3922 bsi_next (&si);
3923 continue;
3926 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
3927 != (unsigned HOST_WIDE_INT) vectorization_factor)
3928 && vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "multiple-types.");
3931 /* -------- vectorize statement ------------ */
3932 if (vect_print_dump_info (REPORT_DETAILS))
3933 fprintf (vect_dump, "transform statement.");
3935 is_store = vect_transform_stmt (stmt, &si);
3936 if (is_store)
3938 /* Free the attached stmt_vec_info and remove the stmt. */
3939 stmt_ann_t ann = stmt_ann (stmt);
3940 free (stmt_info);
3941 set_stmt_info (ann, NULL);
3942 bsi_remove (&si, true);
3943 continue;
3946 bsi_next (&si);
3947 } /* stmts in BB */
3948 } /* BBs in loop */
3950 slpeel_make_loop_iterate_ntimes (loop, ratio);
3952 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
3953 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
3955 /* The memory tags and pointers in vectorized statements need to
3956 have their SSA forms updated. FIXME, why can't this be delayed
3957 until all the loops have been transformed? */
3958 update_ssa (TODO_update_ssa);
3960 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
3961 fprintf (vect_dump, "LOOP VECTORIZED.");