/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
#include "real.h"
/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.  */

int
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
{
  int i;
  int min_profitable_iters;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  int vec_inside_cost = 0;
  int vec_outside_cost = 0;
  int scalar_single_iter_cost = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;

  /* Cost model disabled.  */
  if (!flag_vect_cost_model)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model disabled.");
      return 0;
    }

  /* Requires loop versioning tests to handle misalignment.
     FIXME: Make cost depend on number of stmts in may_misalign list.  */

  if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
    {
      vec_outside_cost += TARG_COND_BRANCH_COST;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning.\n");
    }

  /* Requires a prologue loop when peeling to handle misalignment.  Add cost
     of two guards, one for the peeled loop and one for the vector loop.  */

  peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  if (peel_iters_prologue)
    {
      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: Adding cost of checks for "
                 "prologue.\n");
    }

  /* Requires an epilogue loop to finish up remaining iterations after vector
     loop.  Add cost of two guards, one for the peeled loop and one for the
     vector loop.  */

  if ((peel_iters_prologue < 0)
      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
    {
      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: Adding cost of checks for "
                 "epilogue.\n");
    }

  /* Count statements in scalar loop.  Using this as scalar cost for a single
     iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  for (i = 0; i < nbbs; i++)
    {
      block_stmt_iterator si;
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;
          scalar_single_iter_cost++;
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
        }
    }

  /* Add additional cost for the peeled instructions in prologue and epilogue
     loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume the worst.

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);

  if (peel_iters_prologue < 0)
    {
      peel_iters_prologue = vf - 1;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: "
                 "prologue peel iters set conservatively.");

      /* If peeling for alignment is unknown, loop bound of main loop becomes
         unknown.  */
      peel_iters_epilogue = vf - 1;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: "
                 "epilogue peel iters set conservatively because "
                 "peeling for alignment is unknown.");
    }
  else
    {
      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
        {
          peel_iters_epilogue = vf - 1;
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "cost model: "
                     "epilogue peel iters set conservatively because "
                     "loop iterations are unknown.");
        }
      else
        peel_iters_epilogue =
          (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue)
          % vf;
    }

  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                      + (peel_iters_epilogue * scalar_single_iter_cost);

  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost and VF = vectorization factor.  */
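
  /* A worked instance (hypothetical costs, not from any real target):
     with SIC = 4, VIC = 6, VOC = 20 and VF = 4, the condition reads
     ((4*4) - 6) * niters > 20 * 4, i.e. niters > 8, so at least 9
     iterations are needed before the vector loop pays off (prior to
     the final rounding against VF at the end of this function).  */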
  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
    {
      if (vec_outside_cost == 0)
        min_profitable_iters = 1;
      else
        {
          min_profitable_iters = (vec_outside_cost * vf)
                                 / ((scalar_single_iter_cost * vf)
                                    - vec_inside_cost);

          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= ((vec_inside_cost * min_profitable_iters)
                  + (vec_outside_cost * vf)))
            min_profitable_iters++;
        }
    }
  /* vector version will never be profitable.  */
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cost model: vector iteration cost = %d "
                 "is divisible by scalar iteration cost = %d by a factor "
                 "greater than or equal to the vectorization factor = %d.",
                 vec_inside_cost, scalar_single_iter_cost, vf);
      return -1;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost: %d\n", scalar_single_iter_cost);
      fprintf (vect_dump, "  prologue iterations: %d\n",
               peel_iters_prologue);
      fprintf (vect_dump, "  epilogue iterations: %d\n",
               peel_iters_epilogue);
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
               min_profitable_iters);
      fprintf (vect_dump, "  Actual minimum iters for profitability: %d\n",
               min_profitable_iters < vf ? vf : min_profitable_iters);
    }

  return min_profitable_iters < vf ? vf : min_profitable_iters;
}
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static void
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies)
{
  int outer_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  tree orig_stmt;
  tree reduction_op;
  enum machine_mode mode;
  tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
  int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));

  /* Cost of reduction op inside loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;

  reduction_op = TREE_OPERAND (operation, op_type - 1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));

  /* Add in cost for initial definition.  */
  outer_cost += TARG_VEC_STMT_COST;

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  if (reduc_code < NUM_TREE_CODES)
    outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
  else
    {
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree bitsize =
        TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
      int element_bitsize = tree_low_cst (bitsize, 1);
      int nelements = vec_size_in_bits / element_bitsize;

      optab = optab_for_tree_code (code, vectype);

      /* We have a whole vector shift available.  */
      if (VECTOR_MODE_P (mode)
          && optab->handlers[mode].insn_code != CODE_FOR_nothing)
        /* Final reduction via vector shifts and the reduction operator.  Also
           requires scalar extract.  */
        outer_cost += ((exact_log2 (nelements) * 2 + 1) * TARG_VEC_STMT_COST);
      else
        /* Use extracts and reduction op for final reduction.  For N elements,
           we have N extracts and N-1 reduction ops.  */
        outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
    }
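
  /* For a sense of scale (hypothetical 4-element vector, unit stmt costs):
     the shift-based scheme above costs exact_log2 (4) * 2 + 1 = 5 vector
     stmts, while the extract-based fallback costs 4 + 3 = 7.  */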
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
  /* prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
{
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
{
  int cost = 0;
  int group_size;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      cost = ncopies * exact_log2 (group_size) * group_size
             * TARG_VEC_STMT_COST;
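
      /* E.g. (hypothetical numbers): a group of 4 stores with ncopies = 1
         costs exact_log2 (4) * 4 = 8 interleave stmts - two interleave
         stages, each permuting all 4 vectors.  */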
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d.",
                 group_size);
    }

  /* Costs of the stores.  */
  cost += ncopies * TARG_VEC_STORE_COST;

  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
{
  int inner_cost = 0;
  int group_size;
  int alignment_support_scheme;
  tree first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
      inner_cost = ncopies * exact_log2 (group_size) * group_size
                   * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d.",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        inner_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_unaligned_software_pipeline:
      {
        int outer_cost = 0;

        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
          {
            outer_cost = 2 * TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outer_cost += TARG_VEC_STMT_COST;
          }

        STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

        inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }
    default:
      gcc_unreachable ();
    }

  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme uses the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) for
   vectorizer generated variables, and appends NAME to the prefix if
   provided.  */
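
/* For example (hypothetical resulting names): a vect_simple_var request
   with NAME "x" yields a temporary based on the name "vect_x", a
   vect_pointer_var request yields "vect_px", and a NULL NAME falls back
   to the bare prefix.  */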
static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return, then the caller must
      insert these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
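
/* Illustration (a hypothetical simple case): for a data-ref a[i] with
   DR_BASE_ADDRESS = &a, DR_OFFSET = 0 and DR_INIT = 0, and with no OFFSET
   argument, the statements appended to NEW_STMT_LIST amount to:

     base_off.0 = 0;
     vect_pa.1 = (vectype *) (&a + base_off.0);

   and the returned SSA_NAME is vect_pa.1.  */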
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  base_offset = fold_convert (sizetype, base_offset);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (sizetype, "offset");
      tree step;

      /* For interleaved accesses we divide STEP by the size of the
         interleaving group.  */
      if (DR_GROUP_SIZE (stmt_info))
        step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
                            build_int_cst (TREE_TYPE (offset),
                                           DR_GROUP_SIZE (stmt_info)));
      else
        step = DR_STEP (dr);

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset  */
  addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
                           data_ref_base, base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}
/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_MODIFY_STMT <name, data-ref> or
         GIMPLE_MODIFY_STMT <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */
static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_SYMBOL_TAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: &(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);

  /** (4) Handle the updating of the vector-pointer inside the loop:  **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
}
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  Connect the new
   increment stmt to the existing def-use update-chain of the pointer.

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + vector_size
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
              The increment amount across iterations is also expected to be
              vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = TYPE_SIZE_UNIT (vectype);
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  incr_stmt = build_gimple_modify_stmt (ptr_var,
                                        build2 (POINTER_PLUS_EXPR, vptr_type,
                                                dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  return new_dataref_ptr;
}
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will be
   used in the vectorization of STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);

  init_stmt = build_gimple_modify_stmt (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
/* Function get_initial_def_for_induction

   Input:
   IV_PHI - the initial value of the induction variable

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */

static tree
get_initial_def_for_induction (tree iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  block_stmt_iterator bsi;
  tree vec, vec_init, vec_step, t;
  tree access_fn;
  tree new_var;
  tree new_name;
  tree init_stmt;
  tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  bool ok;
  int ncopies = vf / nunits;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  tree stmts;
  tree stmt = NULL_TREE;
  block_stmt_iterator si;
  basic_block bb = bb_for_stmt (iv_phi);

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Find the first insertion point in the BB.  */
  si = bsi_after_labels (bb);
  stmt = bsi_stmt (si);

  access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn,
                                    &init_expr, &step_expr);
  gcc_assert (ok);

  /* Create the vector that holds the initial_value of the induction.  */
  new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
  add_referenced_var (new_var);

  new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
  if (stmts)
    {
      new_bb = bsi_insert_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  t = NULL_TREE;
  t = tree_cons (NULL_TREE, new_name, t);
  for (i = 1; i < nunits; i++)
    {
      tree tmp;

      /* Create: new_name = new_name + step_expr  */
      tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
      init_stmt = build_gimple_modify_stmt (new_var, tmp);
      new_name = make_ssa_name (new_var, init_stmt);
      GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;

      new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
      gcc_assert (!new_bb);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created new init_stmt: ");
          print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
        }
      t = tree_cons (NULL_TREE, new_name, t);
    }
  vec = build_constructor_from_list (vectype, nreverse (t));
  vec_init = vect_init_vector (stmt, vec, vectype);

  /* Create the vector that holds the step of the induction.  */
  expr = build_int_cst (scalar_type, vf);
  new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
  t = NULL_TREE;
  for (i = 0; i < nunits; i++)
    t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
  vec = build_constructor_from_list (vectype, t);
  vec_step = vect_init_vector (stmt, vec, vectype);

  /* Create the following def-use cycle:
     loop prolog:
         vec_init = [X, X+S, X+2*S, X+3*S]
         vec_step = [VF*S, VF*S, VF*S, VF*S]
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  add_referenced_var (vec_dest);
  induction_phi = create_phi_node (vec_dest, loop->header);
  set_stmt_info (get_stmt_ann (induction_phi),
                 new_stmt_vec_info (induction_phi, loop_vinfo));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop.  */
  new_stmt = build_gimple_modify_stmt (NULL_TREE,
                                       build2 (PLUS_EXPR, vectype,
                                               induc_def, vec_step));
  vec_def = make_ssa_name (vec_dest, new_stmt);
  GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
  bsi = bsi_for_stmt (stmt);
  vect_finish_stmt_generation (stmt, new_stmt, &bsi);

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;

      /* Create the vector that holds the step of the induction.  */
      expr = build_int_cst (scalar_type, nunits);
      new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
      t = NULL_TREE;
      for (i = 0; i < nunits; i++)
        t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
      vec = build_constructor_from_list (vectype, t);
      vec_step = vect_init_vector (stmt, vec, vectype);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          tree tmp;

          /* vec_i = vec_prev + vec_{step*nunits}  */
          tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
          new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
          bsi = bsi_for_stmt (stmt);
          vect_finish_stmt_generation (stmt, new_stmt, &bsi);

          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform induction: created def-use cycle:");
      print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  return induc_def;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt = ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        return get_initial_def_for_induction (def_stmt);
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:           STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0      VS1.1
                        VS1.1: vx.1 = memref1      VS1.2
                        VS1.2: vx.2 = memref2      VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_invariant_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);

  return vec_oprnd;
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries).
   A cost model should help decide between these two schemes.  */
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree vecdef;
  tree def_for_init;
  tree init_def;
  tree t = NULL_TREE;
  int i;
  tree vector_type;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
  vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      *adjustment_def = init_val;
      /* Create a vector of zeros for init_def.  */
      if (INTEGRAL_TYPE_P (type))
        def_for_init = build_int_cst (type, 0);
      else
        def_for_init = build_real (type, dconst0);
      for (i = nunits - 1; i >= 0; --i)
        t = tree_cons (NULL_TREE, def_for_init, t);
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
      init_def = build_vector (vector_type, t);
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      *adjustment_def = NULL_TREE;
      init_def = vecdef;
      break;

    default:
      gcc_unreachable ();
    }

  return init_def;
}
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  int op_type;

  op_type = TREE_OPERAND_LENGTH (operation);
  reduction_op = TREE_OPERAND (operation, op_type - 1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }

  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

          ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
  exit_bsi = bsi_after_labels (exit_bb);
  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
  scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      tree tmp;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
      epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);

      extract_scalar_result = true;
    }
1715 else
1717 enum tree_code shift_code = 0;
1718 bool have_whole_vector_shift = true;
1719 int bit_offset;
1720 int element_bitsize = tree_low_cst (bitsize, 1);
1721 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1722 tree vec_temp;
1724 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1725 shift_code = VEC_RSHIFT_EXPR;
1726 else
1727 have_whole_vector_shift = false;
1729 /* Regardless of whether we have a whole vector shift, if we're
1730 emulating the operation via tree-vect-generic, we don't want
1731 to use it. Only the first round of the reduction is likely
1732 to still be profitable via emulation. */
1733 /* ??? It might be better to emit a reduction tree code here, so that
1734 tree-vect-generic can expand the first round via bit tricks. */
1735 if (!VECTOR_MODE_P (mode))
1736 have_whole_vector_shift = false;
1737 else
1739 optab optab = optab_for_tree_code (code, vectype);
1740 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1741 have_whole_vector_shift = false;
1744 if (have_whole_vector_shift)
1746 /*** Case 2: Create:
1747 for (offset = VS/2; offset >= element_size; offset/=2)
1749 Create: va' = vec_shift <va, offset>
1750 Create: va = vop <va, va'>
1751 } */
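/* A concrete sketch of this scheme, assuming a V4SI plus reduction
   on a little-endian layout (vec_size_in_bits == 128,
   element_bitsize == 32):
       va  = {a0, a1, a2, a3}
       va' = vec_shift <va, 64>   = {a2, a3, 0, 0}
       va  = va + va'             = {a0+a2, a1+a3, a2, a3}
       va' = vec_shift <va, 32>   = {a1+a3, a2, a3, 0}
       va  = va + va'             # element 0 == a0+a1+a2+a3
   After log2(nunits) == 2 rounds the full sum sits in element 0;
   the remaining elements are don't-cares.  */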
1753 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "Reduce using vector shifts");
1756 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1757 new_temp = PHI_RESULT (new_phi);
1759 for (bit_offset = vec_size_in_bits/2;
1760 bit_offset >= element_bitsize;
1761 bit_offset /= 2)
1763 tree bitpos = size_int (bit_offset);
1764 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1765 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1766 new_name = make_ssa_name (vec_dest, epilog_stmt);
1767 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1768 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1770 tmp = build2 (code, vectype, new_name, new_temp);
1771 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1772 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1773 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1774 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1777 extract_scalar_result = true;
1779 else
1781 tree rhs;
1783 /*** Case 3: Create:
1784 s = extract_field <v_out2, 0>
1785 for (offset = element_size;
1786 offset < vector_size;
1787 offset += element_size)
1789 Create: s' = extract_field <v_out2, offset>
1790 Create: s = op <s, s'>
1791 } */
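/* A concrete sketch for a V4SI plus reduction: four BIT_FIELD_REFs
   extract the elements at bit offsets 0, 32, 64 and 96, and three
   scalar adds fold them together:
       s  = v_out2[0];
       s' = v_out2[1];  s = s + s';
       s' = v_out2[2];  s = s + s';
       s' = v_out2[3];  s = s + s';
   This needs no vector shift or reduction support at all, at the
   cost of nunits - 1 scalar operations.  */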
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 fprintf (vect_dump, "Reduce using scalar code. ");
1796 vec_temp = PHI_RESULT (new_phi);
1797 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1798 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1799 bitsize_zero_node);
1800 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1801 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1802 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1803 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1804 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1806 for (bit_offset = element_bitsize;
1807 bit_offset < vec_size_in_bits;
1808 bit_offset += element_bitsize)
1810 tree tmp;
1811 tree bitpos = bitsize_int (bit_offset);
1812 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1813 bitpos);
1815 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1816 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1817 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1818 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1819 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1821 tmp = build2 (code, scalar_type, new_name, new_temp);
1822 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1823 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1824 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1825 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1828 extract_scalar_result = false;
1832 /* 2.4 Extract the final scalar result. Create:
1833 s_out3 = extract_field <v_out2, bitpos> */
1835 if (extract_scalar_result)
1837 tree rhs;
1839 if (vect_print_dump_info (REPORT_DETAILS))
1840 fprintf (vect_dump, "extract scalar result");
1842 if (BYTES_BIG_ENDIAN)
1843 bitpos = size_binop (MULT_EXPR,
1844 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1845 TYPE_SIZE (scalar_type));
1846 else
1847 bitpos = bitsize_zero_node;
1849 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1850 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1851 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1852 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1853 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1854 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1857 /* 2.5 Adjust the final result by the initial value of the reduction
1858 variable. (When such adjustment is not needed, then
1859 'scalar_initial_def' is zero).
1861 Create:
1862 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
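/* For example (a sketch; the initial vector is actually chosen by
   get_initial_def_for_reduction): for the scalar loop
       s = 5;
       for (i = 0; i < N; i++)
         s += a[i];
   a plus reduction can start the vector phi from {0, 0, 0, 0} and
   record 5 in 'scalar_initial_def', so the initial value is folded
   in exactly once here rather than once per vector element.  */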
1864 if (scalar_initial_def)
1866 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1867 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1868 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1869 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1870 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1873 /* 2.6 Replace uses of s_out0 with uses of s_out4 */
1875 /* Find the loop-closed-use at the loop exit of the original scalar result.
1876 (The reduction result is expected to have two immediate uses - one at the
1877 latch block, and one at the loop exit). */
1878 exit_phi = NULL;
1879 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1881 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1883 exit_phi = USE_STMT (use_p);
1884 break;
1887 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1888 gcc_assert (exit_phi);
1889 /* Replace the uses: */
1890 orig_name = PHI_RESULT (exit_phi);
1891 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1892 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1893 SET_USE (use_p, new_temp);
1897 /* Function vectorizable_reduction.
1899 Check if STMT performs a reduction operation that can be vectorized.
1900 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1901 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1902 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1904 This function also handles reduction idioms (patterns) that have been
1905 recognized in advance during vect_pattern_recog. In this case, STMT may be
1906 of this form:
1907 X = pattern_expr (arg0, arg1, ..., X)
1908 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1909 sequence that had been detected and replaced by the pattern-stmt (STMT).
1911 In some cases of reduction patterns, the type of the reduction variable X is
1912 different than the type of the other arguments of STMT.
1913 In such cases, the vectype that is used when transforming STMT into a vector
1914 stmt is different than the vectype that is used to determine the
1915 vectorization factor, because it consists of a different number of elements
1916 than the actual number of elements that are being operated upon in parallel.
1918 For example, consider an accumulation of shorts into an int accumulator.
1919 On some targets it's possible to vectorize this pattern operating on 8
1920 shorts at a time (hence, the vectype for purposes of determining the
1921 vectorization factor should be V8HI); on the other hand, the vectype that
1922 is used to create the vector form is actually V4SI (the type of the result).
1924 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1925 indicates the actual level of parallelism (V8HI in the example), so
1926 that the right vectorization factor is derived. This vectype
1927 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1928 be used to create the vectorized stmt. The right vectype for the vectorized
1929 stmt is obtained from the type of the result X:
1930 get_vectype_for_scalar_type (TREE_TYPE (X))
1932 This means that, contrary to "regular" reductions (or "regular" stmts in
1933 general), the following equation:
1934 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1935 does *NOT* necessarily hold for reduction patterns. */
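/* As an illustration, the widen-sum pattern above corresponds to a
   scalar loop such as:

       short a[N];
       int acc = 0;
       for (i = 0; i < N; i++)
         acc += a[i];    (S: acc = (int) a[i] + acc)

   STMT_VINFO_VECTYPE is V8HI (eight shorts determine the vectorization
   factor), while the vectorized widen-sum stmt itself produces a V4SI,
   the vectype obtained from the type of the int accumulator.  */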
1937 bool
1938 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1940 tree vec_dest;
1941 tree scalar_dest;
1942 tree op;
1943 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1947 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1948 tree operation;
1949 enum tree_code code, orig_code, epilog_reduc_code = 0;
1950 enum machine_mode vec_mode;
1951 int op_type;
1952 optab optab, reduc_optab;
1953 tree new_temp = NULL_TREE;
1954 tree def, def_stmt;
1955 enum vect_def_type dt;
1956 tree new_phi;
1957 tree scalar_type;
1958 bool is_simple_use;
1959 tree orig_stmt;
1960 stmt_vec_info orig_stmt_info;
1961 tree expr = NULL_TREE;
1962 int i;
1963 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1965 stmt_vec_info prev_stmt_info;
1966 tree reduc_def;
1967 tree new_stmt = NULL_TREE;
1968 int j;
1970 gcc_assert (ncopies >= 1);
1972 /* 1. Is vectorizable reduction? */
1974 /* Not supportable if the reduction variable is used in the loop. */
1975 if (STMT_VINFO_RELEVANT_P (stmt_info))
1976 return false;
1978 if (!STMT_VINFO_LIVE_P (stmt_info))
1979 return false;
1981 /* Make sure it was already recognized as a reduction computation. */
1982 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1983 return false;
1985 /* 2. Has this been recognized as a reduction pattern?
1987 Check if STMT represents a pattern that has been recognized
1988 in earlier analysis stages. For stmts that represent a pattern,
1989 the STMT_VINFO_RELATED_STMT field records the last stmt in
1990 the original sequence that constitutes the pattern. */
1992 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1993 if (orig_stmt)
1995 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1996 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1997 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1998 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2001 /* 3. Check the operands of the operation. The first operands are defined
2002 inside the loop body. The last operand is the reduction variable,
2003 which is defined by the loop-header-phi. */
2005 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2007 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2008 code = TREE_CODE (operation);
2009 op_type = TREE_OPERAND_LENGTH (operation);
2010 if (op_type != binary_op && op_type != ternary_op)
2011 return false;
2012 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2013 scalar_type = TREE_TYPE (scalar_dest);
2015 /* All uses but the last are expected to be defined in the loop.
2016 The last use is the reduction variable. */
2017 for (i = 0; i < op_type-1; i++)
2019 op = TREE_OPERAND (operation, i);
2020 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2021 gcc_assert (is_simple_use);
2022 if (dt != vect_loop_def
2023 && dt != vect_invariant_def
2024 && dt != vect_constant_def
2025 && dt != vect_induction_def)
2026 return false;
2029 op = TREE_OPERAND (operation, i);
2030 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2031 gcc_assert (is_simple_use);
2032 gcc_assert (dt == vect_reduction_def);
2033 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2034 if (orig_stmt)
2035 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2036 else
2037 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2039 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2040 return false;
2042 /* 4. Supportable by target? */
2044 /* 4.1. check support for the operation in the loop */
2045 optab = optab_for_tree_code (code, vectype);
2046 if (!optab)
2048 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "no optab.");
2050 return false;
2052 vec_mode = TYPE_MODE (vectype);
2053 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "op not supported by target.");
2057 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2058 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2059 < vect_min_worthwhile_factor (code))
2060 return false;
2061 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "proceeding using word mode.");
2065 /* Worthwhile without SIMD support? */
2066 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2067 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2068 < vect_min_worthwhile_factor (code))
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "not worthwhile without SIMD support.");
2072 return false;
2075 /* 4.2. Check support for the epilog operation.
2077 If STMT represents a reduction pattern, then the type of the
2078 reduction variable may be different than the type of the rest
2079 of the arguments. For example, consider the case of accumulation
2080 of shorts into an int accumulator; the original code:
2081 S1: int_a = (int) short_a;
2082 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2084 was replaced with:
2085 STMT: int_acc = widen_sum <short_a, int_acc>
2087 This means that:
2088 1. The tree-code that is used to create the vector operation in the
2089 epilog code (that reduces the partial results) is not the
2090 tree-code of STMT, but is rather the tree-code of the original
2091 stmt from the pattern that STMT is replacing. I.e., in the example
2092 above we want to use 'widen_sum' in the loop, but 'plus' in the
2093 epilog.
2094 2. The type (mode) we use to check available target support
2095 for the vector operation to be created in the *epilog*, is
2096 determined by the type of the reduction variable (in the example
2097 above we'd check this: plus_optab[vect_int_mode]).
2098 However the type (mode) we use to check available target support
2099 for the vector operation to be created *inside the loop*, is
2100 determined by the type of the other arguments to STMT (in the
2101 example we'd check this: widen_sum_optab[vect_short_mode]).
2103 This is contrary to "regular" reductions, in which the types of all
2104 the arguments are the same as the type of the reduction variable.
2105 For "regular" reductions we can therefore use the same vector type
2106 (and also the same tree-code) when generating the epilog code and
2107 when generating the code inside the loop. */
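/* Continuing the accumulation example as an illustration: section 4.1
   above queried the widen-sum optab in the V8HI mode of the loop
   arguments, whereas the code below maps the original PLUS to a
   reduction code and queries its optab in the V4SI mode of the int
   accumulator.  */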
2109 if (orig_stmt)
2111 /* This is a reduction pattern: get the vectype from the type of the
2112 reduction variable, and get the tree-code from orig_stmt. */
2113 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2114 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2115 vec_mode = TYPE_MODE (vectype);
2117 else
2119 /* Regular reduction: the same vectype and tree-code that are used for
2120 the vector code inside the loop can also be used for the epilog code. */
2121 orig_code = code;
2124 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2125 return false;
2126 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2127 if (!reduc_optab)
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "no optab for reduction.");
2131 epilog_reduc_code = NUM_TREE_CODES;
2133 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "reduc op not supported by target.");
2137 epilog_reduc_code = NUM_TREE_CODES;
2140 if (!vec_stmt) /* transformation not required. */
2142 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2143 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2144 return true;
2147 /** Transform. **/
2149 if (vect_print_dump_info (REPORT_DETAILS))
2150 fprintf (vect_dump, "transform reduction.");
2152 /* Create the destination vector */
2153 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2155 /* Create the reduction-phi that defines the reduction-operand. */
2156 new_phi = create_phi_node (vec_dest, loop->header);
2158 /* In case the vectorization factor (VF) is bigger than the number
2159 of elements that we can fit in a vectype (nunits), we have to generate
2160 more than one vector stmt - i.e - we need to "unroll" the
2161 vector stmt by a factor VF/nunits. For more details see documentation
2162 in vectorizable_operation. */
2164 prev_stmt_info = NULL;
2165 for (j = 0; j < ncopies; j++)
2167 /* Handle uses. */
2168 if (j == 0)
2170 op = TREE_OPERAND (operation, 0);
2171 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2172 if (op_type == ternary_op)
2174 op = TREE_OPERAND (operation, 1);
2175 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2178 /* Get the vector def for the reduction variable from the phi node */
2179 reduc_def = PHI_RESULT (new_phi);
2181 else
2183 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2184 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2185 if (op_type == ternary_op)
2186 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2188 /* Get the vector def for the reduction variable from the vectorized
2189 reduction operation generated in the previous iteration (j-1) */
2190 reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
2193 /* Arguments are ready. Create the new vector stmt. */
2194 if (op_type == binary_op)
2195 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2196 else
2197 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2198 reduc_def);
2199 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2200 new_temp = make_ssa_name (vec_dest, new_stmt);
2201 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2202 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2204 if (j == 0)
2205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2206 else
2207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2208 prev_stmt_info = vinfo_for_stmt (new_stmt);
2211 /* Finalize the reduction-phi (set its arguments) and create the
2212 epilog reduction code. */
2213 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2214 return true;
2217 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2218 a function declaration if the target has a vectorized version
2219 of the function, or NULL_TREE if the function cannot be vectorized. */
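/* For example (assuming the target registers a vector version of sinf
   through targetm.vectorize.builtin_vectorized_function), a loop like

       for (i = 0; i < N; i++)
         x[i] = __builtin_sinf (y[i]);

   can have the scalar call replaced by a call to a target-provided
   V4SF-to-V4SF vector sine; calls that may read or clobber memory are
   rejected up front by the ECF_CONST | ECF_NOVOPS test below.  */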
2221 tree
2222 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2224 tree fndecl = get_callee_fndecl (call);
2225 enum built_in_function code;
2227 /* We only handle functions that do not read or clobber memory -- i.e.
2228 const or novops ones. */
2229 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2230 return NULL_TREE;
2232 if (!fndecl
2233 || TREE_CODE (fndecl) != FUNCTION_DECL
2234 || !DECL_BUILT_IN (fndecl))
2235 return NULL_TREE;
2237 code = DECL_FUNCTION_CODE (fndecl);
2238 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2239 vectype_in);
2242 /* Function vectorizable_call.
2244 Check if STMT performs a function call that can be vectorized.
2245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2246 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2247 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2249 bool
2250 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2252 tree vec_dest;
2253 tree scalar_dest;
2254 tree operation;
2255 tree op, type;
2256 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2258 tree vectype_out, vectype_in;
2259 int nunits_in;
2260 int nunits_out;
2261 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2262 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2263 enum vect_def_type dt[2];
2264 tree new_stmt;
2265 int ncopies, j, nargs;
2266 call_expr_arg_iterator iter;
2267 tree vargs;
2268 enum { NARROW, NONE, WIDEN } modifier;
2270 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2271 return false;
2273 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2274 return false;
2276 /* FORNOW: not yet supported. */
2277 if (STMT_VINFO_LIVE_P (stmt_info))
2279 if (vect_print_dump_info (REPORT_DETAILS))
2280 fprintf (vect_dump, "value used after loop.");
2281 return false;
2284 /* Is STMT a vectorizable call? */
2285 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2286 return false;
2288 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2289 return false;
2291 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2292 if (TREE_CODE (operation) != CALL_EXPR)
2293 return false;
2295 /* Process function arguments. */
2296 rhs_type = NULL_TREE;
2297 nargs = 0;
2298 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2300 /* Bail out if the function has more than two arguments; we do
2301 not have interesting builtin functions to vectorize with more
2302 than two arguments. */
2303 if (nargs >= 2)
2304 return false;
2306 /* We can only handle calls with arguments of the same type. */
2307 if (rhs_type
2308 && rhs_type != TREE_TYPE (op))
2310 if (vect_print_dump_info (REPORT_DETAILS))
2311 fprintf (vect_dump, "argument types differ.");
2312 return false;
2314 rhs_type = TREE_TYPE (op);
2316 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2318 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump, "use not simple.");
2320 return false;
2323 ++nargs;
2326 /* A call with no arguments cannot be vectorized either. */
2327 if (nargs == 0)
2328 return false;
2330 vectype_in = get_vectype_for_scalar_type (rhs_type);
2331 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2334 vectype_out = get_vectype_for_scalar_type (lhs_type);
2335 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2337 /* FORNOW */
2338 if (nunits_in == nunits_out / 2)
2339 modifier = NARROW;
2340 else if (nunits_out == nunits_in)
2341 modifier = NONE;
2342 else if (nunits_out == nunits_in / 2)
2343 modifier = WIDEN;
2344 else
2345 return false;
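/* For instance, a call taking V2DF arguments and producing V4SI
   results (nunits_in == 2, nunits_out == 4) is classified NARROW:
   two input vectors must be consumed to fill one output vector.
   Equal unit counts give the one-to-one NONE mapping.  */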
2347 /* For now, we only vectorize functions if a target specific builtin
2348 is available. TODO -- in some cases, it might be profitable to
2349 insert the calls for pieces of the vector, in order to be able
2350 to vectorize other operations in the loop. */
2351 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2352 if (fndecl == NULL_TREE)
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "function is not vectorizable.");
2357 return false;
2360 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2362 if (modifier == NARROW)
2363 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2364 else
2365 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2367 /* Sanity check: make sure that at least one copy of the vectorized stmt
2368 needs to be generated. */
2369 gcc_assert (ncopies >= 1);
2371 if (!vec_stmt) /* transformation not required. */
2373 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "=== vectorizable_call ===");
2376 vect_model_simple_cost (stmt_info, ncopies);
2377 return true;
2380 /** Transform. **/
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "transform operation.");
2385 /* Handle def. */
2386 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2387 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2389 prev_stmt_info = NULL;
2390 switch (modifier)
2392 case NONE:
2393 for (j = 0; j < ncopies; ++j)
2395 /* Build argument list for the vectorized call. */
2396 /* FIXME: Rewrite this so that it doesn't
2397 construct a temporary list. */
2398 vargs = NULL_TREE;
2399 nargs = 0;
2400 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2402 if (j == 0)
2403 vec_oprnd0
2404 = vect_get_vec_def_for_operand (op, stmt, NULL);
2405 else
2406 vec_oprnd0
2407 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2409 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2411 ++nargs;
2413 vargs = nreverse (vargs);
2415 rhs = build_function_call_expr (fndecl, vargs);
2416 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2417 new_temp = make_ssa_name (vec_dest, new_stmt);
2418 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2420 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2422 if (j == 0)
2423 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2424 else
2425 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2427 prev_stmt_info = vinfo_for_stmt (new_stmt);
2430 break;
2432 case NARROW:
2433 for (j = 0; j < ncopies; ++j)
2435 /* Build argument list for the vectorized call. */
2436 /* FIXME: Rewrite this so that it doesn't
2437 construct a temporary list. */
2438 vargs = NULL_TREE;
2439 nargs = 0;
2440 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2442 if (j == 0)
2444 vec_oprnd0
2445 = vect_get_vec_def_for_operand (op, stmt, NULL);
2446 vec_oprnd1
2447 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2449 else
2451 vec_oprnd0
2452 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
2453 vec_oprnd1
2454 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2457 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2458 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
2460 ++nargs;
2462 vargs = nreverse (vargs);
2464 rhs = build_function_call_expr (fndecl, vargs);
2465 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2466 new_temp = make_ssa_name (vec_dest, new_stmt);
2467 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2469 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2471 if (j == 0)
2472 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2473 else
2474 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2476 prev_stmt_info = vinfo_for_stmt (new_stmt);
2479 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2481 break;
2483 case WIDEN:
2484 /* No current target implements this case. */
2485 return false;
2488 /* The call in STMT might prevent it from being removed in dce.
2489 However, we cannot remove it here, due to the way the ssa name
2490 it defines is mapped to the new definition. So just replace the
2491 rhs of the statement with something harmless. */
2492 type = TREE_TYPE (scalar_dest);
2493 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2494 update_stmt (stmt);
2496 return true;
2500 /* Function vect_gen_widened_results_half
2502 Create a vector stmt whose code, type, number of arguments, and result
2503 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2504 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2505 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2506 needs to be created (DECL is a function-decl of a target-builtin).
2507 STMT is the original scalar stmt that we are vectorizing. */
2509 static tree
2510 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2511 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2512 tree vec_dest, block_stmt_iterator *bsi,
2513 tree stmt)
2515 tree expr;
2516 tree new_stmt;
2517 tree new_temp;
2518 tree sym;
2519 ssa_op_iter iter;
2521 /* Generate half of the widened result: */
2522 if (code == CALL_EXPR)
2524 /* Target specific support */
2525 if (op_type == binary_op)
2526 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2527 else
2528 expr = build_call_expr (decl, 1, vec_oprnd0);
2530 else
2532 /* Generic support */
2533 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2534 if (op_type == binary_op)
2535 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2536 else
2537 expr = build1 (code, vectype, vec_oprnd0);
2539 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2540 new_temp = make_ssa_name (vec_dest, new_stmt);
2541 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2542 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2544 if (code == CALL_EXPR)
2546 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2548 if (TREE_CODE (sym) == SSA_NAME)
2549 sym = SSA_NAME_VAR (sym);
2550 mark_sym_for_renaming (sym);
2554 return new_stmt;
2558 /* Function vectorizable_conversion.
2560 Check if STMT performs a conversion operation, that can be vectorized.
2561 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2562 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2563 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2565 bool
2566 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2567 tree * vec_stmt)
2569 tree vec_dest;
2570 tree scalar_dest;
2571 tree operation;
2572 tree op0;
2573 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2574 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2575 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2576 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2577 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2578 tree new_temp;
2579 tree def, def_stmt;
2580 enum vect_def_type dt0;
2581 tree new_stmt;
2582 stmt_vec_info prev_stmt_info;
2583 int nunits_in;
2584 int nunits_out;
2585 tree vectype_out, vectype_in;
2586 int ncopies, j;
2587 tree expr;
2588 tree rhs_type, lhs_type;
2589 tree builtin_decl;
2590 enum { NARROW, NONE, WIDEN } modifier;
2592 /* Is STMT a vectorizable conversion? */
2594 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2595 return false;
2597 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2598 return false;
2600 if (STMT_VINFO_LIVE_P (stmt_info))
2602 /* FORNOW: not yet supported. */
2603 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "value used after loop.");
2605 return false;
2608 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2609 return false;
2611 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2612 return false;
2614 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2615 code = TREE_CODE (operation);
2616 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2617 return false;
2619 /* Check types of lhs and rhs */
2620 op0 = TREE_OPERAND (operation, 0);
2621 rhs_type = TREE_TYPE (op0);
2622 vectype_in = get_vectype_for_scalar_type (rhs_type);
2623 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2625 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2626 lhs_type = TREE_TYPE (scalar_dest);
2627 vectype_out = get_vectype_for_scalar_type (lhs_type);
2628 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2630 /* FORNOW */
2631 if (nunits_in == nunits_out / 2)
2632 modifier = NARROW;
2633 else if (nunits_out == nunits_in)
2634 modifier = NONE;
2635 else if (nunits_out == nunits_in / 2)
2636 modifier = WIDEN;
2637 else
2638 return false;
2640 if (modifier == NONE)
2641 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2643 /* Bail out if the types are both integral or both non-integral. */
2644 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2645 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2646 return false;
2648 if (modifier == NARROW)
2649 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2650 else
2651 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2653 /* Sanity check: make sure that at least one copy of the vectorized stmt
2654 needs to be generated. */
2655 gcc_assert (ncopies >= 1);
2657 /* Check the operands of the operation. */
2658 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2660 if (vect_print_dump_info (REPORT_DETAILS))
2661 fprintf (vect_dump, "use not simple.");
2662 return false;
2665 /* Supportable by target? */
2666 if ((modifier == NONE
2667 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2668 || (modifier == WIDEN
2669 && !supportable_widening_operation (code, stmt, vectype_in,
2670 &decl1, &decl2,
2671 &code1, &code2))
2672 || (modifier == NARROW
2673 && !supportable_narrowing_operation (code, stmt, vectype_in,
2674 &code1)))
2676 if (vect_print_dump_info (REPORT_DETAILS))
2677 fprintf (vect_dump, "op not supported by target.");
2678 return false;
2681 if (modifier != NONE)
2682 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2684 if (!vec_stmt) /* transformation not required. */
2686 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2687 return true;
2690 /** Transform. **/
2691 if (vect_print_dump_info (REPORT_DETAILS))
2692 fprintf (vect_dump, "transform conversion.");
2694 /* Handle def. */
2695 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2697 prev_stmt_info = NULL;
2698 switch (modifier)
2700 case NONE:
2701 for (j = 0; j < ncopies; j++)
2703 tree sym;
2704 ssa_op_iter iter;
2706 if (j == 0)
2707 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2708 else
2709 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2711 builtin_decl =
2712 targetm.vectorize.builtin_conversion (code, vectype_in);
2713 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2715 /* Arguments are ready. Create the new vector stmt. */
2716 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2717 new_temp = make_ssa_name (vec_dest, new_stmt);
2718 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2719 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2720 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2722 if (TREE_CODE (sym) == SSA_NAME)
2723 sym = SSA_NAME_VAR (sym);
2724 mark_sym_for_renaming (sym);
2727 if (j == 0)
2728 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2729 else
2730 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2731 prev_stmt_info = vinfo_for_stmt (new_stmt);
2733 break;
2735 case WIDEN:
2736 /* In case the vectorization factor (VF) is bigger than the number
2737 of elements that we can fit in a vectype (nunits), we have to
2738 generate more than one vector stmt - i.e - we need to "unroll"
2739 the vector stmt by a factor VF/nunits. */
2740 for (j = 0; j < ncopies; j++)
2742 if (j == 0)
2743 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2744 else
2745 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2747 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2749 /* Generate first half of the widened result: */
2750 new_stmt
2751 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2752 vec_oprnd0, vec_oprnd1,
2753 unary_op, vec_dest, bsi, stmt);
2754 if (j == 0)
2755 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2756 else
2757 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2758 prev_stmt_info = vinfo_for_stmt (new_stmt);
2760 /* Generate second half of the widened result: */
2761 new_stmt
2762 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2763 vec_oprnd0, vec_oprnd1,
2764 unary_op, vec_dest, bsi, stmt);
2765 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2766 prev_stmt_info = vinfo_for_stmt (new_stmt);
2768 break;
2770 case NARROW:
2771 /* In case the vectorization factor (VF) is bigger than the number
2772 of elements that we can fit in a vectype (nunits), we have to
2773 generate more than one vector stmt - i.e - we need to "unroll"
2774 the vector stmt by a factor VF/nunits. */
2775 for (j = 0; j < ncopies; j++)
2777 /* Handle uses. */
2778 if (j == 0)
2780 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2781 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2783 else
2785 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2786 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2789 /* Arguments are ready. Create the new vector stmt. */
2790 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2791 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2792 new_temp = make_ssa_name (vec_dest, new_stmt);
2793 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2794 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2796 if (j == 0)
2797 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2798 else
2799 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2801 prev_stmt_info = vinfo_for_stmt (new_stmt);
2804 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2806 return true;
2810 /* Function vectorizable_assignment.
2812 Check if STMT performs an assignment (copy) that can be vectorized.
2813 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2814 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2815 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2817 bool
2818 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2820 tree vec_dest;
2821 tree scalar_dest;
2822 tree op;
2823 tree vec_oprnd;
2824 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2825 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2827 tree new_temp;
2828 tree def, def_stmt;
2829 enum vect_def_type dt;
2830 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2831 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2833 gcc_assert (ncopies >= 1);
2834 if (ncopies > 1)
2835 return false; /* FORNOW */
2837 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2838 return false;
2840 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2841 return false;
2843 /* FORNOW: not yet supported. */
2844 if (STMT_VINFO_LIVE_P (stmt_info))
2846 if (vect_print_dump_info (REPORT_DETAILS))
2847 fprintf (vect_dump, "value used after loop.");
2848 return false;
2851 /* Is vectorizable assignment? */
2852 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2853 return false;
2855 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2856 if (TREE_CODE (scalar_dest) != SSA_NAME)
2857 return false;
2859 op = GIMPLE_STMT_OPERAND (stmt, 1);
2860 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2862 if (vect_print_dump_info (REPORT_DETAILS))
2863 fprintf (vect_dump, "use not simple.");
2864 return false;
2867 if (!vec_stmt) /* transformation not required. */
2869 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2870 if (vect_print_dump_info (REPORT_DETAILS))
2871 fprintf (vect_dump, "=== vectorizable_assignment ===");
2872 vect_model_simple_cost (stmt_info, ncopies);
2873 return true;
2876 /** Transform. **/
2877 if (vect_print_dump_info (REPORT_DETAILS))
2878 fprintf (vect_dump, "transform assignment.");
2880 /* Handle def. */
2881 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2883 /* Handle use. */
2884 op = GIMPLE_STMT_OPERAND (stmt, 1);
2885 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2887 /* Arguments are ready. Create the new vector stmt. */
2888 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2889 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2890 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2891 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2893 return true;
2897 /* Function vect_min_worthwhile_factor.
2899 For a loop where we could vectorize the operation indicated by CODE,
2900 return the minimum vectorization factor that makes it worthwhile
2901 to use generic vectors. */
2902 static int
2903 vect_min_worthwhile_factor (enum tree_code code)
2905 switch (code)
2907 case PLUS_EXPR:
2908 case MINUS_EXPR:
2909 case NEGATE_EXPR:
2910 return 4;
2912 case BIT_AND_EXPR:
2913 case BIT_IOR_EXPR:
2914 case BIT_XOR_EXPR:
2915 case BIT_NOT_EXPR:
2916 return 2;
2918 default:
2919 return INT_MAX;
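/* For illustration (the thresholds above are heuristic): emulating an
   addition in a plain integer word needs extra masking so that carries
   do not cross element boundaries, hence the larger factor of 4,
   whereas the bitwise operations work on the whole word unchanged and
   already pay off at a factor of 2.  */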
2924 /* Function vectorizable_induction
2926 Check if PHI performs an induction computation that can be vectorized.
2927 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
2928 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
2929 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2931 bool
2932 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2933 tree *vec_stmt)
2935 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
2936 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2937 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2938 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2939 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2940 tree vec_def;
2942 gcc_assert (ncopies >= 1);
2944 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2945 return false;
2947 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
2949 if (STMT_VINFO_LIVE_P (stmt_info))
2951 /* FORNOW: not yet supported. */
2952 if (vect_print_dump_info (REPORT_DETAILS))
2953 fprintf (vect_dump, "value used after loop.");
2954 return false;
2957 if (TREE_CODE (phi) != PHI_NODE)
2958 return false;
2960 if (!vec_stmt) /* transformation not required. */
2962 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
2963 if (vect_print_dump_info (REPORT_DETAILS))
2964 fprintf (vect_dump, "=== vectorizable_induction ===");
2965 vect_model_induction_cost (stmt_info, ncopies);
2966 return true;
2969 /** Transform. **/
2971 if (vect_print_dump_info (REPORT_DETAILS))
2972 fprintf (vect_dump, "transform induction phi.");
2974 vec_def = get_initial_def_for_induction (phi);
2975 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
2976 return true;
2980 /* Function vectorizable_operation.
2982 Check if STMT performs a binary or unary operation that can be vectorized.
2983 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2984 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2985 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2987 bool
2988 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2990 tree vec_dest;
2991 tree scalar_dest;
2992 tree operation;
2993 tree op0, op1 = NULL;
2994 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2995 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2996 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2997 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2998 enum tree_code code;
2999 enum machine_mode vec_mode;
3000 tree new_temp;
3001 int op_type;
3002 optab optab;
3003 int icode;
3004 enum machine_mode optab_op2_mode;
3005 tree def, def_stmt;
3006 enum vect_def_type dt0, dt1;
3007 tree new_stmt;
3008 stmt_vec_info prev_stmt_info;
3009 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3010 int nunits_out;
3011 tree vectype_out;
3012 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3013 int j;
3015 gcc_assert (ncopies >= 1);
3017 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3018 return false;
3020 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3021 return false;
3023 /* FORNOW: not yet supported. */
3024 if (STMT_VINFO_LIVE_P (stmt_info))
3026 if (vect_print_dump_info (REPORT_DETAILS))
3027 fprintf (vect_dump, "value used after loop.");
3028 return false;
3031 /* Is STMT a vectorizable binary/unary operation? */
3032 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3033 return false;
3035 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3036 return false;
3038 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3039 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3040 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3041 if (nunits_out != nunits_in)
3042 return false;
3044 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3045 code = TREE_CODE (operation);
3047 /* For pointer addition, we should use the normal plus for
3048 the vector addition. */
3049 if (code == POINTER_PLUS_EXPR)
3050 code = PLUS_EXPR;
3052 optab = optab_for_tree_code (code, vectype);
3054 /* Support only unary or binary operations. */
3055 op_type = TREE_OPERAND_LENGTH (operation);
3056 if (op_type != unary_op && op_type != binary_op)
3058 if (vect_print_dump_info (REPORT_DETAILS))
3059 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3060 return false;
3063 op0 = TREE_OPERAND (operation, 0);
3064 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3066 if (vect_print_dump_info (REPORT_DETAILS))
3067 fprintf (vect_dump, "use not simple.");
3068 return false;
3071 if (op_type == binary_op)
3073 op1 = TREE_OPERAND (operation, 1);
3074 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
3076 if (vect_print_dump_info (REPORT_DETAILS))
3077 fprintf (vect_dump, "use not simple.");
3078 return false;
3082 /* Supportable by target? */
3083 if (!optab)
3085 if (vect_print_dump_info (REPORT_DETAILS))
3086 fprintf (vect_dump, "no optab.");
3087 return false;
3089 vec_mode = TYPE_MODE (vectype);
3090 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3091 if (icode == CODE_FOR_nothing)
3093 if (vect_print_dump_info (REPORT_DETAILS))
3094 fprintf (vect_dump, "op not supported by target.");
3095 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3096 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3097 < vect_min_worthwhile_factor (code))
3098 return false;
3099 if (vect_print_dump_info (REPORT_DETAILS))
3100 fprintf (vect_dump, "proceeding using word mode.");
3103 /* Worthwhile without SIMD support? */
3104 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3105 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3106 < vect_min_worthwhile_factor (code))
3108 if (vect_print_dump_info (REPORT_DETAILS))
3109 fprintf (vect_dump, "not worthwhile without SIMD support.");
3110 return false;
3113 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3115 /* FORNOW: not yet supported. */
3116 if (!VECTOR_MODE_P (vec_mode))
3117 return false;
3119 /* Invariant argument is needed for a vector shift
3120 by a scalar shift operand. */
3121 optab_op2_mode = insn_data[icode].operand[2].mode;
3122 if (! (VECTOR_MODE_P (optab_op2_mode)
3123 || dt1 == vect_constant_def
3124 || dt1 == vect_invariant_def))
3126 if (vect_print_dump_info (REPORT_DETAILS))
3127 fprintf (vect_dump, "operand mode requires invariant argument.");
3128 return false;
3132 if (!vec_stmt) /* transformation not required. */
3134 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3135 if (vect_print_dump_info (REPORT_DETAILS))
3136 fprintf (vect_dump, "=== vectorizable_operation ===");
3137 vect_model_simple_cost (stmt_info, ncopies);
3138 return true;
3141 /** Transform. **/
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "transform binary/unary operation.");
3146 /* Handle def. */
3147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3149 /* In case the vectorization factor (VF) is bigger than the number
3150 of elements that we can fit in a vectype (nunits), we have to generate
3151 more than one vector stmt - i.e - we need to "unroll" the
3152 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3153 from one copy of the vector stmt to the next, in the field
3154 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3155 stages to find the correct vector defs to be used when vectorizing
3156 stmts that use the defs of the current stmt. The example below illustrates
3157 the vectorization process when VF=16 and nunits=4 (i.e. - we need to create
3158 4 vectorized stmts):
3160 before vectorization:
3161 RELATED_STMT VEC_STMT
3162 S1: x = memref - -
3163 S2: z = x + 1 - -
3165 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3166 there):
3167 RELATED_STMT VEC_STMT
3168 VS1_0: vx0 = memref0 VS1_1 -
3169 VS1_1: vx1 = memref1 VS1_2 -
3170 VS1_2: vx2 = memref2 VS1_3 -
3171 VS1_3: vx3 = memref3 - -
3172 S1: x = load - VS1_0
3173 S2: z = x + 1 - -
3175 step2: vectorize stmt S2 (done here):
3176 To vectorize stmt S2 we first need to find the relevant vector
3177 def for the first operand 'x'. This is, as usual, obtained from
3178 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3179 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3180 relevant vector def 'vx0'. Having found 'vx0' we can generate
3181 the vector stmt VS2_0, and as usual, record it in the
3182 STMT_VINFO_VEC_STMT of stmt S2.
3183 When creating the second copy (VS2_1), we obtain the relevant vector
3184 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3185 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3186 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3187 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3188 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3189 chain of stmts and pointers:
3190 RELATED_STMT VEC_STMT
3191 VS1_0: vx0 = memref0 VS1_1 -
3192 VS1_1: vx1 = memref1 VS1_2 -
3193 VS1_2: vx2 = memref2 VS1_3 -
3194 VS1_3: vx3 = memref3 - -
3195 S1: x = load - VS1_0
3196 VS2_0: vz0 = vx0 + v1 VS2_1 -
3197 VS2_1: vz1 = vx1 + v1 VS2_2 -
3198 VS2_2: vz2 = vx2 + v1 VS2_3 -
3199 VS2_3: vz3 = vx3 + v1 - -
3200 S2: z = x + 1 - VS2_0 */
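/* Concretely, with V4SI vectors (nunits == 4) and VF == 16, ncopies ==
   4 and the loop below emits VS2_0 ... VS2_3, chaining each copy to
   the next through STMT_VINFO_RELATED_STMT exactly as in the table
   above.  */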
3202 prev_stmt_info = NULL;
3203 for (j = 0; j < ncopies; j++)
3205 /* Handle uses. */
3206 if (j == 0)
3208 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3209 if (op_type == binary_op)
3211 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3213 /* Vector shl and shr insn patterns can be defined with
3214 scalar operand 2 (shift operand). In this case, use
3215 constant or loop invariant op1 directly, without
3216 extending it to vector mode first. */
3217 optab_op2_mode = insn_data[icode].operand[2].mode;
3218 if (!VECTOR_MODE_P (optab_op2_mode))
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "operand 1 using scalar mode.");
3222 vec_oprnd1 = op1;
3225 if (!vec_oprnd1)
3226 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3229 else
3231 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3232 if (op_type == binary_op)
3233 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3236 /* Arguments are ready. Create the new vector stmt. */
3238 if (op_type == binary_op)
3239 new_stmt = build_gimple_modify_stmt (vec_dest,
3240 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3241 else
3242 new_stmt = build_gimple_modify_stmt (vec_dest,
3243 build1 (code, vectype, vec_oprnd0));
3244 new_temp = make_ssa_name (vec_dest, new_stmt);
3245 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3246 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3248 if (j == 0)
3249 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3250 else
3251 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3252 prev_stmt_info = vinfo_for_stmt (new_stmt);
3255 return true;
3259 /* Function vectorizable_type_demotion
3261 Check if STMT performs a binary or unary operation that involves
3262 type demotion, and if it can be vectorized.
3263 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3264 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3265 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3267 bool
3268 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3269 tree *vec_stmt)
3271 tree vec_dest;
3272 tree scalar_dest;
3273 tree operation;
3274 tree op0;
3275 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3278 enum tree_code code, code1 = ERROR_MARK;
3279 tree new_temp;
3280 tree def, def_stmt;
3281 enum vect_def_type dt0;
3282 tree new_stmt;
3283 stmt_vec_info prev_stmt_info;
3284 int nunits_in;
3285 int nunits_out;
3286 tree vectype_out;
3287 int ncopies;
3288 int j;
3289 tree expr;
3290 tree vectype_in;
3292 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3293 return false;
3295 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3296 return false;
3298 /* FORNOW: not yet supported. */
3299 if (STMT_VINFO_LIVE_P (stmt_info))
3301 if (vect_print_dump_info (REPORT_DETAILS))
3302 fprintf (vect_dump, "value used after loop.");
3303 return false;
3306 /* Is STMT a vectorizable type-demotion operation? */
3307 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3308 return false;
3310 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3311 return false;
3313 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3314 code = TREE_CODE (operation);
3315 if (code != NOP_EXPR && code != CONVERT_EXPR)
3316 return false;
3318 op0 = TREE_OPERAND (operation, 0);
3319 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3320 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3322 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3323 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3324 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3325 if (nunits_in != nunits_out / 2) /* FORNOW */
3326 return false;
3328 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3329 gcc_assert (ncopies >= 1);
3331 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3332 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3333 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3334 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3335 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3336 return false;
3338 /* Check the operands of the operation. */
3339 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3341 if (vect_print_dump_info (REPORT_DETAILS))
3342 fprintf (vect_dump, "use not simple.");
3343 return false;
3346 /* Supportable by target? */
3347 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3348 return false;
3350 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3352 if (!vec_stmt) /* transformation not required. */
3354 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3355 if (vect_print_dump_info (REPORT_DETAILS))
3356 fprintf (vect_dump, "=== vectorizable_demotion ===");
3357 vect_model_simple_cost (stmt_info, ncopies);
3358 return true;
3361 /** Transform. **/
3362 if (vect_print_dump_info (REPORT_DETAILS))
3363 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3364 ncopies);
3366 /* Handle def. */
3367 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3369 /* In case the vectorization factor (VF) is bigger than the number
3370 of elements that we can fit in a vectype (nunits), we have to generate
3371 more than one vector stmt - i.e - we need to "unroll" the
3372 vector stmt by a factor VF/nunits. */
3373 prev_stmt_info = NULL;
3374 for (j = 0; j < ncopies; j++)
3376 /* Handle uses. */
3377 if (j == 0)
3379 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3380 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3382 else
3384 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
3385 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3388 /* Arguments are ready. Create the new vector stmt. */
3389 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3390 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3391 new_temp = make_ssa_name (vec_dest, new_stmt);
3392 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3393 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3395 if (j == 0)
3396 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3397 else
3398 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3400 prev_stmt_info = vinfo_for_stmt (new_stmt);
3403 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3404 return true;
3408 /* Function vectorizable_type_promotion
3410 Check if STMT performs a binary or unary operation that involves
3411 type promotion, and if it can be vectorized.
3412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3413 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3416 bool
3417 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3418 tree *vec_stmt)
3420 tree vec_dest;
3421 tree scalar_dest;
3422 tree operation;
3423 tree op0, op1 = NULL;
3424 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3426 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3427 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3428 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3429 int op_type;
3430 tree def, def_stmt;
3431 enum vect_def_type dt0, dt1;
3432 tree new_stmt;
3433 stmt_vec_info prev_stmt_info;
3434 int nunits_in;
3435 int nunits_out;
3436 tree vectype_out;
3437 int ncopies;
3438 int j;
3439 tree vectype_in;
3441 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3442 return false;
3444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3445 return false;
3447 /* FORNOW: not yet supported. */
3448 if (STMT_VINFO_LIVE_P (stmt_info))
3450 if (vect_print_dump_info (REPORT_DETAILS))
3451 fprintf (vect_dump, "value used after loop.");
3452 return false;
3455 /* Is STMT a vectorizable type-promotion operation? */
3456 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3457 return false;
3459 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3460 return false;
3462 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3463 code = TREE_CODE (operation);
3464 if (code != NOP_EXPR && code != CONVERT_EXPR
3465 && code != WIDEN_MULT_EXPR)
3466 return false;
3468 op0 = TREE_OPERAND (operation, 0);
3469 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3470 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3472 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3473 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3474 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3475 if (nunits_out != nunits_in / 2) /* FORNOW */
3476 return false;
3478 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3479 gcc_assert (ncopies >= 1);
3481 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3482 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3483 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3484 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3485 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3486 return false;
3488 /* Check the operands of the operation. */
3489 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3491 if (vect_print_dump_info (REPORT_DETAILS))
3492 fprintf (vect_dump, "use not simple.");
3493 return false;
3496 op_type = TREE_CODE_LENGTH (code);
3497 if (op_type == binary_op)
3499 op1 = TREE_OPERAND (operation, 1);
3500 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
3502 if (vect_print_dump_info (REPORT_DETAILS))
3503 fprintf (vect_dump, "use not simple.");
3504 return false;
3508 /* Supportable by target? */
3509 if (!supportable_widening_operation (code, stmt, vectype_in,
3510 &decl1, &decl2, &code1, &code2))
3511 return false;
3513 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3515 if (!vec_stmt) /* transformation not required. */
3517 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3518 if (vect_print_dump_info (REPORT_DETAILS))
3519 fprintf (vect_dump, "=== vectorizable_promotion ===");
3520 vect_model_simple_cost (stmt_info, 2*ncopies);
3521 return true;
3524 /** Transform. **/
3526 if (vect_print_dump_info (REPORT_DETAILS))
3527 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3528 ncopies);
3530 /* Handle def. */
3531 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3533 /* In case the vectorization factor (VF) is bigger than the number
3534 of elements that we can fit in a vectype (nunits), we have to generate
3535 more than one vector stmt - i.e - we need to "unroll" the
3536 vector stmt by a factor VF/nunits. */
3538 prev_stmt_info = NULL;
3539 for (j = 0; j < ncopies; j++)
3541 /* Handle uses. */
3542 if (j == 0)
3544 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3545 if (op_type == binary_op)
3546 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3548 else
3550 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3551 if (op_type == binary_op)
3552 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3555 /* Arguments are ready. Create the new vector stmt. We are creating
3556 two vector defs because the widened result does not fit in one vector.
3557 The vectorized stmt can be expressed as a call to a target builtin,
3558 or using a tree-code. */
3559 /* Generate first half of the widened result: */
3560 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3561 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3562 if (j == 0)
3563 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3564 else
3565 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3566 prev_stmt_info = vinfo_for_stmt (new_stmt);
3568 /* Generate second half of the widened result: */
3569 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3570 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3571 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3572 prev_stmt_info = vinfo_for_stmt (new_stmt);
3576 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3577 return true;
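/* Illustrative sketch (hypothetical, not part of this file): a scalar
   loop of the kind vectorizable_type_promotion handles.  Each input
   vector widens into two output vectors, produced by the two
   vect_gen_widened_results_half calls above.  Guarded out; for
   illustration only.  */
#if 0
void
promote (const short *a, int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    b[i] = (int) a[i];  /* NOP_EXPR short -> int: one short vector
                           yields a "high" and a "low" int vector.  */
}
#endif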
3581 /* Function vect_strided_store_supported.
3583 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3584 and FALSE otherwise. */
3586 static bool
3587 vect_strided_store_supported (tree vectype)
3589 optab interleave_high_optab, interleave_low_optab;
3590 int mode;
3592 mode = (int) TYPE_MODE (vectype);
3594 /* Check that the operation is supported. */
3595 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3596 vectype);
3597 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3598 vectype);
3599 if (!interleave_high_optab || !interleave_low_optab)
3601 if (vect_print_dump_info (REPORT_DETAILS))
3602 fprintf (vect_dump, "no optab for interleave.");
3603 return false;
3606 if (interleave_high_optab->handlers[(int) mode].insn_code
3607 == CODE_FOR_nothing
3608 || interleave_low_optab->handlers[(int) mode].insn_code
3609 == CODE_FOR_nothing)
3611 if (vect_print_dump_info (REPORT_DETAILS))
3612 fprintf (vect_dump, "interleave op not supported by target.");
3613 return false;
3615 return true;
3619 /* Function vect_permute_store_chain.
3621 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3622 a power of 2, generate interleave_high/low stmts to reorder the data
3623 correctly for the stores. Return the final references for stores in
3624 RESULT_CHAIN.
3626 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3627 The input is 4 vectors each containing 8 elements. We assign a number to each
3628 element, the input sequence is:
3630 1st vec: 0 1 2 3 4 5 6 7
3631 2nd vec: 8 9 10 11 12 13 14 15
3632 3rd vec: 16 17 18 19 20 21 22 23
3633 4th vec: 24 25 26 27 28 29 30 31
3635 The output sequence should be:
3637 1st vec: 0 8 16 24 1 9 17 25
3638 2nd vec: 2 10 18 26 3 11 19 27
3639 3rd vec: 4 12 20 28 5 13 21 29
3640 4th vec: 6 14 22 30 7 15 23 31
3642 i.e., we interleave the contents of the four vectors in their order.
3644 We use interleave_high/low instructions to create such output. The input of
3645 each interleave_high/low operation is two vectors:
3646 1st vec 2nd vec
3647 0 1 2 3 4 5 6 7
3648 the even elements of the result vector are obtained left-to-right from the
3649 high/low elements of the first vector. The odd elements of the result are
3650 obtained left-to-right from the high/low elements of the second vector.
3651 The output of interleave_high will be: 0 4 1 5
3652 and of interleave_low: 2 6 3 7
3655 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
3656 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3657 where the first argument is taken from the first half of DR_CHAIN and the
3658 second argument from its second half.
3659 In our example,
3661 I1: interleave_high (1st vec, 3rd vec)
3662 I2: interleave_low (1st vec, 3rd vec)
3663 I3: interleave_high (2nd vec, 4th vec)
3664 I4: interleave_low (2nd vec, 4th vec)
3666 The output for the first stage is:
3668 I1: 0 16 1 17 2 18 3 19
3669 I2: 4 20 5 21 6 22 7 23
3670 I3: 8 24 9 25 10 26 11 27
3671 I4: 12 28 13 29 14 30 15 31
3673 The output of the second stage, i.e. the final result is:
3675 I1: 0 8 16 24 1 9 17 25
3676 I2: 2 10 18 26 3 11 19 27
3677 I3: 4 12 20 28 5 13 21 29
3678 I4: 6 14 22 30 7 15 23 31. */
3680 static bool
3681 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3682 unsigned int length,
3683 tree stmt,
3684 block_stmt_iterator *bsi,
3685 VEC(tree,heap) **result_chain)
3687 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3688 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3689 tree scalar_dest, tmp;
3690 int i;
3691 unsigned int j;
3692 VEC(tree,heap) *first, *second;
3694 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3695 first = VEC_alloc (tree, heap, length/2);
3696 second = VEC_alloc (tree, heap, length/2);
3698 /* Check that the operation is supported. */
3699 if (!vect_strided_store_supported (vectype))
3700 return false;
3702 *result_chain = VEC_copy (tree, heap, dr_chain);
3704 for (i = 0; i < exact_log2 (length); i++)
3706 for (j = 0; j < length/2; j++)
3708 vect1 = VEC_index (tree, dr_chain, j);
3709 vect2 = VEC_index (tree, dr_chain, j+length/2);
3711 /* Create interleaving stmt:
3712 in the case of big endian:
3713 high = interleave_high (vect1, vect2)
3714 and in the case of little endian:
3715 high = interleave_low (vect1, vect2). */
3716 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3717 DECL_GIMPLE_REG_P (perm_dest) = 1;
3718 add_referenced_var (perm_dest);
3719 if (BYTES_BIG_ENDIAN)
3720 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3721 else
3722 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3723 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3724 high = make_ssa_name (perm_dest, perm_stmt);
3725 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3726 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3727 VEC_replace (tree, *result_chain, 2*j, high);
3729 /* Create interleaving stmt:
3730 in the case of big endian:
3731 low = interleave_low (vect1, vect2)
3732 and in the case of little endian:
3733 low = interleave_high (vect1, vect2). */
3734 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3735 DECL_GIMPLE_REG_P (perm_dest) = 1;
3736 add_referenced_var (perm_dest);
3737 if (BYTES_BIG_ENDIAN)
3738 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3739 else
3740 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3741 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3742 low = make_ssa_name (perm_dest, perm_stmt);
3743 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3744 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3745 VEC_replace (tree, *result_chain, 2*j+1, low);
3747 dr_chain = VEC_copy (tree, heap, *result_chain);
3749 return true;
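/* A scalar model of the permutation above (hypothetical, not part of
   this file): four vectors of eight elements are reordered in
   log2 (4) = 2 stages, reproducing the 4x8 example documented before
   vect_permute_store_chain.  Guarded out; for illustration only.  */
#if 0
#include <string.h>
#define NUNITS 8

static void
interleave (const int *v1, const int *v2, int *hi, int *lo)
{
  int k;
  for (k = 0; k < NUNITS / 2; k++)
    {
      hi[2 * k] = v1[k];                  /* interleave_high */
      hi[2 * k + 1] = v2[k];
      lo[2 * k] = v1[NUNITS / 2 + k];     /* interleave_low */
      lo[2 * k + 1] = v2[NUNITS / 2 + k];
    }
}

static void
permute_store_chain (int chain[4][NUNITS])
{
  int result[4][NUNITS];
  int stage, j;
  for (stage = 0; stage < 2; stage++)     /* log2 (length) stages.  */
    {
      for (j = 0; j < 2; j++)             /* length / 2 pairs.  */
        interleave (chain[j], chain[j + 2],
                    result[2 * j], result[2 * j + 1]);
      memcpy (chain, result, sizeof (result));
    }
}
#endif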
3753 /* Function vectorizable_store.
3755 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3756 can be vectorized.
3757 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3758 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3759 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3761 bool
3762 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3764 tree scalar_dest;
3765 tree data_ref;
3766 tree op;
3767 tree vec_oprnd = NULL_TREE;
3768 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3769 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3770 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3771 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3772 enum machine_mode vec_mode;
3773 tree dummy;
3774 enum dr_alignment_support alignment_support_scheme;
3775 ssa_op_iter iter;
3776 def_operand_p def_p;
3777 tree def, def_stmt;
3778 enum vect_def_type dt;
3779 stmt_vec_info prev_stmt_info = NULL;
3780 tree dataref_ptr = NULL_TREE;
3781 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3782 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3783 int j;
3784 tree next_stmt, first_stmt;
3785 bool strided_store = false;
3786 unsigned int group_size, i;
3787 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3788 gcc_assert (ncopies >= 1);
3790 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3791 return false;
3793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3794 return false;
3796 if (STMT_VINFO_LIVE_P (stmt_info))
3798 if (vect_print_dump_info (REPORT_DETAILS))
3799 fprintf (vect_dump, "value used after loop.");
3800 return false;
3803 /* Is vectorizable store? */
3805 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3806 return false;
3808 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3809 if (TREE_CODE (scalar_dest) != ARRAY_REF
3810 && TREE_CODE (scalar_dest) != INDIRECT_REF
3811 && !DR_GROUP_FIRST_DR (stmt_info))
3812 return false;
3814 op = GIMPLE_STMT_OPERAND (stmt, 1);
3815 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3817 if (vect_print_dump_info (REPORT_DETAILS))
3818 fprintf (vect_dump, "use not simple.");
3819 return false;
3822 vec_mode = TYPE_MODE (vectype);
3823 /* FORNOW. In some cases we can vectorize even if the data-type is not
3824 supported (e.g. - array initialization with 0). */
3825 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3826 return false;
3828 if (!STMT_VINFO_DATA_REF (stmt_info))
3829 return false;
3831 if (DR_GROUP_FIRST_DR (stmt_info))
3833 strided_store = true;
3834 if (!vect_strided_store_supported (vectype))
3835 return false;
3838 if (!vec_stmt) /* transformation not required. */
3840 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3841 vect_model_store_cost (stmt_info, ncopies);
3842 return true;
3845 /** Transform. **/
3847 if (strided_store)
3849 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3850 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3851 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3853 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3855 /* We vectorize all the stmts of the interleaving group when we
3856 reach the last stmt in the group. */
3857 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3858 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3860 *vec_stmt = NULL_TREE;
3861 return true;
3864 else
3866 first_stmt = stmt;
3867 first_dr = dr;
3868 group_size = 1;
3871 if (vect_print_dump_info (REPORT_DETAILS))
3872 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3874 dr_chain = VEC_alloc (tree, heap, group_size);
3875 oprnds = VEC_alloc (tree, heap, group_size);
3877 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3878 gcc_assert (alignment_support_scheme);
3879 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
3881 /* In case the vectorization factor (VF) is bigger than the number
3882 of elements that we can fit in a vectype (nunits), we have to generate
3883 more than one vector stmt - i.e - we need to "unroll" the
3884 vector stmt by a factor VF/nunits. For more details see documentation in
3885 vect_get_vec_def_for_copy_stmt. */
3887 /* In case of interleaving (non-unit strided access):
3889 S1: &base + 2 = x2
3890 S2: &base = x0
3891 S3: &base + 1 = x1
3892 S4: &base + 3 = x3
3894 We create vectorized stores starting from the base address (the access of
3895 the first stmt in the chain, S2 in the above example) when the last store
3896 stmt of the chain (S4) is reached:
3898 VS1: &base = vx2
3899 VS2: &base + vec_size*1 = vx0
3900 VS3: &base + vec_size*2 = vx1
3901 VS4: &base + vec_size*3 = vx3
3903 Then permutation statements are generated:
3905 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3906 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3909 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3910 (the order of the data-refs in the output of vect_permute_store_chain
3911 corresponds to the order of scalar stmts in the interleaving chain - see
3912 the documentation of vect_permute_store_chain()).
3914 In case of both multiple types and interleaving, above vector stores and
3915 permutation stmts are created for every copy. The result vector stmts are
3916 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3917 STMT_VINFO_RELATED_STMT for the next copies. */
3920 prev_stmt_info = NULL;
3921 for (j = 0; j < ncopies; j++)
3923 tree new_stmt;
3924 tree ptr_incr;
3926 if (j == 0)
3928 /* For interleaved stores we collect vectorized defs for all the
3929 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3930 as an input to vect_permute_store_chain(), and OPRNDS as an input
3931 to vect_get_vec_def_for_stmt_copy() for the next copy.
3932 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3933 OPRNDS are of size 1. */
3934 next_stmt = first_stmt;
3935 for (i = 0; i < group_size; i++)
3937 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3938 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3939 can't be NULL_TREE. In case that there is no interleaving,
3940 GROUP_SIZE is 1, and only one iteration of the loop will be
3941 executed. */
3942 gcc_assert (next_stmt);
3943 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
3944 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
3945 VEC_quick_push(tree, dr_chain, vec_oprnd);
3946 VEC_quick_push(tree, oprnds, vec_oprnd);
3947 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3949 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
3950 &dummy, &ptr_incr, false,
3951 TREE_TYPE (vec_oprnd));
3953 else
3955 /* For interleaved stores we created vectorized defs for all the
3956 defs stored in OPRNDS in the previous iteration (previous copy).
3957 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3958 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3959 next copy.
3960 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3961 OPRNDS are of size 1. */
3962 for (i = 0; i < group_size; i++)
3964 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
3965 VEC_index (tree, oprnds, i));
3966 VEC_replace(tree, dr_chain, i, vec_oprnd);
3967 VEC_replace(tree, oprnds, i, vec_oprnd);
3969 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3972 if (strided_store)
3974 result_chain = VEC_alloc (tree, heap, group_size);
3975 /* Permute. */
3976 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
3977 &result_chain))
3978 return false;
3981 next_stmt = first_stmt;
3982 for (i = 0; i < group_size; i++)
3984 /* For strided stores vectorized defs are interleaved in
3985 vect_permute_store_chain(). */
3986 if (strided_store)
3987 vec_oprnd = VEC_index(tree, result_chain, i);
3989 data_ref = build_fold_indirect_ref (dataref_ptr);
3990 /* Arguments are ready. Create the new vector stmt. */
3991 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
3992 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3994 /* Set the VDEFs for the vector pointer. If this virtual def
3995 has a use outside the loop and a loop peel is performed
3996 then the def may be renamed by the peel. Mark it for
3997 renaming so the later use will also be renamed. */
3998 copy_virtual_operands (new_stmt, next_stmt);
3999 if (j == 0)
4001 /* The original store is deleted so the same SSA_NAMEs
4002 can be used. */
4003 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
4005 SSA_NAME_DEF_STMT (def) = new_stmt;
4006 mark_sym_for_renaming (SSA_NAME_VAR (def));
4009 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4011 else
4013 /* Create new names for all the definitions created by COPY and
4014 add replacement mappings for each new name. */
4015 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
4017 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
4018 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
4021 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4024 prev_stmt_info = vinfo_for_stmt (new_stmt);
4025 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4026 if (!next_stmt)
4027 break;
4028 /* Bump the vector pointer. */
4029 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4033 return true;
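/* Illustrative sketch (hypothetical, not part of this file): a source
   loop with an interleaved store chain of size two.  Nothing is
   emitted until the last store of the chain is reached; then both
   defs are collected and vect_permute_store_chain interleaves them.
   Guarded out; for illustration only.  */
#if 0
void
store_pairs (int *a, const int *x, const int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[2 * i] = x[i];      /* first stmt of the chain: skipped */
      a[2 * i + 1] = y[i];  /* last stmt: triggers the transform */
    }
}
#endif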
4037 /* Function vect_setup_realignment
4039 This function is called when vectorizing an unaligned load using
4040 the dr_unaligned_software_pipeline scheme.
4041 This function generates the following code at the loop prolog:
4043 p = initial_addr;
4044 msq_init = *(floor(p)); # prolog load
4045 realignment_token = call target_builtin;
4046 loop:
4047 msq = phi (msq_init, ---)
4049 The code above sets up a new (vector) pointer, pointing to the first
4050 location accessed by STMT, and a "floor-aligned" load using that pointer.
4051 It also generates code to compute the "realignment-token" (if the relevant
4052 target hook was defined), and creates a phi-node at the loop-header bb
4053 whose arguments are the result of the prolog-load (created by this
4054 function) and the result of a load that takes place in the loop (to be
4055 created by the caller to this function).
4056 The caller to this function uses the phi-result (msq) to create the
4057 realignment code inside the loop, and sets up the missing phi argument,
4058 as follows:
4060 loop:
4061 msq = phi (msq_init, lsq)
4062 lsq = *(floor(p')); # load in loop
4063 result = realign_load (msq, lsq, realignment_token);
4065 Input:
4066 STMT - (scalar) load stmt to be vectorized. This load accesses
4067 a memory location that may be unaligned.
4068 BSI - place where new code is to be inserted.
4070 Output:
4071 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4072 target hook, if defined.
4073 Return value - the result of the loop-header phi node. */
4075 static tree
4076 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4077 tree *realignment_token)
4079 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4080 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4081 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4082 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4083 edge pe = loop_preheader_edge (loop);
4084 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4085 tree vec_dest;
4086 tree init_addr;
4087 tree inc;
4088 tree ptr;
4089 tree data_ref;
4090 tree new_stmt;
4091 basic_block new_bb;
4092 tree msq_init;
4093 tree new_temp;
4094 tree phi_stmt;
4095 tree msq;
4097 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4098 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4099 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
4100 NULL_TREE);
4101 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4102 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4103 new_temp = make_ssa_name (vec_dest, new_stmt);
4104 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4105 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4106 gcc_assert (!new_bb);
4107 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4108 copy_virtual_operands (new_stmt, stmt);
4109 update_vuses_to_preheader (new_stmt, loop);
4111 /* 2. Create permutation mask, if required, in loop preheader. */
4112 if (targetm.vectorize.builtin_mask_for_load)
4114 tree builtin_decl;
4116 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4117 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4118 vec_dest = vect_create_destination_var (scalar_dest,
4119 TREE_TYPE (new_stmt));
4120 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4121 new_temp = make_ssa_name (vec_dest, new_stmt);
4122 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4123 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4124 gcc_assert (!new_bb);
4125 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4127 /* The result of the CALL_EXPR to this builtin is determined from
4128 the value of the parameter and no global variables are touched
4129 which makes the builtin a "const" function. Requiring the
4130 builtin to have the "const" attribute makes it unnecessary
4131 to call mark_call_clobbered. */
4132 gcc_assert (TREE_READONLY (builtin_decl));
4135 /* 3. Create msq = phi <msq_init, lsq> in loop */
4136 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4137 msq = make_ssa_name (vec_dest, NULL_TREE);
4138 phi_stmt = create_phi_node (msq, loop->header);
4139 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4140 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4142 return msq;
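/* A scalar model of the realignment scheme set up above (hypothetical,
   not part of this file): msq is primed by the floor-aligned prolog
   load, each iteration loads lsq and combines the two, and msq is
   rotated for the next iteration.  The inner loop stands in for
   REALIGN_LOAD_EXPR, with SHIFT playing the realignment token.  VS is
   assumed to be a power of two <= 16 and N a multiple of VS.  Guarded
   out; for illustration only.  */
#if 0
#include <string.h>

static void
realign_model (const char *p, char *out, int n, int vs)
{
  const char *fp = (const char *) ((unsigned long) p
                                   & ~((unsigned long) vs - 1));
  int shift = p - fp;           /* in essence, the realignment token */
  char msq[16], lsq[16];
  int i, k;

  memcpy (msq, fp, vs);         /* msq_init = *(floor (p)) */
  for (i = 0; i < n; i += vs)
    {
      fp += vs;
      memcpy (lsq, fp, vs);     /* lsq = *(floor (p')) */
      for (k = 0; k < vs; k++)  /* realign_load (msq, lsq, token) */
        out[i + k] = k + shift < vs
                     ? msq[k + shift] : lsq[k + shift - vs];
      memcpy (msq, lsq, vs);    /* msq = lsq */
    }
}
#endif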
4146 /* Function vect_strided_load_supported.
4148 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4149 and FALSE otherwise. */
4151 static bool
4152 vect_strided_load_supported (tree vectype)
4154 optab perm_even_optab, perm_odd_optab;
4155 int mode;
4157 mode = (int) TYPE_MODE (vectype);
4159 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4160 if (!perm_even_optab)
4162 if (vect_print_dump_info (REPORT_DETAILS))
4163 fprintf (vect_dump, "no optab for perm_even.");
4164 return false;
4167 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4169 if (vect_print_dump_info (REPORT_DETAILS))
4170 fprintf (vect_dump, "perm_even op not supported by target.");
4171 return false;
4174 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4175 if (!perm_odd_optab)
4177 if (vect_print_dump_info (REPORT_DETAILS))
4178 fprintf (vect_dump, "no optab for perm_odd.");
4179 return false;
4182 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4184 if (vect_print_dump_info (REPORT_DETAILS))
4185 fprintf (vect_dump, "perm_odd op not supported by target.");
4186 return false;
4188 return true;
4192 /* Function vect_permute_load_chain.
4194 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4195 a power of 2, generate extract_even/odd stmts to reorder the input data
4196 correctly. Return the final references for loads in RESULT_CHAIN.
4198 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4199 The input is 4 vectors each containing 8 elements. We assign a number to each
4200 element, the input sequence is:
4202 1st vec: 0 1 2 3 4 5 6 7
4203 2nd vec: 8 9 10 11 12 13 14 15
4204 3rd vec: 16 17 18 19 20 21 22 23
4205 4th vec: 24 25 26 27 28 29 30 31
4207 The output sequence should be:
4209 1st vec: 0 4 8 12 16 20 24 28
4210 2nd vec: 1 5 9 13 17 21 25 29
4211 3rd vec: 2 6 10 14 18 22 26 30
4212 4th vec: 3 7 11 15 19 23 27 31
4214 i.e., the first output vector should contain the first elements of each
4215 interleaving group, etc.
4217 We use extract_even/odd instructions to create such output. The input of each
4218 extract_even/odd operation is two vectors
4219 1st vec 2nd vec
4220 0 1 2 3 4 5 6 7
4222 and the output is the vector of extracted even/odd elements. The output of
4223 extract_even will be: 0 2 4 6
4224 and of extract_odd: 1 3 5 7
4227 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
4228 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4229 order. In our example,
4231 E1: extract_even (1st vec, 2nd vec)
4232 E2: extract_odd (1st vec, 2nd vec)
4233 E3: extract_even (3rd vec, 4th vec)
4234 E4: extract_odd (3rd vec, 4th vec)
4236 The output for the first stage will be:
4238 E1: 0 2 4 6 8 10 12 14
4239 E2: 1 3 5 7 9 11 13 15
4240 E3: 16 18 20 22 24 26 28 30
4241 E4: 17 19 21 23 25 27 29 31
4243 In order to proceed and create the correct sequence for the next stage (or
4244 for the correct output, if the second stage is the last one, as in our
4245 example), we first put the output of extract_even operation and then the
4246 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4247 The input for the second stage is:
4249 1st vec (E1): 0 2 4 6 8 10 12 14
4250 2nd vec (E3): 16 18 20 22 24 26 28 30
4251 3rd vec (E2): 1 3 5 7 9 11 13 15
4252 4th vec (E4): 17 19 21 23 25 27 29 31
4254 The output of the second stage:
4256 E1: 0 4 8 12 16 20 24 28
4257 E2: 2 6 10 14 18 22 26 30
4258 E3: 1 5 9 13 17 21 25 29
4259 E4: 3 7 11 15 19 23 27 31
4261 And RESULT_CHAIN after reordering:
4263 1st vec (E1): 0 4 8 12 16 20 24 28
4264 2nd vec (E3): 1 5 9 13 17 21 25 29
4265 3rd vec (E2): 2 6 10 14 18 22 26 30
4266 4th vec (E4): 3 7 11 15 19 23 27 31. */
4268 static bool
4269 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4270 unsigned int length,
4271 tree stmt,
4272 block_stmt_iterator *bsi,
4273 VEC(tree,heap) **result_chain)
4275 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4276 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4277 tree tmp;
4278 int i;
4279 unsigned int j;
4281 /* Check that the operation is supported. */
4282 if (!vect_strided_load_supported (vectype))
4283 return false;
4285 *result_chain = VEC_copy (tree, heap, dr_chain);
4286 for (i = 0; i < exact_log2 (length); i++)
4288 for (j = 0; j < length; j +=2)
4290 first_vect = VEC_index (tree, dr_chain, j);
4291 second_vect = VEC_index (tree, dr_chain, j+1);
4293 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4294 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4295 DECL_GIMPLE_REG_P (perm_dest) = 1;
4296 add_referenced_var (perm_dest);
4298 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4299 first_vect, second_vect);
4300 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4302 data_ref = make_ssa_name (perm_dest, perm_stmt);
4303 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4304 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4305 mark_symbols_for_renaming (perm_stmt);
4307 VEC_replace (tree, *result_chain, j/2, data_ref);
4309 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4310 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4311 DECL_GIMPLE_REG_P (perm_dest) = 1;
4312 add_referenced_var (perm_dest);
4314 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4315 first_vect, second_vect);
4316 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4317 data_ref = make_ssa_name (perm_dest, perm_stmt);
4318 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4319 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4320 mark_symbols_for_renaming (perm_stmt);
4322 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4324 dr_chain = VEC_copy (tree, heap, *result_chain);
4326 return true;
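/* A scalar model of the permutation above (hypothetical, not part of
   this file): in each of the log2 (4) = 2 stages every pair of vectors
   is split into even and odd elements, the even results filling the
   first half of the chain and the odd results the second half,
   reproducing the 4x8 example documented before
   vect_permute_load_chain.  Guarded out; for illustration only.  */
#if 0
#include <string.h>
#define NUNITS 8

static void
extract (const int *v1, const int *v2, int *even, int *odd)
{
  int k;
  for (k = 0; k < NUNITS; k++)
    {
      /* Even/odd elements of the concatenation <v1, v2>.  */
      even[k] = k < NUNITS / 2 ? v1[2 * k] : v2[2 * k - NUNITS];
      odd[k] = k < NUNITS / 2 ? v1[2 * k + 1] : v2[2 * k + 1 - NUNITS];
    }
}

static void
permute_load_chain (int chain[4][NUNITS])
{
  int result[4][NUNITS];
  int stage, j;
  for (stage = 0; stage < 2; stage++)     /* log2 (length) stages.  */
    {
      for (j = 0; j < 4; j += 2)
        extract (chain[j], chain[j + 1],
                 result[j / 2], result[j / 2 + 2]);
      memcpy (chain, result, sizeof (result));
    }
}
#endif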
4330 /* Function vect_transform_strided_load.
4332 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4333 to perform their permutation and record the resulting vectorized statements
4334 in the scalar statements. */
4337 static bool
4338 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4339 block_stmt_iterator *bsi)
4341 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4342 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4343 tree next_stmt, new_stmt;
4344 VEC(tree,heap) *result_chain = NULL;
4345 unsigned int i, gap_count;
4346 tree tmp_data_ref;
4348 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4349 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4350 vectors, that are ready for vector computation. */
4351 result_chain = VEC_alloc (tree, heap, size);
4352 /* Permute. */
4353 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4354 return false;
4356 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4357 Since we scan the chain starting from its first node, their order
4358 corresponds to the order of data-refs in RESULT_CHAIN. */
4359 next_stmt = first_stmt;
4360 gap_count = 1;
4361 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
4363 if (!next_stmt)
4364 break;
4366 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4367 code elimination pass later.
4368 DR_GROUP_GAP is the number of steps in elements from the previous
4369 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4370 correspond to the gaps. */
4372 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4374 gap_count++;
4375 continue;
4378 while (next_stmt)
4380 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4381 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4382 copies, and we put the new vector statement in the first available
4383 RELATED_STMT. */
4384 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4385 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4386 else
4388 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4389 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4390 vinfo_for_stmt (prev_stmt));
4391 while (rel_stmt)
4393 prev_stmt = rel_stmt;
4394 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4396 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4398 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4399 gap_count = 1;
4400 /* If NEXT_STMT accesses the same DR as the previous statement,
4401 put the same TMP_DATA_REF as its vectorized statement; otherwise
4402 get the next data-ref from RESULT_CHAIN. */
4403 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4404 break;
4407 return true;
4411 /* Function vectorizable_load.
4413 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4414 can be vectorized.
4415 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4416 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4417 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4419 bool
4420 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4422 tree scalar_dest;
4423 tree vec_dest = NULL;
4424 tree data_ref = NULL;
4425 tree op;
4426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4427 stmt_vec_info prev_stmt_info;
4428 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4430 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4431 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4432 tree new_temp;
4433 int mode;
4434 tree new_stmt = NULL_TREE;
4435 tree dummy;
4436 enum dr_alignment_support alignment_support_scheme;
4437 tree dataref_ptr = NULL_TREE;
4438 tree ptr_incr;
4439 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4440 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4441 int i, j, group_size;
4442 tree msq = NULL_TREE, lsq;
4443 tree offset = NULL_TREE;
4444 tree realignment_token = NULL_TREE;
4445 tree phi_stmt = NULL_TREE;
4446 VEC(tree,heap) *dr_chain = NULL;
4447 bool strided_load = false;
4448 tree first_stmt;
4450 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4451 return false;
4453 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4454 return false;
4456 /* FORNOW: not yet supported. */
4457 if (STMT_VINFO_LIVE_P (stmt_info))
4459 if (vect_print_dump_info (REPORT_DETAILS))
4460 fprintf (vect_dump, "value used after loop.");
4461 return false;
4464 /* Is vectorizable load? */
4465 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4466 return false;
4468 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4469 if (TREE_CODE (scalar_dest) != SSA_NAME)
4470 return false;
4472 op = GIMPLE_STMT_OPERAND (stmt, 1);
4473 if (TREE_CODE (op) != ARRAY_REF
4474 && TREE_CODE (op) != INDIRECT_REF
4475 && !DR_GROUP_FIRST_DR (stmt_info))
4476 return false;
4478 if (!STMT_VINFO_DATA_REF (stmt_info))
4479 return false;
4481 mode = (int) TYPE_MODE (vectype);
4483 /* FORNOW. In some cases we can vectorize even if the data-type is not
4484 supported (e.g. - data copies). */
4485 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4487 if (vect_print_dump_info (REPORT_DETAILS))
4488 fprintf (vect_dump, "Aligned load, but unsupported type.");
4489 return false;
4492 /* Check if the load is a part of an interleaving chain. */
4493 if (DR_GROUP_FIRST_DR (stmt_info))
4495 strided_load = true;
4497 /* Check if interleaving is supported. */
4498 if (!vect_strided_load_supported (vectype))
4499 return false;
4502 if (!vec_stmt) /* transformation not required. */
4504 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4505 vect_model_load_cost (stmt_info, ncopies);
4506 return true;
4509 if (vect_print_dump_info (REPORT_DETAILS))
4510 fprintf (vect_dump, "transform load.");
4512 /** Transform. **/
4514 if (strided_load)
4516 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4517 /* Check if the chain of loads is already vectorized. */
4518 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4520 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4521 return true;
4523 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4524 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4525 dr_chain = VEC_alloc (tree, heap, group_size);
4527 else
4529 first_stmt = stmt;
4530 first_dr = dr;
4531 group_size = 1;
4534 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4535 gcc_assert (alignment_support_scheme);
4538 /* In case the vectorization factor (VF) is bigger than the number
4539 of elements that we can fit in a vectype (nunits), we have to generate
4540 more than one vector stmt - i.e - we need to "unroll" the
4541 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4542 from one copy of the vector stmt to the next, in the field
4543 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4544 stages to find the correct vector defs to be used when vectorizing
4545 stmts that use the defs of the current stmt. The example below illustrates
4546 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4547 4 vectorized stmts):
4549 before vectorization:
4550 RELATED_STMT VEC_STMT
4551 S1: x = memref - -
4552 S2: z = x + 1 - -
4554 step 1: vectorize stmt S1:
4555 We first create the vector stmt VS1_0, and, as usual, record a
4556 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4557 Next, we create the vector stmt VS1_1, and record a pointer to
4558 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4559 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4560 stmts and pointers:
4561 RELATED_STMT VEC_STMT
4562 VS1_0: vx0 = memref0 VS1_1 -
4563 VS1_1: vx1 = memref1 VS1_2 -
4564 VS1_2: vx2 = memref2 VS1_3 -
4565 VS1_3: vx3 = memref3 - -
4566 S1: x = load - VS1_0
4567 S2: z = x + 1 - -
4569 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4570 information we recorded in RELATED_STMT field is used to vectorize
4571 stmt S2. */
4573 /* In case of interleaving (non-unit strided access):
4575 S1: x2 = &base + 2
4576 S2: x0 = &base
4577 S3: x1 = &base + 1
4578 S4: x3 = &base + 3
4580 Vectorized loads are created in the order of memory accesses
4581 starting from the access of the first stmt of the chain:
4583 VS1: vx0 = &base
4584 VS2: vx1 = &base + vec_size*1
4585 VS3: vx3 = &base + vec_size*2
4586 VS4: vx4 = &base + vec_size*3
4588 Then permutation statements are generated:
4590 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4591 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4594 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4595 (the order of the data-refs in the output of vect_permute_load_chain
4596 corresponds to the order of scalar stmts in the interleaving chain - see
4597 the documentation of vect_permute_load_chain()).
4598 The generation of permutation stmts and recording them in
4599 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4601 In case of both multiple types and interleaving, the vector loads and
4602 permutation stmts above are created for every copy. The result vector stmts
4603 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4604 STMT_VINFO_RELATED_STMT for the next copies. */
4606 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4607 on a target that supports unaligned accesses (dr_unaligned_supported)
4608 we generate the following code:
4609 p = initial_addr;
4610 indx = 0;
4611 loop {
4612 p = p + indx * vectype_size;
4613 vec_dest = *(p);
4614 indx = indx + 1;
4617 Otherwise, the data reference is potentially unaligned on a target that
4618 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4619 then generate the following code, in which the data in each iteration is
4620 obtained by two vector loads, one from the previous iteration, and one
4621 from the current iteration:
4622 p1 = initial_addr;
4623 msq_init = *(floor(p1))
4624 p2 = initial_addr + VS - 1;
4625 realignment_token = call target_builtin;
4626 indx = 0;
4627 loop {
4628 p2 = p2 + indx * vectype_size
4629 lsq = *(floor(p2))
4630 vec_dest = realign_load (msq, lsq, realignment_token)
4631 indx = indx + 1;
4632 msq = lsq;
4633 } */
4635 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4637 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4638 phi_stmt = SSA_NAME_DEF_STMT (msq);
4639 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4642 prev_stmt_info = NULL;
4643 for (j = 0; j < ncopies; j++)
4645 /* 1. Create the vector pointer update chain. */
4646 if (j == 0)
4647 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4648 &ptr_incr, false, NULL_TREE);
4649 else
4650 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4652 for (i = 0; i < group_size; i++)
4654 /* 2. Create the vector-load in the loop. */
4655 switch (alignment_support_scheme)
4657 case dr_aligned:
4658 gcc_assert (aligned_access_p (first_dr));
4659 data_ref = build_fold_indirect_ref (dataref_ptr);
4660 break;
4661 case dr_unaligned_supported:
4663 int mis = DR_MISALIGNMENT (first_dr);
4664 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4666 gcc_assert (!aligned_access_p (first_dr));
4667 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
4668 data_ref =
4669 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4670 break;
4672 case dr_unaligned_software_pipeline:
4673 gcc_assert (!aligned_access_p (first_dr));
4674 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4675 break;
4676 default:
4677 gcc_unreachable ();
4679 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4680 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4681 new_temp = make_ssa_name (vec_dest, new_stmt);
4682 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4683 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4684 copy_virtual_operands (new_stmt, stmt);
4685 mark_symbols_for_renaming (new_stmt);
4687 /* 3. Handle explicit realignment if necessary/supported. */
4688 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4690 /* Create in loop:
4691 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4692 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4693 if (!realignment_token)
4694 realignment_token = dataref_ptr;
4695 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4696 new_stmt =
4697 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4698 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4699 new_temp = make_ssa_name (vec_dest, new_stmt);
4700 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4701 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4702 if (i == group_size - 1 && j == ncopies - 1)
4703 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4704 msq = lsq;
4706 if (strided_load)
4707 VEC_quick_push (tree, dr_chain, new_temp);
4708 if (i < group_size - 1)
4709 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4712 if (strided_load)
4714 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4715 return false;
4716 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4717 dr_chain = VEC_alloc (tree, heap, group_size);
4719 else
4721 if (j == 0)
4722 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4723 else
4724 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4725 prev_stmt_info = vinfo_for_stmt (new_stmt);
4729 return true;
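/* Illustrative sketch (hypothetical, not part of this file): a source
   loop with an interleaved load chain of size two.  The whole chain is
   vectorized when the first of its statements is transformed; the
   other members then find STMT_VINFO_VEC_STMT already set.  Guarded
   out; for illustration only.  */
#if 0
void
load_pairs (const int *a, int *x, int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      x[i] = a[2 * i];      /* DR_GROUP_FIRST_DR of the chain */
      y[i] = a[2 * i + 1];
    }
}
#endif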
4733 /* Function vectorizable_live_operation.
4735 STMT computes a value that is used outside the loop. Check if
4736 it can be supported. */
4738 bool
4739 vectorizable_live_operation (tree stmt,
4740 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4741 tree *vec_stmt ATTRIBUTE_UNUSED)
4743 tree operation;
4744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4746 int i;
4747 int op_type;
4748 tree op;
4749 tree def, def_stmt;
4750 enum vect_def_type dt;
4752 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4754 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4755 return false;
4757 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4758 return false;
4760 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4761 return false;
4763 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4764 op_type = TREE_OPERAND_LENGTH (operation);
4766 /* FORNOW: support only if all uses are invariant. This means
4767 that the scalar operations can remain in place, unvectorized.
4768 The original last scalar value that they compute will be used. */
4770 for (i = 0; i < op_type; i++)
4772 op = TREE_OPERAND (operation, i);
4773 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4775 if (vect_print_dump_info (REPORT_DETAILS))
4776 fprintf (vect_dump, "use not simple.");
4777 return false;
4780 if (dt != vect_invariant_def && dt != vect_constant_def)
4781 return false;
4784 /* No transformation is required for the cases we currently support. */
4785 return true;
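/* Illustrative sketch (hypothetical, not part of this file): a stmt
   that is live after the loop but whose operands are all invariant,
   so the check above succeeds and the scalar stmt simply remains in
   place.  Guarded out; for illustration only.  */
#if 0
int
live_example (int *a, int n, int c)
{
  int i, last = 0;
  for (i = 0; i < n; i++)
    {
      a[i] = a[i] + 1;  /* vectorized as usual */
      last = c + 5;     /* live after the loop; C and 5 are invariant,
                           so the stmt is left unvectorized */
    }
  return last;
}
#endif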
4789 /* Function vect_is_simple_cond.
4791 Input:
4792 LOOP - the loop that is being vectorized.
4793 COND - Condition that is checked for simple use.
4795 Returns whether a COND can be vectorized. Checks whether
4796 condition operands are supportable using vect_is_simple_use. */
4798 static bool
4799 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4801 tree lhs, rhs;
4802 tree def;
4803 enum vect_def_type dt;
4805 if (!COMPARISON_CLASS_P (cond))
4806 return false;
4808 lhs = TREE_OPERAND (cond, 0);
4809 rhs = TREE_OPERAND (cond, 1);
4811 if (TREE_CODE (lhs) == SSA_NAME)
4813 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4814 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4815 return false;
4817 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4818 return false;
4820 if (TREE_CODE (rhs) == SSA_NAME)
4822 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4823 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4824 return false;
4826 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4827 return false;
4829 return true;
4832 /* vectorizable_condition.
4834 Check if STMT is a conditional modify expression that can be vectorized.
4835 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4836 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4837 at BSI.
4839 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4841 bool
4842 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4844 tree scalar_dest = NULL_TREE;
4845 tree vec_dest = NULL_TREE;
4846 tree op = NULL_TREE;
4847 tree cond_expr, then_clause, else_clause;
4848 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4849 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4850 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4851 tree vec_compare, vec_cond_expr;
4852 tree new_temp;
4853 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4854 enum machine_mode vec_mode;
4855 tree def;
4856 enum vect_def_type dt;
4857 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4858 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4860 gcc_assert (ncopies >= 1);
4861 if (ncopies > 1)
4862 return false; /* FORNOW */
4864 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4865 return false;
4867 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4868 return false;
4870 /* FORNOW: not yet supported. */
4871 if (STMT_VINFO_LIVE_P (stmt_info))
4873 if (vect_print_dump_info (REPORT_DETAILS))
4874 fprintf (vect_dump, "value used after loop.");
4875 return false;
4878 /* Is vectorizable conditional operation? */
4879 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4880 return false;
4882 op = GIMPLE_STMT_OPERAND (stmt, 1);
4884 if (TREE_CODE (op) != COND_EXPR)
4885 return false;
4887 cond_expr = TREE_OPERAND (op, 0);
4888 then_clause = TREE_OPERAND (op, 1);
4889 else_clause = TREE_OPERAND (op, 2);
4891 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4892 return false;
4894 /* We do not handle two different vector types for the condition
4895 and the values. */
4896 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4897 return false;
4899 if (TREE_CODE (then_clause) == SSA_NAME)
4901 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4902 if (!vect_is_simple_use (then_clause, loop_vinfo,
4903 &then_def_stmt, &def, &dt))
4904 return false;
4906 else if (TREE_CODE (then_clause) != INTEGER_CST
4907 && TREE_CODE (then_clause) != REAL_CST)
4908 return false;
4910 if (TREE_CODE (else_clause) == SSA_NAME)
4912 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4913 if (!vect_is_simple_use (else_clause, loop_vinfo,
4914 &else_def_stmt, &def, &dt))
4915 return false;
4917 else if (TREE_CODE (else_clause) != INTEGER_CST
4918 && TREE_CODE (else_clause) != REAL_CST)
4919 return false;
4922 vec_mode = TYPE_MODE (vectype);
4924 if (!vec_stmt)
4926 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4927 return expand_vec_cond_expr_p (op, vec_mode);
4930 /* Transform */
4932 /* Handle def. */
4933 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4934 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4936 /* Handle cond expr. */
4937 vec_cond_lhs =
4938 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4939 vec_cond_rhs =
4940 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4941 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4942 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4944 /* Arguments are ready. Create the new vector stmt. */
4945 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4946 vec_cond_lhs, vec_cond_rhs);
4947 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4948 vec_compare, vec_then_clause, vec_else_clause);
4950 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
4951 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4952 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
4953 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
4955 return true;
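/* Illustrative sketch (hypothetical, not part of this file): a
   conditional assignment of the shape handled above; the COND_EXPR on
   the rhs becomes a VEC_COND_EXPR over vector operands.  Guarded out;
   for illustration only.  */
#if 0
void
cond_example (const int *a, const int *b, int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    c[i] = a[i] < b[i] ? a[i] : b[i];  /* VEC_COND_EXPR <a < b, a, b> */
}
#endif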
4958 /* Function vect_transform_stmt.
4960 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4962 bool
4963 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
4965 bool is_store = false;
4966 tree vec_stmt = NULL_TREE;
4967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4968 tree orig_stmt_in_pattern;
4969 bool done;
4971 switch (STMT_VINFO_TYPE (stmt_info))
4973 case type_demotion_vec_info_type:
4974 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
4975 gcc_assert (done);
4976 break;
4978 case type_promotion_vec_info_type:
4979 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
4980 gcc_assert (done);
4981 break;
4983 case type_conversion_vec_info_type:
4984 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
4985 gcc_assert (done);
4986 break;
4988 case induc_vec_info_type:
4989 done = vectorizable_induction (stmt, bsi, &vec_stmt);
4990 gcc_assert (done);
4991 break;
4993 case op_vec_info_type:
4994 done = vectorizable_operation (stmt, bsi, &vec_stmt);
4995 gcc_assert (done);
4996 break;
4998 case assignment_vec_info_type:
4999 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
5000 gcc_assert (done);
5001 break;
5003 case load_vec_info_type:
5004 done = vectorizable_load (stmt, bsi, &vec_stmt);
5005 gcc_assert (done);
5006 break;
5008 case store_vec_info_type:
5009 done = vectorizable_store (stmt, bsi, &vec_stmt);
5010 gcc_assert (done);
5011 if (DR_GROUP_FIRST_DR (stmt_info))
5013 /* In case of interleaving, the whole chain is vectorized when the
5014 last store in the chain is reached. Store stmts before the last
5015 one are skipped, and there vec_stmt_info shouldn't be freed
5016 meanwhile. */
5017 *strided_store = true;
5018 if (STMT_VINFO_VEC_STMT (stmt_info))
5019 is_store = true;
5021 else
5022 is_store = true;
5023 break;
5025 case condition_vec_info_type:
5026 done = vectorizable_condition (stmt, bsi, &vec_stmt);
5027 gcc_assert (done);
5028 break;
5030 case call_vec_info_type:
5031 done = vectorizable_call (stmt, bsi, &vec_stmt);
5032 break;
5034 case reduc_vec_info_type:
5035 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
5036 gcc_assert (done);
5037 break;
5039 default:
5040 if (!STMT_VINFO_LIVE_P (stmt_info))
5042 if (vect_print_dump_info (REPORT_DETAILS))
5043 fprintf (vect_dump, "stmt not supported.");
5044 gcc_unreachable ();
5048 if (STMT_VINFO_LIVE_P (stmt_info)
5049 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5051 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
5052 gcc_assert (done);
5055 if (vec_stmt)
5057 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5058 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5059 if (orig_stmt_in_pattern)
5061 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5062 /* STMT was inserted by the vectorizer to replace a computation idiom.
5063 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5064 computed this idiom. We need to record a pointer to VEC_STMT in
5065 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5066 documentation of vect_pattern_recog. */
5067 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5069 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
5070 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5075 return is_store;
5079 /* This function builds ni_name = number of iterations the loop executes,
5080 inserting the computation on the loop preheader edge. */
5082 static tree
5083 vect_build_loop_niters (loop_vec_info loop_vinfo)
5085 tree ni_name, stmt, var;
5086 edge pe;
5087 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5088 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5090 var = create_tmp_var (TREE_TYPE (ni), "niters");
5091 add_referenced_var (var);
5092 ni_name = force_gimple_operand (ni, &stmt, false, var);
5094 pe = loop_preheader_edge (loop);
5095 if (stmt)
5097 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5098 gcc_assert (!new_bb);
5101 return ni_name;
5105 /* This function generates the following statements:
5107 ni_name = number of iterations loop executes
5108 ratio = ni_name / vf
5109 ratio_mult_vf_name = ratio * vf
5111 and places them at the loop preheader edge. */
5113 static void
5114 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5115 tree *ni_name_ptr,
5116 tree *ratio_mult_vf_name_ptr,
5117 tree *ratio_name_ptr)
5120 edge pe;
5121 basic_block new_bb;
5122 tree stmt, ni_name;
5123 tree var;
5124 tree ratio_name;
5125 tree ratio_mult_vf_name;
5126 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5127 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5128 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5129 tree log_vf;
5131 pe = loop_preheader_edge (loop);
5133 /* Generate temporary variable that contains
5134 number of iterations loop executes. */
5136 ni_name = vect_build_loop_niters (loop_vinfo);
5137 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5139 /* Create: ratio = ni >> log2(vf) */
5141 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5142 if (!is_gimple_val (ratio_name))
5144 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5145 add_referenced_var (var);
5147 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5148 pe = loop_preheader_edge (loop);
5149 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5150 gcc_assert (!new_bb);
5153 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5155 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5156 ratio_name, log_vf);
5157 if (!is_gimple_val (ratio_mult_vf_name))
5159 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5160 add_referenced_var (var);
5162 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5163 true, var);
5164 pe = loop_preheader_edge (loop);
5165 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5166 gcc_assert (!new_bb);
5169 *ni_name_ptr = ni_name;
5170 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5171 *ratio_name_ptr = ratio_name;
5173 return;
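/* Illustrative sketch (hypothetical, not part of this file): the
   values computed by the two statements generated above, for a
   power-of-two vectorization factor vf = 1 << log_vf.  Guarded out;
   for illustration only.  */
#if 0
static unsigned
ratio_example (unsigned ni, unsigned log_vf)
{
  unsigned ratio = ni >> log_vf;            /* ni / vf */
  unsigned ratio_mult_vf = ratio << log_vf; /* largest multiple of vf
                                               that is <= ni */
  return ratio_mult_vf;
}
#endif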
5177 /* Function update_vuses_to_preheader.
5179 Input:
5180 STMT - a statement with potential VUSEs.
5181 LOOP - the loop whose preheader will contain STMT.
5183 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5184 appears to be defined in a VDEF in another statement in a loop.
5185 One such case is when the VUSE is at the dereference of a __restricted__
5186 pointer in a load and the VDEF is at the dereference of a different
5187 __restricted__ pointer in a store. Vectorization may result in
5188 copy_virtual_operands being called to copy the problematic VUSE to a new
5189 statement that is being inserted in the loop preheader. This procedure
5190 is called to change the SSA_NAME in the new statement's VUSE from the
5191 SSA_NAME updated in the loop to the related SSA_NAME available on the
5192 path entering the loop.
5194 When this function is called, we have the following situation:
5196 # vuse <name1>
5197 S1: vload
5198 do {
5199 # name1 = phi < name0 , name2>
5201 # vuse <name1>
5202 S2: vload
5204 # name2 = vdef <name1>
5205 S3: vstore
5207 }while...
5209 Stmt S1 was created in the loop preheader block as part of misaligned-load
5210 handling. This function fixes the name of the vuse of S1 from 'name1' to
5211 'name0'. */
5213 static void
5214 update_vuses_to_preheader (tree stmt, struct loop *loop)
5216 basic_block header_bb = loop->header;
5217 edge preheader_e = loop_preheader_edge (loop);
5218 ssa_op_iter iter;
5219 use_operand_p use_p;
5221 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5223 tree ssa_name = USE_FROM_PTR (use_p);
5224 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5225 tree name_var = SSA_NAME_VAR (ssa_name);
5226 basic_block bb = bb_for_stmt (def_stmt);
5228 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5229 if (!IS_EMPTY_STMT (def_stmt)
5230 && flow_bb_inside_loop_p (loop, bb))
5232 /* If the block containing the statement defining the SSA_NAME
5233 is in the loop then it's necessary to find the definition
5234 outside the loop using the PHI nodes of the header. */
5235 tree phi;
5236 bool updated = false;
5238 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5240 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5242 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
5243 updated = true;
5244 break;
5247 gcc_assert (updated);
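
/* Editor's note (illustrative, not part of this file): in the diagram
   above, the loop over the header phis matches "name1 = phi < name0 ,
   name2>" because the phi result shares SSA_NAME_VAR with the vuse, and
   the SET_USE rewrites S1's vuse to the phi argument on the preheader
   edge, i.e. name0.  */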
5253 /* Function vect_update_ivs_after_vectorizer.
5255 "Advance" the induction variables of LOOP to the value they should take
5256 after the execution of LOOP. This is currently necessary because the
5257 vectorizer does not handle induction variables that are used after the
5258 loop. Such a situation occurs when the last iterations of LOOP are
5259 peeled, because:
5260 1. We introduced new uses after LOOP for IVs that were not originally used
5261 after LOOP: the IVs of LOOP are now used by an epilog loop.
5262 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5263 times, whereas the loop IVs should be bumped N times.
5265 Input:
5266 - LOOP - a loop that is going to be vectorized. The last few iterations
5267 of LOOP were peeled.
5268 - NITERS - the number of iterations that LOOP executes (before it is
5269 vectorized), i.e., the number of times the ivs should be bumped.
5270 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5271 coming out from LOOP on which there are uses of the LOOP ivs
5272 (this is the path from LOOP->exit to epilog_loop->preheader).
5274 The new definitions of the ivs are placed in LOOP->exit.
5275 The phi args associated with the edge UPDATE_E in the bb
5276 UPDATE_E->dest are updated accordingly.
5278 Assumption 1: Like the rest of the vectorizer, this function assumes
5279 a single loop exit that has a single predecessor.
5281 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5282 organized in the same order.
5284 Assumption 3: The access function of the ivs is simple enough (see
5285 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5287 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5288 coming out of LOOP on which the ivs of LOOP are used (this is the path
5289 that leads to the epilog loop; other paths skip the epilog loop). This
5290 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5291 needs to have its phis updated.
5294 static void
5295 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5296 edge update_e)
5298 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5299 basic_block exit_bb = single_exit (loop)->dest;
5300 tree phi, phi1;
5301 basic_block update_bb = update_e->dest;
5303 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5305 /* Make sure there exists a single-predecessor exit bb: */
5306 gcc_assert (single_pred_p (exit_bb));
5308 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5309 phi && phi1;
5310 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5312 tree access_fn = NULL;
5313 tree evolution_part;
5314 tree init_expr;
5315 tree step_expr;
5316 tree var, stmt, ni, ni_name;
5317 block_stmt_iterator last_bsi;
5319 if (vect_print_dump_info (REPORT_DETAILS))
5321 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5322 print_generic_expr (vect_dump, phi, TDF_SLIM);
5325 /* Skip virtual phis. */
5326 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5328 if (vect_print_dump_info (REPORT_DETAILS))
5329 fprintf (vect_dump, "virtual phi. skip.");
5330 continue;
5333 /* Skip reduction phis. */
5334 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5336 if (vect_print_dump_info (REPORT_DETAILS))
5337 fprintf (vect_dump, "reduc phi. skip.");
5338 continue;
5341 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5342 gcc_assert (access_fn);
5343 evolution_part =
5344 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5345 gcc_assert (evolution_part != NULL_TREE);
5347 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5348 of degree >= 2 or exponential. */
5349 gcc_assert (!tree_is_chrec (evolution_part));
5351 step_expr = evolution_part;
5352 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5353 loop->num));
5355 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5356 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5357 init_expr,
5358 fold_convert (sizetype,
5359 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5360 niters, step_expr)));
5361 else
5362 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5363 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5364 fold_convert (TREE_TYPE (init_expr),
5365 niters),
5366 step_expr),
5367 init_expr);
5371 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5372 add_referenced_var (var);
5374 ni_name = force_gimple_operand (ni, &stmt, false, var);
5376 /* Insert stmt into exit_bb. */
5377 last_bsi = bsi_last (exit_bb);
5378 if (stmt)
5379 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
5381 /* Fix phi expressions in the successor bb. */
5382 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
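
/* Editor's note (illustrative, not part of this file): for an integer IV
   with init_expr 0 and step_expr 1 in a loop that originally ran
   niters = 103 times, the value built above is

     ni = niters * step_expr + init_expr = 103 * 1 + 0 = 103

   which is the value the epilog loop's phi must start from.  For a
   pointer IV (e.g. an int* with a 4-byte step) the POINTER_PLUS_EXPR
   branch yields p + (sizetype) (103 * 4) instead.  */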
5387 /* Function vect_do_peeling_for_loop_bound
5389 Peel the last iterations of the loop represented by LOOP_VINFO.
5390 The peeled iterations form a new epilog loop. Given that the loop now
5391 iterates NITERS times, the new epilog loop iterates
5392 NITERS % VECTORIZATION_FACTOR times.
5394 The original loop will later be made to iterate
5395 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
5397 static void
5398 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5400 tree ni_name, ratio_mult_vf_name;
5401 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5402 struct loop *new_loop;
5403 edge update_e;
5404 basic_block preheader;
5405 int loop_num;
5406 unsigned int th;
5407 int min_scalar_loop_bound;
5408 int min_profitable_iters;
5410 if (vect_print_dump_info (REPORT_DETAILS))
5411 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5413 initialize_original_copy_tables ();
5415 /* Generate the following variables on the preheader of the original loop:
5417 ni_name = number of iterations the original loop executes
5418 ratio = ni_name / vf
5419 ratio_mult_vf_name = ratio * vf */
5420 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5421 &ratio_mult_vf_name, ratio);
5423 loop_num = loop->num;
5425 /* Analyze cost to set the threshold for the vectorized loop. */
5426 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
5428 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5429 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5431 /* Use the cost model only if it is more conservative than user specified
5432 threshold. */
5434 th = (unsigned) min_scalar_loop_bound;
5435 if (min_profitable_iters
5436 && (!min_scalar_loop_bound
5437 || min_profitable_iters > min_scalar_loop_bound))
5438 th = (unsigned) min_profitable_iters;
5440 if (vect_print_dump_info (REPORT_DETAILS))
5441 fprintf (vect_dump, "vectorization may not be profitable.");
5443 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5444 ratio_mult_vf_name, ni_name, false,
5445 th);
5446 gcc_assert (new_loop);
5447 gcc_assert (loop_num == loop->num);
5448 #ifdef ENABLE_CHECKING
5449 slpeel_verify_cfg_after_peeling (loop, new_loop);
5450 #endif
5452 /* A guard that controls whether the new_loop is to be executed or skipped
5453 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5454 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5455 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5456 is on the path where the LOOP IVs are used and need to be updated. */
5458 preheader = loop_preheader_edge (new_loop)->src;
5459 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5460 update_e = EDGE_PRED (preheader, 0);
5461 else
5462 update_e = EDGE_PRED (preheader, 1);
5464 /* Update IVs of original loop as if they were advanced
5465 by ratio_mult_vf_name steps. */
5466 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5468 /* After peeling we have to reset the scalar evolution analyzer. */
5469 scev_reset ();
5471 free_original_copy_tables ();
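
/* Editor's sketch (illustrative, not part of this file): the threshold
   selection above as standalone C.  The cost-model estimate is used only
   when it is more conservative than the user-specified minimum bound
   (the function name is hypothetical).  */
static unsigned
example_peel_threshold (int min_scalar_loop_bound, int min_profitable_iters)
{
  unsigned th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
  return th;
}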
5475 /* Function vect_gen_niters_for_prolog_loop
5477 Set the number of iterations for the loop represented by LOOP_VINFO
5478 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5479 and the misalignment of DR - the data reference recorded in
5480 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5481 this loop, the data reference DR will refer to an aligned location.
5483 The following computation is generated:
5485 If the misalignment of DR is known at compile time:
5486 addr_mis = DR_MISALIGNMENT (dr);
5487 Else, compute address misalignment in bytes:
5488 addr_mis = addr & (vectype_size - 1)
5490 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5492 (elem_size = element type size; an element is the scalar element
5493 whose type is the inner type of the vectype)
5495 For interleaving,
5497 prolog_niters = min ( LOOP_NITERS ,
5498 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5499 where group_size is the size of the interleaved group.
5501 The above formulas assume that VF == number of elements in the vector. This
5502 may not hold when there are multiple-types in the loop.
5503 In this case, for some data-references in the loop the VF does not represent
5504 the number of elements that fit in the vector. Therefore, instead of VF we
5505 use TYPE_VECTOR_SUBPARTS. */
5507 static tree
5508 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5510 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5512 tree var, stmt;
5513 tree iters, iters_name;
5514 edge pe;
5515 basic_block new_bb;
5516 tree dr_stmt = DR_STMT (dr);
5517 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5518 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5519 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5520 tree niters_type = TREE_TYPE (loop_niters);
5521 int group_size = 1;
5522 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5523 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5525 if (DR_GROUP_FIRST_DR (stmt_info))
5527 /* For interleaved accesses the element size must be multiplied by the
5528 size of the interleaved group. */
5529 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5530 DR_GROUP_FIRST_DR (stmt_info)));
5531 element_size *= group_size;
5534 pe = loop_preheader_edge (loop);
5536 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5538 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5539 int elem_misalign = byte_misalign / element_size;
5541 if (vect_print_dump_info (REPORT_DETAILS))
5542 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
5543 iters = build_int_cst (niters_type,
5544 (nelements - elem_misalign)&(nelements/group_size-1));
5546 else
5548 tree new_stmts = NULL_TREE;
5549 tree start_addr =
5550 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5551 tree ptr_type = TREE_TYPE (start_addr);
5552 tree size = TYPE_SIZE (ptr_type);
5553 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5554 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5555 tree elem_size_log =
5556 build_int_cst (type, exact_log2 (vectype_align/nelements));
5557 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5558 tree nelements_tree = build_int_cst (type, nelements);
5559 tree byte_misalign;
5560 tree elem_misalign;
5562 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5563 gcc_assert (!new_bb);
5565 /* Create: byte_misalign = addr & (vectype_size - 1) */
5566 byte_misalign =
5567 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5569 /* Create: elem_misalign = byte_misalign / element_size */
5570 elem_misalign =
5571 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5573 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5574 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5575 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5576 iters = fold_convert (niters_type, iters);
5579 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5580 /* If the loop bound is known at compile time, we have already verified
5581 that it is greater than vf; since the misalignment ('iters') is at
5582 most vf, there is no need to generate the MIN_EXPR in this case. */
5583 if (TREE_CODE (loop_niters) != INTEGER_CST)
5584 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5586 if (vect_print_dump_info (REPORT_DETAILS))
5588 fprintf (vect_dump, "niters for prolog loop: ");
5589 print_generic_expr (vect_dump, iters, TDF_SLIM);
5592 var = create_tmp_var (niters_type, "prolog_loop_niters");
5593 add_referenced_var (var);
5594 iters_name = force_gimple_operand (iters, &stmt, false, var);
5596 /* Insert stmt on loop preheader edge. */
5597 if (stmt)
5599 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5600 gcc_assert (!new_bb);
5603 return iters_name;
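
/* Editor's sketch (illustrative, not part of this file): the prolog
   iteration count for a compile-time-known misalignment, with group_size
   taken as 1 (the function name is hypothetical).  For nelements = 4,
   4-byte elements and a byte misalignment of 8:
     elem_misalign = 8 / 4 = 2, so (4 - 2) & 3 = 2 scalar iterations
   bring the data reference to an aligned address.  */
static int
example_prolog_niters (int nelements, int element_size, int byte_misalign)
{
  int elem_misalign = byte_misalign / element_size;
  return (nelements - elem_misalign) & (nelements - 1);
}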
5607 /* Function vect_update_init_of_dr
5609 NITERS iterations were peeled from LOOP. DR represents a data reference
5610 in LOOP. This function updates the information recorded in DR to
5611 account for the fact that the first NITERS iterations had already been
5612 executed. Specifically, it updates the OFFSET field of DR. */
5614 static void
5615 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5617 tree offset = DR_OFFSET (dr);
5619 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5620 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5621 DR_OFFSET (dr) = offset;
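
/* Editor's note (illustrative, not part of this file): if niters = 2
   prolog iterations were peeled and DR_STEP is 4 bytes, the code above
   adds 2 * 4 = 8 to DR_OFFSET, so DR now describes the first access
   performed by the unpeeled part of the loop.  */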
5625 /* Function vect_update_inits_of_drs
5627 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5628 This function updates the information recorded for the data references in
5629 the loop to account for the fact that the first NITERS iterations had
5630 already been executed. Specifically, it updates the initial_condition of
5631 the access_function of all the data_references in the loop. */
5633 static void
5634 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5636 unsigned int i;
5637 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5638 struct data_reference *dr;
5640 if (vect_print_dump_info (REPORT_DETAILS))
5641 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
5643 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5644 vect_update_init_of_dr (dr, niters);
5648 /* Function vect_do_peeling_for_alignment
5650 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5651 'niters' is set to the misalignment of one of the data references in the
5652 loop, thereby forcing it to refer to an aligned location at the beginning
5653 of the execution of this loop. The data reference for which we are
5654 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5656 static void
5657 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5659 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5660 tree niters_of_prolog_loop, ni_name;
5661 tree n_iters;
5662 struct loop *new_loop;
5664 if (vect_print_dump_info (REPORT_DETAILS))
5665 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5667 initialize_original_copy_tables ();
5669 ni_name = vect_build_loop_niters (loop_vinfo);
5670 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5672 /* Peel the prolog loop and make it iterate niters_of_prolog_loop times. */
5673 new_loop =
5674 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5675 niters_of_prolog_loop, ni_name, true, 0);
5676 gcc_assert (new_loop);
5677 #ifdef ENABLE_CHECKING
5678 slpeel_verify_cfg_after_peeling (new_loop, loop);
5679 #endif
5681 /* Update the number of times the loop executes. */
5682 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5683 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5684 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5686 /* Update the init conditions of the access functions of all data refs. */
5687 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5689 /* After peeling we have to reset the scalar evolution analyzer. */
5690 scev_reset ();
5692 free_original_copy_tables ();
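
/* Editor's note (illustrative, not part of this file): e.g. with 103
   original iterations and a 2-iteration prolog, LOOP_VINFO_NITERS is
   rewritten above to 103 - 2 = 101, and every data reference's offset
   is advanced past the peeled accesses.  */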
5696 /* Function vect_create_cond_for_align_checks.
5698 Create a conditional expression that represents the alignment checks for
5699 all of data references (array element references) whose alignment must be
5700 checked at runtime.
5702 Input:
5703 LOOP_VINFO - two fields of the loop information are used.
5704 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5705 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5707 Output:
5708 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5709 expression.
5710 The returned value is the conditional expression to be used in the if
5711 statement that controls which version of the loop gets executed at runtime.
5713 The algorithm makes two assumptions:
5714 1) The number of bytes "n" in a vector is a power of 2.
5715 2) An address "a" is aligned if a%n is zero, and this
5716 test can be done as a&(n-1) == 0. For example, for 16
5717 byte vectors the test is a&0xf == 0. */
5719 static tree
5720 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5721 tree *cond_expr_stmt_list)
5723 VEC(tree,heap) *may_misalign_stmts
5724 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5725 tree ref_stmt, tmp;
5726 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5727 tree mask_cst;
5728 unsigned int i;
5729 tree psize;
5730 tree int_ptrsize_type;
5731 char tmp_name[20];
5732 tree or_tmp_name = NULL_TREE;
5733 tree and_tmp, and_tmp_name, and_stmt;
5734 tree ptrsize_zero;
5736 /* Check that mask is one less than a power of 2, i.e., mask is
5737 all zeros followed by all ones. */
5738 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5740 /* CHECKME: what is the best integer or unsigned type to use to hold a
5741 cast from a pointer value? */
5742 psize = TYPE_SIZE (ptr_type_node);
5743 int_ptrsize_type
5744 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5746 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
5747 of the first vector of the i'th data reference. */
5749 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5751 tree new_stmt_list = NULL_TREE;
5752 tree addr_base;
5753 tree addr_tmp, addr_tmp_name, addr_stmt;
5754 tree or_tmp, new_or_tmp_name, or_stmt;
5756 /* create: addr_tmp = (int)(address_of_first_vector) */
5757 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5758 &new_stmt_list,
5759 NULL_TREE);
5761 if (new_stmt_list != NULL_TREE)
5762 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5764 sprintf (tmp_name, "%s%d", "addr2int", i);
5765 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5766 add_referenced_var (addr_tmp);
5767 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5768 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5769 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5770 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5771 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5773 /* The addresses are ORed together. */
5775 if (or_tmp_name != NULL_TREE)
5777 /* create: or_tmp = or_tmp | addr_tmp */
5778 sprintf (tmp_name, "%s%d", "orptrs", i);
5779 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5780 add_referenced_var (or_tmp);
5781 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5782 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5783 or_tmp_name, addr_tmp_name);
5784 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5785 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5786 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5787 or_tmp_name = new_or_tmp_name;
5789 else
5790 or_tmp_name = addr_tmp_name;
5792 } /* end for i */
5794 mask_cst = build_int_cst (int_ptrsize_type, mask);
5796 /* create: and_tmp = or_tmp & mask */
5797 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5798 add_referenced_var (and_tmp);
5799 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5801 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5802 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5803 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5804 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5806 /* Make and_tmp the left operand of the conditional test against zero.
5807 If and_tmp has a nonzero bit then some address is unaligned. */
5808 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5809 return build2 (EQ_EXPR, boolean_type_node,
5810 and_tmp_name, ptrsize_zero);
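
/* Editor's sketch (illustrative, not part of this file): the runtime
   condition built above, written out for two data references and
   16-byte vectors (mask = 0xf):

     addr2int0 = (intptr_t) addr_a;
     addr2int1 = (intptr_t) addr_b;
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;

   The returned EQ_EXPR then tests andmask == 0; if any address has a
   low bit set, the scalar version of the loop runs instead.  */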
5814 /* Function vect_transform_loop.
5816 The analysis phase has determined that the loop is vectorizable.
5817 Vectorize the loop - create vectorized stmts to replace the scalar
5818 stmts in the loop, and update the loop exit condition. */
5820 void
5821 vect_transform_loop (loop_vec_info loop_vinfo)
5823 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5824 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5825 int nbbs = loop->num_nodes;
5826 block_stmt_iterator si, next_si;
5827 int i;
5828 tree ratio = NULL;
5829 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5830 bool strided_store;
5832 if (vect_print_dump_info (REPORT_DETAILS))
5833 fprintf (vect_dump, "=== vec_transform_loop ===");
5835 /* If the loop has data references that may or may not be aligned, then
5836 two versions of the loop need to be generated, one which is vectorized
5837 and one which isn't. A test is then generated to control which of the
5838 loops is executed. The test checks for the alignment of all of the
5839 data references that may or may not be aligned. */
5841 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5843 struct loop *nloop;
5844 tree cond_expr;
5845 tree cond_expr_stmt_list = NULL_TREE;
5846 basic_block condition_bb;
5847 block_stmt_iterator cond_exp_bsi;
5848 basic_block merge_bb;
5849 basic_block new_exit_bb;
5850 edge new_exit_e, e;
5851 tree orig_phi, new_phi, arg;
5852 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5854 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5855 &cond_expr_stmt_list);
5856 initialize_original_copy_tables ();
5857 nloop = loop_version (loop, cond_expr, &condition_bb,
5858 prob, prob, REG_BR_PROB_BASE - prob, true);
5859 free_original_copy_tables();
5861 /** Loop versioning violates an assumption we try to maintain during
5862 vectorization - that the loop exit block has a single predecessor.
5863 After versioning, the exit block of both loop versions is the same
5864 basic block (i.e. it has two predecessors). Just in order to simplify
5865 following transformations in the vectorizer, we fix this situation
5866 here by adding a new (empty) block on the exit-edge of the loop,
5867 with the proper loop-exit phis to maintain loop-closed-form. **/
5869 merge_bb = single_exit (loop)->dest;
5870 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5871 new_exit_bb = split_edge (single_exit (loop));
5872 new_exit_e = single_exit (loop);
5873 e = EDGE_SUCC (new_exit_bb, 0);
5875 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5876 orig_phi = PHI_CHAIN (orig_phi))
5878 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5879 new_exit_bb);
5880 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5881 add_phi_arg (new_phi, arg, new_exit_e);
5882 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5885 /** end loop-exit-fixes after versioning **/
5887 update_ssa (TODO_update_ssa);
5888 cond_exp_bsi = bsi_last (condition_bb);
5889 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5892 /* CHECKME: we wouldn't need this if we called update_ssa once
5893 for all loops. */
5894 bitmap_zero (vect_memsyms_to_rename);
5896 /* Peel the loop if there are data refs with unknown alignment.
5897 Only one data ref with unknown alignment is allowed. */
5899 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5900 vect_do_peeling_for_alignment (loop_vinfo);
5902 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5903 compile-time constant), or it is a constant that is not divisible by the
5904 vectorization factor, then an epilog loop needs to be created.
5905 We therefore duplicate the loop: the original loop will be vectorized,
5906 and will compute the first (n/VF) iterations. The second copy of the loop
5907 will remain scalar and will compute the remaining (n%VF) iterations.
5908 (VF is the vectorization factor). */
5910 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5911 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5912 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
5913 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
5914 else
5915 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5916 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
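/* Editor's note (illustrative, not part of this file): e.g. for a known
   count of 100 iterations and VF = 4, 100 % 4 == 0, so no epilog loop is
   needed and ratio is simply 100 / 4 = 25; with 103 iterations the
   branch above peels a 3-iteration scalar epilog instead.  */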
5918 /* 1) Make sure the loop header has exactly two entries
5919 2) Make sure we have a preheader basic block. */
5921 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5923 split_edge (loop_preheader_edge (loop));
5925 /* FORNOW: the vectorizer supports only loops whose body consists
5926 of one basic block (header + empty latch). When the vectorizer
5927 supports more involved loop forms, the order in which the BBs are
5928 traversed will need to be reconsidered. */
5930 for (i = 0; i < nbbs; i++)
5932 basic_block bb = bbs[i];
5933 stmt_vec_info stmt_info;
5934 tree phi;
5936 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5938 if (vect_print_dump_info (REPORT_DETAILS))
5940 fprintf (vect_dump, "------>vectorizing phi: ");
5941 print_generic_expr (vect_dump, phi, TDF_SLIM);
5943 stmt_info = vinfo_for_stmt (phi);
5944 if (!stmt_info)
5945 continue;
5946 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5947 && !STMT_VINFO_LIVE_P (stmt_info))
5948 continue;
5950 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5951 != (unsigned HOST_WIDE_INT) vectorization_factor)
5952 && vect_print_dump_info (REPORT_DETAILS))
5953 fprintf (vect_dump, "multiple-types.");
5955 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5957 if (vect_print_dump_info (REPORT_DETAILS))
5958 fprintf (vect_dump, "transform phi.");
5959 vect_transform_stmt (phi, NULL, NULL);
5963 for (si = bsi_start (bb); !bsi_end_p (si);)
5965 tree stmt = bsi_stmt (si);
5966 bool is_store;
5968 if (vect_print_dump_info (REPORT_DETAILS))
5970 fprintf (vect_dump, "------>vectorizing statement: ");
5971 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5973 stmt_info = vinfo_for_stmt (stmt);
5974 gcc_assert (stmt_info);
5975 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5976 && !STMT_VINFO_LIVE_P (stmt_info))
5978 bsi_next (&si);
5979 continue;
5982 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5983 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5984 != (unsigned HOST_WIDE_INT) vectorization_factor)
5985 && vect_print_dump_info (REPORT_DETAILS))
5986 fprintf (vect_dump, "multiple-types.");
5988 /* -------- vectorize statement ------------ */
5989 if (vect_print_dump_info (REPORT_DETAILS))
5990 fprintf (vect_dump, "transform statement.");
5992 strided_store = false;
5993 is_store = vect_transform_stmt (stmt, &si, &strided_store);
5994 if (is_store)
5996 stmt_ann_t ann;
5997 if (DR_GROUP_FIRST_DR (stmt_info))
5999 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6000 interleaving chain was completed - free all the stores in
6001 the chain. */
6002 tree next = DR_GROUP_FIRST_DR (stmt_info);
6003 tree tmp;
6004 stmt_vec_info next_stmt_info;
6006 while (next)
6008 next_si = bsi_for_stmt (next);
6009 next_stmt_info = vinfo_for_stmt (next);
6010 /* Free the attached stmt_vec_info and remove the stmt. */
6011 ann = stmt_ann (next);
6012 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6013 free (next_stmt_info);
6014 set_stmt_info (ann, NULL);
6015 bsi_remove (&next_si, true);
6016 next = tmp;
6018 bsi_remove (&si, true);
6019 continue;
6021 else
6023 /* Free the attached stmt_vec_info and remove the stmt. */
6024 ann = stmt_ann (stmt);
6025 free (stmt_info);
6026 set_stmt_info (ann, NULL);
6027 bsi_remove (&si, true);
6028 continue;
6031 bsi_next (&si);
6032 } /* stmts in BB */
6033 } /* BBs in loop */
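
/* Editor's note (illustrative, not part of this file): the interleaving
   cleanup above walks the DR_GROUP_FIRST_DR / DR_GROUP_NEXT_DR chain,
   e.g. S1 -> S2 -> S3 for a group of three strided stores, removing each
   scalar store and its stmt_vec_info once the whole chain has been
   vectorized.  */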
6035 slpeel_make_loop_iterate_ntimes (loop, ratio);
6037 mark_set_for_renaming (vect_memsyms_to_rename);
6039 /* The memory tags and pointers in vectorized statements need to
6040 have their SSA forms updated. FIXME, why can't this be delayed
6041 until all the loops have been transformed? */
6042 update_ssa (TODO_update_ssa);
6044 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6045 fprintf (vect_dump, "LOOP VECTORIZED.");