[official-gcc.git] / gcc / tree-vect-transform.c
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "params.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (gimple, gimple_stmt_iterator *, bool *,
50 slp_tree);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (gimple, struct loop*, tree, tree *, gimple *, bool, bool *);
54 static tree vect_create_addr_base_for_vector_ref
55 (gimple, gimple_seq *, tree, struct loop *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, gimple, tree *);
58 static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
59 static void vect_finish_stmt_generation
60 (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void vect_create_epilog_for_reduction
63 (tree, gimple, int, enum tree_code, gimple);
64 static tree get_initial_def_for_reduction (gimple, tree, tree *);
66 /* Utility functions dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
77 static int
78 cost_for_stmt (gimple stmt)
80 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
82 switch (STMT_VINFO_TYPE (stmt_info))
84 case load_vec_info_type:
85 return TARG_SCALAR_LOAD_COST;
86 case store_vec_info_type:
87 return TARG_SCALAR_STORE_COST;
88 case op_vec_info_type:
89 case condition_vec_info_type:
90 case assignment_vec_info_type:
91 case reduc_vec_info_type:
92 case induc_vec_info_type:
93 case type_promotion_vec_info_type:
94 case type_demotion_vec_info_type:
95 case type_conversion_vec_info_type:
96 case call_vec_info_type:
97 return TARG_SCALAR_STMT_COST;
98 case undef_vec_info_type:
99 default:
100 gcc_unreachable ();
105 /* Function vect_estimate_min_profitable_iters
107 Return the number of iterations required for the vector version of the
108 loop to be profitable relative to the cost of the scalar version of the
109 loop.
111 TODO: Take profile info into account before making vectorization
112 decisions, if available. */
115 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
117 int i;
118 int min_profitable_iters;
119 int peel_iters_prologue;
120 int peel_iters_epilogue;
121 int vec_inside_cost = 0;
122 int vec_outside_cost = 0;
123 int scalar_single_iter_cost = 0;
124 int scalar_outside_cost = 0;
125 bool runtime_test = false;
126 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
127 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
128 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
129 int nbbs = loop->num_nodes;
130 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
131 int peel_guard_costs = 0;
132 int innerloop_iters = 0, factor;
133 VEC (slp_instance, heap) *slp_instances;
134 slp_instance instance;
136 /* Cost model disabled. */
137 if (!flag_vect_cost_model)
139 if (vect_print_dump_info (REPORT_COST))
140 fprintf (vect_dump, "cost model disabled.");
141 return 0;
144 /* If the number of iterations is unknown, or the
145 peeling-for-misalignment amount is unknown, we will have to generate
146 a runtime test to test the loop count against the threshold. */
147 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
148 || (byte_misalign < 0))
149 runtime_test = true;
151 /* Requires loop versioning tests to handle misalignment. */
153 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
155 /* FIXME: Make cost depend on complexity of individual check. */
156 vec_outside_cost +=
157 VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
158 if (vect_print_dump_info (REPORT_COST))
159 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
160 "versioning to treat misalignment.\n");
163 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
165 /* FIXME: Make cost depend on complexity of individual check. */
166 vec_outside_cost +=
167 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
168 if (vect_print_dump_info (REPORT_COST))
169 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
170 "versioning aliasing.\n");
173 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
174 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
176 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
179 /* Count statements in scalar loop. Using this as scalar cost for a single
180 iteration for now.
182 TODO: Add outer loop support.
184 TODO: Consider assigning different costs to different scalar
185 statements. */
187 /* FORNOW. */
188 if (loop->inner)
189 innerloop_iters = 50; /* FIXME */
191 for (i = 0; i < nbbs; i++)
193 gimple_stmt_iterator si;
194 basic_block bb = bbs[i];
196 if (bb->loop_father == loop->inner)
197 factor = innerloop_iters;
198 else
199 factor = 1;
201 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
203 gimple stmt = gsi_stmt (si);
204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205 /* Skip stmts that are not vectorized inside the loop. */
206 if (!STMT_VINFO_RELEVANT_P (stmt_info)
207 && (!STMT_VINFO_LIVE_P (stmt_info)
208 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
209 continue;
210 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
211 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
212 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
213 some of the "outside" costs are generated inside the outer-loop. */
214 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
218 /* Add additional cost for the peeled instructions in prologue and epilogue
219 loop.
221 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
222 at compile-time - we assume it's vf/2 (the worst would be vf-1).
224 TODO: Build an expression that represents peel_iters for prologue and
225 epilogue to be used in a run-time test. */
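/* Illustrative reading of the FORNOW assumption above (numbers hypothetical):
   with VF = 4 and unknown peel counts, both peel_iters_prologue and
   peel_iters_epilogue are taken to be 4/2 = 2, so the outside cost below
   grows by 4 * scalar_single_iter_cost plus the branch guards added to
   peel_guard_costs.  */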
227 if (byte_misalign < 0)
229 peel_iters_prologue = vf/2;
230 if (vect_print_dump_info (REPORT_COST))
231 fprintf (vect_dump, "cost model: "
232 "prologue peel iters set to vf/2.");
234 /* If peeling for alignment is unknown, loop bound of main loop becomes
235 unknown. */
236 peel_iters_epilogue = vf/2;
237 if (vect_print_dump_info (REPORT_COST))
238 fprintf (vect_dump, "cost model: "
239 "epilogue peel iters set to vf/2 because "
240 "peeling for alignment is unknown .");
242 /* If peeled iterations are unknown, count a taken branch and a not taken
243 branch per peeled loop. Even if scalar loop iterations are known,
244 vector iterations are not known since peeled prologue iterations are
245 not known. Hence guards remain the same. */
246 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
247 + TARG_COND_NOT_TAKEN_BRANCH_COST);
250 else
252 if (byte_misalign)
254 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
255 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
256 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
257 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
259 peel_iters_prologue = nelements - (byte_misalign / element_size);
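/* For example (illustrative values): a V8HI access (nelements = 8,
   element_size = 2) that is misaligned by byte_misalign = 6 needs
   8 - 6/2 = 5 prologue iterations before the main loop sees an aligned
   vector access.  */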
261 else
262 peel_iters_prologue = 0;
264 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
266 peel_iters_epilogue = vf/2;
267 if (vect_print_dump_info (REPORT_COST))
268 fprintf (vect_dump, "cost model: "
269 "epilogue peel iters set to vf/2 because "
270 "loop iterations are unknown .");
272 /* If peeled iterations are known but number of scalar loop
273 iterations are unknown, count a taken branch per peeled loop. */
274 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
277 else
279 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
280 peel_iters_prologue = niters < peel_iters_prologue ?
281 niters : peel_iters_prologue;
282 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
286 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
287 + (peel_iters_epilogue * scalar_single_iter_cost)
288 + peel_guard_costs;
290 /* FORNOW: The scalar outside cost is incremented in one of the
291 following ways:
293 1. The vectorizer checks for alignment and aliasing and generates
294 a condition that allows dynamic vectorization. A cost model
295 check is ANDED with the versioning condition. Hence scalar code
296 path now has the added cost of the versioning check.
298 if (cost > th & versioning_check)
299 jmp to vector code
301 Hence run-time scalar is incremented by not-taken branch cost.
303 2. The vectorizer then checks if a prologue is required. If the
304 cost model check was not done before during versioning, it has to
305 be done before the prologue check.
307 if (cost <= th)
308 prologue = scalar_iters
309 if (prologue == 0)
310 jmp to vector code
311 else
312 execute prologue
313 if (prologue == num_iters)
314 go to exit
316 Hence the run-time scalar cost is incremented by a taken branch,
317 plus a not-taken branch, plus a taken branch cost.
319 3. The vectorizer then checks if an epilogue is required. If the
320 cost model check was not done before during prologue check, it
321 has to be done with the epilogue check.
323 if (prologue == 0)
324 jmp to vector code
325 else
326 execute prologue
327 if (prologue == num_iters)
328 go to exit
329 vector code:
330 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
331 jmp to epilogue
333 Hence the run-time scalar cost should be incremented by 2 taken
334 branches.
336 TODO: The back end may reorder the BBs differently and reverse
337 conditions/branch directions. Change the estimates below to
338 something more reasonable. */
340 if (runtime_test)
342 /* Cost model check occurs at versioning. */
343 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
344 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
345 scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
346 else
348 /* Cost model occurs at prologue generation. */
349 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
350 scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
351 + TARG_COND_NOT_TAKEN_BRANCH_COST;
352 /* Cost model check occurs at epilogue generation. */
353 else
354 scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
358 /* Add SLP costs. */
359 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
360 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
362 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
363 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
366 /* Calculate number of iterations required to make the vector version
367 profitable, relative to the loop bodies only. The following condition
368 must hold true:
369 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
370 where
371 SIC = scalar iteration cost, VIC = vector iteration cost,
372 VOC = vector outside cost, VF = vectorization factor,
373 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
374 SOC = scalar outside cost for run time cost model check. */
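/* A worked example with made-up costs: SIC = 4, VIC = 6, VOC = 14, SOC = 0,
   VF = 4 and no peeled iterations.  The division below yields
   (14 * 4) / (4 * 4 - 6) = 5, which the correction step bumps to 6 because
   4 * 4 * 5 <= 6 * 5 + 14 * 4.  After the result is raised to at least VF
   and decremented, the guard becomes "skip the vector loop if niters <= 5",
   i.e. vectorization pays off from 6 iterations onwards under these
   assumed costs.  */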
376 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
378 if (vec_outside_cost <= 0)
379 min_profitable_iters = 1;
380 else
382 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
383 - vec_inside_cost * peel_iters_prologue
384 - vec_inside_cost * peel_iters_epilogue)
385 / ((scalar_single_iter_cost * vf)
386 - vec_inside_cost);
388 if ((scalar_single_iter_cost * vf * min_profitable_iters)
389 <= ((vec_inside_cost * min_profitable_iters)
390 + ((vec_outside_cost - scalar_outside_cost) * vf)))
391 min_profitable_iters++;
394 /* vector version will never be profitable. */
395 else
397 if (vect_print_dump_info (REPORT_COST))
398 fprintf (vect_dump, "cost model: vector iteration cost = %d "
399 "is divisible by scalar iteration cost = %d by a factor "
400 "greater than or equal to the vectorization factor = %d .",
401 vec_inside_cost, scalar_single_iter_cost, vf);
402 return -1;
405 if (vect_print_dump_info (REPORT_COST))
407 fprintf (vect_dump, "Cost model analysis: \n");
408 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
409 vec_inside_cost);
410 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
411 vec_outside_cost);
412 fprintf (vect_dump, " Scalar iteration cost: %d\n",
413 scalar_single_iter_cost);
414 fprintf (vect_dump, " Scalar outside cost: %d\n", scalar_outside_cost);
415 fprintf (vect_dump, " prologue iterations: %d\n",
416 peel_iters_prologue);
417 fprintf (vect_dump, " epilogue iterations: %d\n",
418 peel_iters_epilogue);
419 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
420 min_profitable_iters);
423 min_profitable_iters =
424 min_profitable_iters < vf ? vf : min_profitable_iters;
426 /* Because the condition we create is:
427 if (niters <= min_profitable_iters)
428 then skip the vectorized loop. */
429 min_profitable_iters--;
431 if (vect_print_dump_info (REPORT_COST))
432 fprintf (vect_dump, " Profitability threshold = %d\n",
433 min_profitable_iters);
435 return min_profitable_iters;
439 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
440 functions. Design better to avoid maintenance issues. */
442 /* Function vect_model_reduction_cost.
444 Models cost for a reduction operation, including the vector ops
445 generated within the strip-mine loop, the initial definition before
446 the loop, and the epilogue code that must be generated. */
448 static bool
449 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
450 int ncopies)
452 int outer_cost = 0;
453 enum tree_code code;
454 optab optab;
455 tree vectype;
456 gimple stmt, orig_stmt;
457 tree reduction_op;
458 enum machine_mode mode;
459 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 /* Cost of reduction op inside loop. */
464 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
466 stmt = STMT_VINFO_STMT (stmt_info);
468 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
470 case GIMPLE_SINGLE_RHS:
471 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
472 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
473 break;
474 case GIMPLE_UNARY_RHS:
475 reduction_op = gimple_assign_rhs1 (stmt);
476 break;
477 case GIMPLE_BINARY_RHS:
478 reduction_op = gimple_assign_rhs2 (stmt);
479 break;
480 default:
481 gcc_unreachable ();
484 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
485 if (!vectype)
487 if (vect_print_dump_info (REPORT_COST))
489 fprintf (vect_dump, "unsupported data-type ");
490 print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
492 return false;
495 mode = TYPE_MODE (vectype);
496 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
498 if (!orig_stmt)
499 orig_stmt = STMT_VINFO_STMT (stmt_info);
501 code = gimple_assign_rhs_code (orig_stmt);
503 /* Add in cost for initial definition. */
504 outer_cost += TARG_SCALAR_TO_VEC_COST;
506 /* Determine cost of epilogue code.
508 We have a reduction operator that will reduce the vector in one statement.
509 Also requires scalar extract. */
511 if (!nested_in_vect_loop_p (loop, orig_stmt))
513 if (reduc_code < NUM_TREE_CODES)
514 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
515 else
517 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
518 tree bitsize =
519 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
520 int element_bitsize = tree_low_cst (bitsize, 1);
521 int nelements = vec_size_in_bits / element_bitsize;
523 optab = optab_for_tree_code (code, vectype, optab_default);
525 /* We have a whole vector shift available. */
526 if (VECTOR_MODE_P (mode)
527 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
528 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
529 /* Final reduction via vector shifts and the reduction operator. Also
530 requires scalar extract. */
531 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
532 + TARG_VEC_TO_SCALAR_COST);
533 else
534 /* Use extracts and reduction op for final reduction. For N elements,
535 we have N extracts and N-1 reduction ops. */
536 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
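/* Illustrative example (target costs hypothetical): for a V4SF reduction
   (nelements = 4), the whole-vector-shift variant above adds
   exact_log2 (4) * 2 = 4 vector stmts plus one vec_to_scalar extract,
   while the extract-based fallback adds 4 + 3 = 7 vector stmts.  */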
540 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
542 if (vect_print_dump_info (REPORT_COST))
543 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
544 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
545 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
547 return true;
551 /* Function vect_model_induction_cost.
553 Models cost for induction operations. */
555 static void
556 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
558 /* loop cost for vec_loop. */
559 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
560 /* prologue cost for vec_init and vec_step. */
561 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
563 if (vect_print_dump_info (REPORT_COST))
564 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
565 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
566 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
570 /* Function vect_model_simple_cost.
572 Models cost for simple operations, i.e. those that only emit ncopies of a
573 single op. Right now, this does not account for multiple insns that could
574 be generated for the single vector op. We will handle that shortly. */
576 void
577 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
578 enum vect_def_type *dt, slp_tree slp_node)
580 int i;
581 int inside_cost = 0, outside_cost = 0;
583 /* The SLP costs were already calculated during SLP tree build. */
584 if (PURE_SLP_STMT (stmt_info))
585 return;
587 inside_cost = ncopies * TARG_VEC_STMT_COST;
589 /* FORNOW: Assuming maximum 2 args per stmts. */
590 for (i = 0; i < 2; i++)
592 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
593 outside_cost += TARG_SCALAR_TO_VEC_COST;
596 if (vect_print_dump_info (REPORT_COST))
597 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
598 "outside_cost = %d .", inside_cost, outside_cost);
600 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
601 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
602 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
606 /* Function vect_cost_strided_group_size
608 For strided load or store, return the group_size only if it is the first
609 load or store of a group, else return 1. This ensures that group size is
610 only returned once per group. */
612 static int
613 vect_cost_strided_group_size (stmt_vec_info stmt_info)
615 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
617 if (first_stmt == STMT_VINFO_STMT (stmt_info))
618 return DR_GROUP_SIZE (stmt_info);
620 return 1;
624 /* Function vect_model_store_cost
626 Models cost for stores. In the case of strided accesses, one access
627 has the overhead of the strided access attributed to it. */
629 void
630 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
631 enum vect_def_type dt, slp_tree slp_node)
633 int group_size;
634 int inside_cost = 0, outside_cost = 0;
636 /* The SLP costs were already calculated during SLP tree build. */
637 if (PURE_SLP_STMT (stmt_info))
638 return;
640 if (dt == vect_constant_def || dt == vect_invariant_def)
641 outside_cost = TARG_SCALAR_TO_VEC_COST;
643 /* Strided access? */
644 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
645 group_size = vect_cost_strided_group_size (stmt_info);
646 /* Not a strided access. */
647 else
648 group_size = 1;
650 /* Is this an access in a group of stores, which provide strided access?
651 If so, add in the cost of the permutes. */
652 if (group_size > 1)
654 /* Uses a high and low interleave operation for each needed permute. */
655 inside_cost = ncopies * exact_log2(group_size) * group_size
656 * TARG_VEC_STMT_COST;
658 if (vect_print_dump_info (REPORT_COST))
659 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
660 group_size);
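/* For instance (illustrative): a strided group of 4 stores with ncopies = 1
   needs exact_log2 (4) * 4 = 8 interleave_high/low operations, so
   inside_cost starts at 8 * TARG_VEC_STMT_COST before the stores themselves
   are added below.  */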
664 /* Costs of the stores. */
665 inside_cost += ncopies * TARG_VEC_STORE_COST;
667 if (vect_print_dump_info (REPORT_COST))
668 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
669 "outside_cost = %d .", inside_cost, outside_cost);
671 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
672 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
673 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
677 /* Function vect_model_load_cost
679 Models cost for loads. In the case of strided accesses, the last access
680 has the overhead of the strided access attributed to it. Since unaligned
681 accesses are supported for loads, we also account for the costs of the
682 access scheme chosen. */
684 void
685 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
688 int group_size;
689 int alignment_support_cheme;
690 gimple first_stmt;
691 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
692 int inside_cost = 0, outside_cost = 0;
694 /* The SLP costs were already calculated during SLP tree build. */
695 if (PURE_SLP_STMT (stmt_info))
696 return;
698 /* Strided accesses? */
699 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
700 if (first_stmt && !slp_node)
702 group_size = vect_cost_strided_group_size (stmt_info);
703 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
705 /* Not a strided access. */
706 else
708 group_size = 1;
709 first_dr = dr;
712 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
714 /* Is this an access in a group of loads providing strided access?
715 If so, add in the cost of the permutes. */
716 if (group_size > 1)
718 /* Uses even and odd extract operations for each needed permute. */
719 inside_cost = ncopies * exact_log2(group_size) * group_size
720 * TARG_VEC_STMT_COST;
722 if (vect_print_dump_info (REPORT_COST))
723 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
724 group_size);
728 /* The loads themselves. */
729 switch (alignment_support_cheme)
731 case dr_aligned:
733 inside_cost += ncopies * TARG_VEC_LOAD_COST;
735 if (vect_print_dump_info (REPORT_COST))
736 fprintf (vect_dump, "vect_model_load_cost: aligned.");
738 break;
740 case dr_unaligned_supported:
742 /* Here, we assign an additional cost for the unaligned load. */
743 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
745 if (vect_print_dump_info (REPORT_COST))
746 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
747 "hardware.");
749 break;
751 case dr_explicit_realign:
753 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
755 /* FIXME: If the misalignment remains fixed across the iterations of
756 the containing loop, the following cost should be added to the
757 outside costs. */
758 if (targetm.vectorize.builtin_mask_for_load)
759 inside_cost += TARG_VEC_STMT_COST;
761 break;
763 case dr_explicit_realign_optimized:
765 if (vect_print_dump_info (REPORT_COST))
766 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
767 "pipelined.");
769 /* Unaligned software pipeline has a load of an address, an initial
770 load, and possibly a mask operation to "prime" the loop. However,
771 if this is an access in a group of loads, which provide strided
772 access, then the above cost should only be considered for one
773 access in the group. Inside the loop, there is a load op
774 and a realignment op. */
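/* A rough sketch of the code this scheme generates (names are illustrative,
   not the exact IR):

     before the loop:
       msq = *floor_aligned (addr);
       realign_token = mask_for_load (addr);
     in each iteration:
       lsq = *floor_aligned (addr + vector_size);
       vec = REALIGN_LOAD <msq, lsq, realign_token>;
       msq = lsq;

   which is what the outside/inside cost split below accounts for.  */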
776 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
778 outside_cost = 2*TARG_VEC_STMT_COST;
779 if (targetm.vectorize.builtin_mask_for_load)
780 outside_cost += TARG_VEC_STMT_COST;
783 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
785 break;
788 default:
789 gcc_unreachable ();
792 if (vect_print_dump_info (REPORT_COST))
793 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
794 "outside_cost = %d .", inside_cost, outside_cost);
796 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
797 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
798 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
802 /* Function vect_get_new_vect_var.
804 Returns a name for a new variable. The current naming scheme prepends
805 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
806 VAR_KIND) to NAME (if provided) to form the name of the vectorizer
807 generated variable. */
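/* For example (illustrative): requesting a vect_pointer_var for an array
   named "in" yields a temporary whose name starts with "vect_pin"; the
   exact name is made unique by create_tmp_var.  */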
809 static tree
810 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
812 const char *prefix;
813 tree new_vect_var;
815 switch (var_kind)
817 case vect_simple_var:
818 prefix = "vect_";
819 break;
820 case vect_scalar_var:
821 prefix = "stmp_";
822 break;
823 case vect_pointer_var:
824 prefix = "vect_p";
825 break;
826 default:
827 gcc_unreachable ();
830 if (name)
832 char* tmp = concat (prefix, name, NULL);
833 new_vect_var = create_tmp_var (type, tmp);
834 free (tmp);
836 else
837 new_vect_var = create_tmp_var (type, prefix);
839 /* Mark vector typed variable as a gimple register variable. */
840 if (TREE_CODE (type) == VECTOR_TYPE)
841 DECL_GIMPLE_REG_P (new_vect_var) = true;
843 return new_vect_var;
847 /* Function vect_create_addr_base_for_vector_ref.
849 Create an expression that computes the address of the first memory location
850 that will be accessed for a data reference.
852 Input:
853 STMT: The statement containing the data reference.
854 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
855 OFFSET: Optional. If supplied, it is added to the initial address.
856 LOOP: Specify relative to which loop-nest should the address be computed.
857 For example, when the dataref is in an inner-loop nested in an
858 outer-loop that is now being vectorized, LOOP can be either the
859 outer-loop, or the inner-loop. The first memory location accessed
860 by the following dataref ('in' points to short):
862 for (i=0; i<N; i++)
863 for (j=0; j<M; j++)
864 s += in[i+j]
866 is as follows:
867 if LOOP=i_loop: &in (relative to i_loop)
868 if LOOP=j_loop: &in+i*2B (relative to j_loop)
870 Output:
871 1. Return an SSA_NAME whose value is the address of the memory location of
872 the first vector of the data reference.
873 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
874 these statement(s) which define the returned SSA_NAME.
876 FORNOW: We are only handling array accesses with step 1. */
878 static tree
879 vect_create_addr_base_for_vector_ref (gimple stmt,
880 gimple_seq *new_stmt_list,
881 tree offset,
882 struct loop *loop)
884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
885 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
886 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
887 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
888 tree base_name;
889 tree data_ref_base_var;
890 tree vec_stmt;
891 tree addr_base, addr_expr;
892 tree dest;
893 gimple_seq seq = NULL;
894 tree base_offset = unshare_expr (DR_OFFSET (dr));
895 tree init = unshare_expr (DR_INIT (dr));
896 tree vect_ptr_type, addr_expr2;
897 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
899 gcc_assert (loop);
900 if (loop != containing_loop)
902 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
905 gcc_assert (nested_in_vect_loop_p (loop, stmt));
907 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
908 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
909 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
912 /* Create data_ref_base */
913 base_name = build_fold_indirect_ref (data_ref_base);
914 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
915 add_referenced_var (data_ref_base_var);
916 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
917 data_ref_base_var);
918 gimple_seq_add_seq (new_stmt_list, seq);
920 /* Create base_offset */
921 base_offset = size_binop (PLUS_EXPR, base_offset, init);
922 base_offset = fold_convert (sizetype, base_offset);
923 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
924 add_referenced_var (dest);
925 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
926 gimple_seq_add_seq (new_stmt_list, seq);
928 if (offset)
930 tree tmp = create_tmp_var (sizetype, "offset");
932 add_referenced_var (tmp);
933 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
934 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
935 base_offset, offset);
936 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
937 gimple_seq_add_seq (new_stmt_list, seq);
940 /* base + base_offset */
941 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
942 data_ref_base, base_offset);
944 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
946 /* addr_expr = addr_base */
947 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
948 get_name (base_name));
949 add_referenced_var (addr_expr);
950 vec_stmt = fold_convert (vect_ptr_type, addr_base);
951 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
952 get_name (base_name));
953 add_referenced_var (addr_expr2);
954 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
955 gimple_seq_add_seq (new_stmt_list, seq);
957 if (vect_print_dump_info (REPORT_DETAILS))
959 fprintf (vect_dump, "created ");
960 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
962 return vec_stmt;
966 /* Function vect_create_data_ref_ptr.
968 Create a new pointer to vector type (vp), that points to the first location
969 accessed in the loop by STMT, along with the def-use update chain to
970 appropriately advance the pointer through the loop iterations. Also set
971 aliasing information for the pointer. This vector pointer is used by the
972 callers to this function to create a memory reference expression for vector
973 load/store access.
975 Input:
976 1. STMT: a stmt that references memory. Expected to be of the form
977 GIMPLE_ASSIGN <name, data-ref> or
978 GIMPLE_ASSIGN <data-ref, name>.
979 2. AT_LOOP: the loop where the vector memref is to be created.
980 3. OFFSET (optional): an offset to be added to the initial address accessed
981 by the data-ref in STMT.
982 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
983 pointing to the initial address.
985 Output:
986 1. Declare a new ptr to vector_type, and have it point to the base of the
987 data reference (initial address accessed by the data reference).
988 For example, for vector of type V8HI, the following code is generated:
990 v8hi *vp;
991 vp = (v8hi *)initial_address;
993 if OFFSET is not supplied:
994 initial_address = &a[init];
995 if OFFSET is supplied:
996 initial_address = &a[init + OFFSET];
998 Return the initial_address in INITIAL_ADDRESS.
1000 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
1001 update the pointer in each iteration of the loop.
1003 Return the increment stmt that updates the pointer in PTR_INCR.
1005 3. Set INV_P to true if the access pattern of the data reference in the
1006 vectorized loop is invariant. Set it to false otherwise.
1008 4. Return the pointer. */
1010 static tree
1011 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
1012 tree offset, tree *initial_address, gimple *ptr_incr,
1013 bool only_init, bool *inv_p)
1015 tree base_name;
1016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1017 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1018 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1019 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1020 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
1021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1022 tree vect_ptr_type;
1023 tree vect_ptr;
1024 tree tag;
1025 tree new_temp;
1026 gimple vec_stmt;
1027 gimple_seq new_stmt_list = NULL;
1028 edge pe;
1029 basic_block new_bb;
1030 tree vect_ptr_init;
1031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1032 tree vptr;
1033 gimple_stmt_iterator incr_gsi;
1034 bool insert_after;
1035 tree indx_before_incr, indx_after_incr;
1036 gimple incr;
1037 tree step;
1039 /* Check the step (evolution) of the load in LOOP, and record
1040 whether it's invariant. */
1041 if (nested_in_vect_loop)
1042 step = STMT_VINFO_DR_STEP (stmt_info);
1043 else
1044 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1046 if (tree_int_cst_compare (step, size_zero_node) == 0)
1047 *inv_p = true;
1048 else
1049 *inv_p = false;
1051 /* Create an expression for the first address accessed by this load
1052 in LOOP. */
1053 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
1055 if (vect_print_dump_info (REPORT_DETAILS))
1057 tree data_ref_base = base_name;
1058 fprintf (vect_dump, "create vector-pointer variable to type: ");
1059 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1060 if (TREE_CODE (data_ref_base) == VAR_DECL)
1061 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
1062 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1063 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
1064 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1065 fprintf (vect_dump, " vectorizing a record based array ref: ");
1066 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1067 fprintf (vect_dump, " vectorizing a pointer ref: ");
1068 print_generic_expr (vect_dump, base_name, TDF_SLIM);
1071 /** (1) Create the new vector-pointer variable: **/
1072 vect_ptr_type = build_pointer_type (vectype);
1074 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1075 get_name (base_name));
1076 add_referenced_var (vect_ptr);
1078 /** (2) Add aliasing information to the new vector-pointer:
1079 (The points-to info (DR_PTR_INFO) may be defined later.) **/
1081 tag = DR_SYMBOL_TAG (dr);
1082 gcc_assert (tag);
1084 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
1085 tag must be created with tag added to its may alias list. */
1086 if (!MTAG_P (tag))
1087 new_type_alias (vect_ptr, tag, DR_REF (dr));
1088 else
1089 set_symbol_mem_tag (vect_ptr, tag);
1091 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
1092 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
1093 def-use update cycles for the pointer: One relative to the outer-loop
1094 (LOOP), which is what steps (3) and (4) below do. The other is relative
1095 to the inner-loop (which is the inner-most loop containing the dataref),
1096 and this is done by step (5) below.
1098 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
1099 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1100 redundant. Steps (3),(4) create the following:
1102 vp0 = &base_addr;
1103 LOOP: vp1 = phi(vp0,vp2)
1104 ...
1106 vp2 = vp1 + step
1107 goto LOOP
1109 If there is an inner-loop nested in loop, then step (5) will also be
1110 applied, and an additional update in the inner-loop will be created:
1112 vp0 = &base_addr;
1113 LOOP: vp1 = phi(vp0,vp2)
1115 inner: vp3 = phi(vp1,vp4)
1116 vp4 = vp3 + inner_step
1117 if () goto inner
1119 vp2 = vp1 + step
1120 if () goto LOOP */
1122 /** (3) Calculate the initial address of the vector-pointer, and set
1123 the vector-pointer to point to it before the loop: **/
1125 /* Create: &(base[init_val+offset]) in the loop preheader. */
1127 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1128 offset, loop);
1129 pe = loop_preheader_edge (loop);
1130 if (new_stmt_list)
1132 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
1133 gcc_assert (!new_bb);
1136 *initial_address = new_temp;
1138 /* Create: p = (vectype *) initial_base */
1139 vec_stmt = gimple_build_assign (vect_ptr,
1140 fold_convert (vect_ptr_type, new_temp));
1141 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1142 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
1143 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
1144 gcc_assert (!new_bb);
1147 /** (4) Handle the updating of the vector-pointer inside the loop.
1148 This is needed when ONLY_INIT is false, and also when AT_LOOP
1149 is the inner-loop nested in LOOP (during outer-loop vectorization).
1152 if (only_init && at_loop == loop) /* No update in loop is required. */
1154 /* Copy the points-to information if it exists. */
1155 if (DR_PTR_INFO (dr))
1156 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1157 vptr = vect_ptr_init;
1159 else
1161 /* The step of the vector pointer is the Vector Size. */
1162 tree step = TYPE_SIZE_UNIT (vectype);
1163 /* One exception to the above is when the scalar step of the load in
1164 LOOP is zero. In this case the step here is also zero. */
1165 if (*inv_p)
1166 step = size_zero_node;
1168 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1170 create_iv (vect_ptr_init,
1171 fold_convert (vect_ptr_type, step),
1172 NULL_TREE, loop, &incr_gsi, insert_after,
1173 &indx_before_incr, &indx_after_incr);
1174 incr = gsi_stmt (incr_gsi);
1175 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
1177 /* Copy the points-to information if it exists. */
1178 if (DR_PTR_INFO (dr))
1180 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1181 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1183 merge_alias_info (vect_ptr_init, indx_before_incr);
1184 merge_alias_info (vect_ptr_init, indx_after_incr);
1185 if (ptr_incr)
1186 *ptr_incr = incr;
1188 vptr = indx_before_incr;
1191 if (!nested_in_vect_loop || only_init)
1192 return vptr;
1195 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1196 nested in LOOP, if exists: **/
1198 gcc_assert (nested_in_vect_loop);
1199 if (!only_init)
1201 standard_iv_increment_position (containing_loop, &incr_gsi,
1202 &insert_after);
1203 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1204 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
1205 &indx_after_incr);
1206 incr = gsi_stmt (incr_gsi);
1207 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
1209 /* Copy the points-to information if it exists. */
1210 if (DR_PTR_INFO (dr))
1212 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1213 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1215 merge_alias_info (vect_ptr_init, indx_before_incr);
1216 merge_alias_info (vect_ptr_init, indx_after_incr);
1217 if (ptr_incr)
1218 *ptr_incr = incr;
1220 return indx_before_incr;
1222 else
1223 gcc_unreachable ();
1227 /* Function bump_vector_ptr
1229 Increment a pointer (to a vector type) by vector-size. If requested,
1230 i.e. if PTR-INCR is given, then also connect the new increment stmt
1231 to the existing def-use update-chain of the pointer, by modifying
1232 the PTR_INCR as illustrated below:
1234 The pointer def-use update-chain before this function:
1235 DATAREF_PTR = phi (p_0, p_2)
1236 ....
1237 PTR_INCR: p_2 = DATAREF_PTR + step
1239 The pointer def-use update-chain after this function:
1240 DATAREF_PTR = phi (p_0, p_2)
1241 ....
1242 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1243 ....
1244 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1246 Input:
1247 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1248 in the loop.
1249 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1250 the loop. The increment amount across iterations is expected
1251 to be vector_size.
1252 BSI - location where the new update stmt is to be placed.
1253 STMT - the original scalar memory-access stmt that is being vectorized.
1254 BUMP - optional. The offset by which to bump the pointer. If not given,
1255 the offset is assumed to be vector_size.
1257 Output: Return NEW_DATAREF_PTR as illustrated above.
1261 static tree
1262 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
1263 gimple stmt, tree bump)
1265 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1266 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1268 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1269 tree update = TYPE_SIZE_UNIT (vectype);
1270 gimple incr_stmt;
1271 ssa_op_iter iter;
1272 use_operand_p use_p;
1273 tree new_dataref_ptr;
1275 if (bump)
1276 update = bump;
1278 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
1279 dataref_ptr, update);
1280 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1281 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
1282 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
1284 /* Copy the points-to information if it exists. */
1285 if (DR_PTR_INFO (dr))
1286 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1287 merge_alias_info (new_dataref_ptr, dataref_ptr);
1289 if (!ptr_incr)
1290 return new_dataref_ptr;
1292 /* Update the vector-pointer's cross-iteration increment. */
1293 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1295 tree use = USE_FROM_PTR (use_p);
1297 if (use == dataref_ptr)
1298 SET_USE (use_p, new_dataref_ptr);
1299 else
1300 gcc_assert (tree_int_cst_compare (use, update) == 0);
1303 return new_dataref_ptr;
1307 /* Function vect_create_destination_var.
1309 Create a new temporary of type VECTYPE. */
1311 static tree
1312 vect_create_destination_var (tree scalar_dest, tree vectype)
1314 tree vec_dest;
1315 const char *new_name;
1316 tree type;
1317 enum vect_var_kind kind;
1319 kind = vectype ? vect_simple_var : vect_scalar_var;
1320 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1322 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1324 new_name = get_name (scalar_dest);
1325 if (!new_name)
1326 new_name = "var_";
1327 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1328 add_referenced_var (vec_dest);
1330 return vec_dest;
1334 /* Function vect_init_vector.
1336 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1337 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1338 is not NULL. Otherwise, place the initialization at the loop preheader.
1339 Return the DEF of INIT_STMT.
1340 It will be used in the vectorization of STMT. */
1342 static tree
1343 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1344 gimple_stmt_iterator *gsi)
1346 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1347 tree new_var;
1348 gimple init_stmt;
1349 tree vec_oprnd;
1350 edge pe;
1351 tree new_temp;
1352 basic_block new_bb;
1354 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1355 add_referenced_var (new_var);
1356 init_stmt = gimple_build_assign (new_var, vector_var);
1357 new_temp = make_ssa_name (new_var, init_stmt);
1358 gimple_assign_set_lhs (init_stmt, new_temp);
1360 if (gsi)
1361 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1362 else
1364 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1365 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1367 if (nested_in_vect_loop_p (loop, stmt))
1368 loop = loop->inner;
1369 pe = loop_preheader_edge (loop);
1370 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1371 gcc_assert (!new_bb);
1374 if (vect_print_dump_info (REPORT_DETAILS))
1376 fprintf (vect_dump, "created new init_stmt: ");
1377 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1380 vec_oprnd = gimple_assign_lhs (init_stmt);
1381 return vec_oprnd;
1385 /* For constant and loop invariant defs of SLP_NODE this function returns
1386 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1387 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1388 stmts. */
1390 static void
1391 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1392 unsigned int op_num)
1394 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1395 gimple stmt = VEC_index (gimple, stmts, 0);
1396 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1397 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1398 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1399 tree vec_cst;
1400 tree t = NULL_TREE;
1401 int j, number_of_places_left_in_vector;
1402 tree vector_type;
1403 tree op, vop;
1404 int group_size = VEC_length (gimple, stmts);
1405 unsigned int vec_num, i;
1406 int number_of_copies = 1;
1407 bool is_store = false;
1408 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1409 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1410 bool constant_p;
1412 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1413 is_store = true;
1415 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1416 created vectors. It is greater than 1 if unrolling is performed.
1418 For example, we have two scalar operands, s1 and s2 (e.g., group of
1419 strided accesses of size two), while NUNITS is four (i.e., four scalars
1420 of this type can be packed in a vector). The output vector will contain
1421 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1422 will be 2).
1424 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1425 containing the operands.
1427 For example, NUNITS is four as before, and the group size is 8
1428 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1429 {s5, s6, s7, s8}. */
1431 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1433 number_of_places_left_in_vector = nunits;
1434 constant_p = true;
1435 for (j = 0; j < number_of_copies; j++)
1437 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1439 if (is_store)
1440 op = gimple_assign_rhs1 (stmt);
1441 else
1442 op = gimple_op (stmt, op_num + 1);
1443 if (!CONSTANT_CLASS_P (op))
1444 constant_p = false;
1446 /* Create 'vect_ = {op0,op1,...,opn}'. */
1447 t = tree_cons (NULL_TREE, op, t);
1449 number_of_places_left_in_vector--;
1451 if (number_of_places_left_in_vector == 0)
1453 number_of_places_left_in_vector = nunits;
1455 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1456 gcc_assert (vector_type);
1457 if (constant_p)
1458 vec_cst = build_vector (vector_type, t);
1459 else
1460 vec_cst = build_constructor_from_list (vector_type, t);
1461 constant_p = true;
1462 VEC_quick_push (tree, voprnds,
1463 vect_init_vector (stmt, vec_cst, vector_type,
1464 NULL));
1465 t = NULL_TREE;
1470 /* Since the vectors are created in the reverse order, we should invert
1471 them. */
1472 vec_num = VEC_length (tree, voprnds);
1473 for (j = vec_num - 1; j >= 0; j--)
1475 vop = VEC_index (tree, voprnds, j);
1476 VEC_quick_push (tree, *vec_oprnds, vop);
1479 VEC_free (tree, heap, voprnds);
1481 /* In case that VF is greater than the unrolling factor needed for the SLP
1482 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1483 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1484 to replicate the vectors. */
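/* Replication example (illustrative): with NUNITS = 4 and GROUP_SIZE = 2,
   the loops above build one distinct vector {s1, s2, s1, s2}; if the loop
   is unrolled by VF = 8, the node needs 8 * 2 / 4 = 4 vector stmts, so the
   loop below pushes the same vector three more times.  */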
1485 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1487 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1488 VEC_quick_push (tree, *vec_oprnds, vop);
1493 /* Get vectorized definitions from SLP_NODE that contains corresponding
1494 vectorized def-stmts. */
1496 static void
1497 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1499 tree vec_oprnd;
1500 gimple vec_def_stmt;
1501 unsigned int i;
1503 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1505 for (i = 0;
1506 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1507 i++)
1509 gcc_assert (vec_def_stmt);
1510 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1511 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1516 /* Get vectorized definitions for SLP_NODE.
1517 If the scalar definitions are loop invariants or constants, collect them and
1518 call vect_get_constant_vectors() to create vector stmts.
1519 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1520 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1521 vect_get_slp_vect_defs() to retrieve them.
1522 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1523 the right node). This is used when the second operand must remain scalar. */
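/* For example: when SLP_NODE is the group of additions feeding stores of
   the form a[i] = b[i] + x (x loop-invariant), its left child holds the
   already-vectorized loads of b[i], so VEC_OPRNDS0 comes from
   vect_get_slp_vect_defs, while x has no child node and VEC_OPRNDS1 is
   built from scalars by vect_get_constant_vectors.  (Illustrative
   scenario.)  */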
1525 static void
1526 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1527 VEC (tree,heap) **vec_oprnds1)
1529 gimple first_stmt;
1530 enum tree_code code;
1531 int number_of_vects;
1533 /* The number of vector defs is determined by the number of vector statements
1534 in the node from which we get those statements. */
1535 if (SLP_TREE_LEFT (slp_node))
1536 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1537 else
1538 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1540 /* Allocate memory for vectorized defs. */
1541 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1543 /* SLP_NODE corresponds either to a group of stores or to a group of
1544 unary/binary operations. We don't call this function for loads. */
1545 if (SLP_TREE_LEFT (slp_node))
1546 /* The defs are already vectorized. */
1547 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1548 else
1549 /* Build vectors from scalar defs. */
1550 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1552 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1553 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1554 /* Since we don't call this function with loads, this is a group of
1555 stores. */
1556 return;
1558 code = gimple_assign_rhs_code (first_stmt);
1559 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1560 return;
1562 /* The number of vector defs is determined by the number of vector statements
1563 in the node from which we get those statements. */
1564 if (SLP_TREE_RIGHT (slp_node))
1565 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1566 else
1567 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1569 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1571 if (SLP_TREE_RIGHT (slp_node))
1572 /* The defs are already vectorized. */
1573 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1574 else
1575 /* Build vectors from scalar defs. */
1576 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1580 /* Function get_initial_def_for_induction
1582 Input:
1583 STMT - a stmt that performs an induction operation in the loop.
1584 IV_PHI - the initial value of the induction variable
1586 Output:
1587 Return a vector variable, initialized with the first VF values of
1588 the induction variable. E.g., for an iv with IV_PHI='X' and
1589 evolution S, for a vector of 4 units, we want to return:
1590 [X, X + S, X + 2*S, X + 3*S]. */
1592 static tree
1593 get_initial_def_for_induction (gimple iv_phi)
1595 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598 tree scalar_type = TREE_TYPE (gimple_phi_result (iv_phi));
1599 tree vectype;
1600 int nunits;
1601 edge pe = loop_preheader_edge (loop);
1602 struct loop *iv_loop;
1603 basic_block new_bb;
1604 tree vec, vec_init, vec_step, t;
1605 tree access_fn;
1606 tree new_var;
1607 tree new_name;
1608 gimple init_stmt, induction_phi, new_stmt;
1609 tree induc_def, vec_def, vec_dest;
1610 tree init_expr, step_expr;
1611 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1612 int i;
1613 bool ok;
1614 int ncopies;
1615 tree expr;
1616 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1617 bool nested_in_vect_loop = false;
1618 gimple_seq stmts = NULL;
1619 imm_use_iterator imm_iter;
1620 use_operand_p use_p;
1621 gimple exit_phi;
1622 edge latch_e;
1623 tree loop_arg;
1624 gimple_stmt_iterator si;
1625 basic_block bb = gimple_bb (iv_phi);
1627 vectype = get_vectype_for_scalar_type (scalar_type);
1628 gcc_assert (vectype);
1629 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1630 ncopies = vf / nunits;
1632 gcc_assert (phi_info);
1633 gcc_assert (ncopies >= 1);
1635 /* Find the first insertion point in the BB. */
1636 si = gsi_after_labels (bb);
1638 if (INTEGRAL_TYPE_P (scalar_type) || POINTER_TYPE_P (scalar_type))
1639 step_expr = build_int_cst (scalar_type, 0);
1640 else
1641 step_expr = build_real (scalar_type, dconst0);
1643 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1644 if (nested_in_vect_loop_p (loop, iv_phi))
1646 nested_in_vect_loop = true;
1647 iv_loop = loop->inner;
1649 else
1650 iv_loop = loop;
1651 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
1653 latch_e = loop_latch_edge (iv_loop);
1654 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1656 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1657 gcc_assert (access_fn);
1658 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1659 &init_expr, &step_expr);
1660 gcc_assert (ok);
1661 pe = loop_preheader_edge (iv_loop);
1663 /* Create the vector that holds the initial_value of the induction. */
1664 if (nested_in_vect_loop)
1666 /* iv_loop is nested in the loop to be vectorized. init_expr has already
1667 been created during vectorization of previous stmts; we obtain it from
1668 the STMT_VINFO_VEC_STMT of the defining stmt. */
1669 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1670 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1672 else
1674 /* iv_loop is the loop to be vectorized. Create:
1675 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1676 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1677 add_referenced_var (new_var);
1679 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1680 if (stmts)
1682 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1683 gcc_assert (!new_bb);
1686 t = NULL_TREE;
1687 t = tree_cons (NULL_TREE, init_expr, t);
1688 for (i = 1; i < nunits; i++)
1690 /* Create: new_name_i = new_name + step_expr */
1691 enum tree_code code = POINTER_TYPE_P (scalar_type)
1692 ? POINTER_PLUS_EXPR : PLUS_EXPR;
1693 init_stmt = gimple_build_assign_with_ops (code, new_var,
1694 new_name, step_expr);
1695 new_name = make_ssa_name (new_var, init_stmt);
1696 gimple_assign_set_lhs (init_stmt, new_name);
1698 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1699 gcc_assert (!new_bb);
1701 if (vect_print_dump_info (REPORT_DETAILS))
1703 fprintf (vect_dump, "created new init_stmt: ");
1704 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1706 t = tree_cons (NULL_TREE, new_name, t);
1708 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1709 vec = build_constructor_from_list (vectype, nreverse (t));
1710 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1714 /* Create the vector that holds the step of the induction. */
1715 if (nested_in_vect_loop)
1716 /* iv_loop is nested in the loop to be vectorized. Generate:
1717 vec_step = [S, S, S, S] */
1718 new_name = step_expr;
1719 else
1721 /* iv_loop is the loop to be vectorized. Generate:
1722 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1723 expr = build_int_cst (scalar_type, vf);
1724 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1727 t = NULL_TREE;
1728 for (i = 0; i < nunits; i++)
1729 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1730 gcc_assert (CONSTANT_CLASS_P (new_name));
1731 vec = build_vector (vectype, t);
1732 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1735 /* Create the following def-use cycle:
1736 loop prolog:
1737 vec_init = ...
1738 vec_step = ...
1739 loop:
1740 vec_iv = PHI <vec_init, vec_loop>
1742 STMT
1744 vec_loop = vec_iv + vec_step; */
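/* Illustrative sketch only (not part of the vectorizer): a concrete
   instance of the cycle above, assuming VF = nunits = 4 and a scalar IV
   with init X = 0 and step S = 1:

     loop prolog:
       vec_init = { 0, 1, 2, 3 }              [X, X+S, X+2*S, X+3*S]
       vec_step = { 4, 4, 4, 4 }              VF*S in every element
     loop:
       vec_iv   = PHI <vec_init, vec_loop>    { 0,1,2,3 }, { 4,5,6,7 }, ...
       STMT                                   uses vec_iv
       vec_loop = vec_iv + vec_step

   Written as plain C, with an array standing in for the vector register:

     int vec_iv[4] = { 0, 1, 2, 3 };
     int vec_step[4] = { 4, 4, 4, 4 };
     for (int it = 0; it < n / 4; it++)
       {
         use (vec_iv);                        the vectorized loop body
         for (int k = 0; k < 4; k++)
           vec_iv[k] += vec_step[k];
       }

   where 'use' and 'n' stand for the vectorized STMT and the trip count.  */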
1746 /* Create the induction-phi that defines the induction-operand. */
1747 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1748 add_referenced_var (vec_dest);
1749 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1750 set_vinfo_for_stmt (induction_phi,
1751 new_stmt_vec_info (induction_phi, loop_vinfo));
1752 induc_def = PHI_RESULT (induction_phi);
1754 /* Create the iv update inside the loop */
1755 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
1756 induc_def, vec_step);
1757 vec_def = make_ssa_name (vec_dest, new_stmt);
1758 gimple_assign_set_lhs (new_stmt, vec_def);
1759 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
1760 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
1762 /* Set the arguments of the phi node: */
1763 add_phi_arg (induction_phi, vec_init, pe);
1764 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1767 /* In case the vectorization factor (VF) is bigger than the number
1768 of elements that we can fit in a vectype (nunits), we have to generate
1769 more than one vector stmt, i.e., we need to "unroll" the
1770 vector stmt by a factor of VF/nunits. For more details see the documentation
1771 in vectorizable_operation. */
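/* Worked example (numbers chosen for illustration): with VF = 8 and a
   vectype of nunits = 4 elements, ncopies = VF/nunits = 2.  The first copy
   uses vec_iv as created above; the code below then builds a step vector
   [nunits*S, nunits*S, nunits*S, nunits*S] = [4*S, ...] and chains copy j
   as "copy_j = copy_{j-1} + [4*S, ...]", so copy j holds elements
   j*nunits .. j*nunits + nunits-1 of the unrolled iteration.  */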
1773 if (ncopies > 1)
1775 stmt_vec_info prev_stmt_vinfo;
1776 /* FORNOW. This restriction should be relaxed. */
1777 gcc_assert (!nested_in_vect_loop);
1779 /* Create the vector that holds the step of the induction. */
1780 expr = build_int_cst (scalar_type, nunits);
1781 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1782 t = NULL_TREE;
1783 for (i = 0; i < nunits; i++)
1784 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1785 gcc_assert (CONSTANT_CLASS_P (new_name));
1786 vec = build_vector (vectype, t);
1787 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1789 vec_def = induc_def;
1790 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1791 for (i = 1; i < ncopies; i++)
1793 /* vec_i = vec_prev + vec_step */
1794 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
1795 vec_def, vec_step);
1796 vec_def = make_ssa_name (vec_dest, new_stmt);
1797 gimple_assign_set_lhs (new_stmt, vec_def);
1799 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
1800 set_vinfo_for_stmt (new_stmt,
1801 new_stmt_vec_info (new_stmt, loop_vinfo));
1802 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1803 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1807 if (nested_in_vect_loop)
1809 /* Find the loop-closed exit-phi of the induction, and record
1810 the final vector of induction results: */
1811 exit_phi = NULL;
1812 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1814 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
1816 exit_phi = USE_STMT (use_p);
1817 break;
1820 if (exit_phi)
1822 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1823 /* FORNOW. We do not yet support the case in which an inner-loop induction
1824 is not used in the outer-loop (i.e. is used only outside the outer-loop). */
1825 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1826 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1828 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1829 if (vect_print_dump_info (REPORT_DETAILS))
1831 fprintf (vect_dump, "vector of inductions after inner-loop:");
1832 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1838 if (vect_print_dump_info (REPORT_DETAILS))
1840 fprintf (vect_dump, "transform induction: created def-use cycle: ");
1841 print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
1842 fprintf (vect_dump, "\n");
1843 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
1846 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1847 return induc_def;
1851 /* Function vect_get_vec_def_for_operand.
1853 OP is an operand in STMT. This function returns a (vector) def that will be
1854 used in the vectorized stmt for STMT.
1856 In the case that OP is an SSA_NAME which is defined in the loop, then
1857 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1859 In case OP is an invariant or constant, a new stmt that creates a vector def
1860 needs to be introduced. */
1862 static tree
1863 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1865 tree vec_oprnd;
1866 gimple vec_stmt;
1867 gimple def_stmt;
1868 stmt_vec_info def_stmt_info = NULL;
1869 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1870 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1871 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1872 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1873 tree vec_inv;
1874 tree vec_cst;
1875 tree t = NULL_TREE;
1876 tree def;
1877 int i;
1878 enum vect_def_type dt;
1879 bool is_simple_use;
1880 tree vector_type;
1882 if (vect_print_dump_info (REPORT_DETAILS))
1884 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1885 print_generic_expr (vect_dump, op, TDF_SLIM);
1888 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1889 gcc_assert (is_simple_use);
1890 if (vect_print_dump_info (REPORT_DETAILS))
1892 if (def)
1894 fprintf (vect_dump, "def = ");
1895 print_generic_expr (vect_dump, def, TDF_SLIM);
1897 if (def_stmt)
1899 fprintf (vect_dump, " def_stmt = ");
1900 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1904 switch (dt)
1906 /* Case 1: operand is a constant. */
1907 case vect_constant_def:
1909 if (scalar_def)
1910 *scalar_def = op;
1912 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1913 if (vect_print_dump_info (REPORT_DETAILS))
1914 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1916 for (i = nunits - 1; i >= 0; --i)
1918 t = tree_cons (NULL_TREE, op, t);
1920 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1921 gcc_assert (vector_type);
1922 vec_cst = build_vector (vector_type, t);
1924 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1927 /* Case 2: operand is defined outside the loop - loop invariant. */
1928 case vect_invariant_def:
1930 if (scalar_def)
1931 *scalar_def = def;
1933 /* Create 'vec_inv = {inv,inv,..,inv}' */
1934 if (vect_print_dump_info (REPORT_DETAILS))
1935 fprintf (vect_dump, "Create vector_inv.");
1937 for (i = nunits - 1; i >= 0; --i)
1939 t = tree_cons (NULL_TREE, def, t);
1942 /* FIXME: use build_constructor directly. */
1943 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1944 gcc_assert (vector_type);
1945 vec_inv = build_constructor_from_list (vector_type, t);
1946 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1949 /* Case 3: operand is defined inside the loop. */
1950 case vect_loop_def:
1952 if (scalar_def)
1953 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1955 /* Get the def from the vectorized stmt. */
1956 def_stmt_info = vinfo_for_stmt (def_stmt);
1957 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1958 gcc_assert (vec_stmt);
1959 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1960 vec_oprnd = PHI_RESULT (vec_stmt);
1961 else if (is_gimple_call (vec_stmt))
1962 vec_oprnd = gimple_call_lhs (vec_stmt);
1963 else
1964 vec_oprnd = gimple_assign_lhs (vec_stmt);
1965 return vec_oprnd;
1968 /* Case 4: operand is defined by a loop header phi - reduction */
1969 case vect_reduction_def:
1971 struct loop *loop;
1973 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1974 loop = (gimple_bb (def_stmt))->loop_father;
1976 /* Get the def before the loop */
1977 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1978 return get_initial_def_for_reduction (stmt, op, scalar_def);
1981 /* Case 5: operand is defined by loop-header phi - induction. */
1982 case vect_induction_def:
1984 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1986 /* Get the def from the vectorized stmt. */
1987 def_stmt_info = vinfo_for_stmt (def_stmt);
1988 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1989 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
1990 vec_oprnd = PHI_RESULT (vec_stmt);
1991 return vec_oprnd;
1994 default:
1995 gcc_unreachable ();
2000 /* Function vect_get_vec_def_for_stmt_copy
2002 Return a vector-def for an operand. This function is used when the
2003 vectorized stmt to be created (by the caller to this function) is a "copy"
2004 created in case the vectorized result cannot fit in one vector, and several
2005 copies of the vector-stmt are required. In this case the vector-def is
2006 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
2007 of the stmt that defines VEC_OPRND.
2008 DT is the type of the vector def VEC_OPRND.
2010 Context:
2011 In case the vectorization factor (VF) is bigger than the number
2012 of elements that can fit in a vectype (nunits), we have to generate
2013 more than one vector stmt to vectorize the scalar stmt. This situation
2014 arises when there are multiple data-types operated upon in the loop; the
2015 smallest data-type determines the VF, and as a result, when vectorizing
2016 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
2017 vector stmt (each computing a vector of 'nunits' results, and together
2018 computing 'VF' results in each iteration). This function is called when
2019 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
2020 which VF=16 and nunits=4, so the number of copies required is 4):
2022 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
2024 S1: x = load VS1.0: vx.0 = memref0 VS1.1
2025 VS1.1: vx.1 = memref1 VS1.2
2026 VS1.2: vx.2 = memref2 VS1.3
2027 VS1.3: vx.3 = memref3
2029 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
2030 VSnew.1: vz1 = vx.1 + ... VSnew.2
2031 VSnew.2: vz2 = vx.2 + ... VSnew.3
2032 VSnew.3: vz3 = vx.3 + ...
2034 The vectorization of S1 is explained in vectorizable_load.
2035 The vectorization of S2:
2036 To create the first vector-stmt out of the 4 copies - VSnew.0 -
2037 the function 'vect_get_vec_def_for_operand' is called to
2038 get the relevant vector-def for each operand of S2. For operand x it
2039 returns the vector-def 'vx.0'.
2041 To create the remaining copies of the vector-stmt (VSnew.j), this
2042 function is called to get the relevant vector-def for each operand. It is
2043 obtained from the respective VS1.j stmt, which is recorded in the
2044 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
2046 For example, to obtain the vector-def 'vx.1' in order to create the
2047 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
2048 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
2049 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
2050 and return its def ('vx.1').
2051 Overall, to create the above sequence this function will be called 3 times:
2052 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
2053 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
2054 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
2056 static tree
2057 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
2059 gimple vec_stmt_for_operand;
2060 stmt_vec_info def_stmt_info;
2062 /* Do nothing; can reuse same def. */
2063 if (dt == vect_invariant_def || dt == vect_constant_def )
2064 return vec_oprnd;
2066 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
2067 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
2068 gcc_assert (def_stmt_info);
2069 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
2070 gcc_assert (vec_stmt_for_operand);
2071 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
2072 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
2073 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
2074 else
2075 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
2076 return vec_oprnd;
2080 /* Get vectorized definitions for the operands to create a copy of an original
2081 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
2083 static void
2084 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
2085 VEC(tree,heap) **vec_oprnds0,
2086 VEC(tree,heap) **vec_oprnds1)
2088 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
2090 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
2091 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2093 if (vec_oprnds1 && *vec_oprnds1)
2095 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
2096 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
2097 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2102 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
2104 static void
2105 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
2106 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
2107 slp_tree slp_node)
2109 if (slp_node)
2110 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
2111 else
2113 tree vec_oprnd;
2115 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
2116 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
2117 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2119 if (op1)
2121 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
2122 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
2123 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2129 /* Function vect_finish_stmt_generation.
2131 Insert a new stmt. */
2133 static void
2134 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
2135 gimple_stmt_iterator *gsi)
2137 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2138 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 gcc_assert (stmt == gsi_stmt (*gsi));
2141 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
2143 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
2145 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
2147 if (vect_print_dump_info (REPORT_DETAILS))
2149 fprintf (vect_dump, "add new stmt: ");
2150 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
2153 /* Make sure gsi points to the stmt that is being vectorized. */
2154 gcc_assert (stmt == gsi_stmt (*gsi));
2156 gimple_set_location (vec_stmt, gimple_location (stmt));
2160 /* Function get_initial_def_for_reduction
2162 Input:
2163 STMT - a stmt that performs a reduction operation in the loop.
2164 INIT_VAL - the initial value of the reduction variable
2166 Output:
2167 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2168 of the reduction (used for adjusting the epilog - see below).
2169 Return a vector variable, initialized according to the operation that STMT
2170 performs. This vector will be used as the initial value of the
2171 vector of partial results.
2173 Option1 (adjust in epilog): Initialize the vector as follows:
2174 add: [0,0,...,0,0]
2175 mult: [1,1,...,1,1]
2176 min/max: [init_val,init_val,..,init_val,init_val]
2177 bit and/or: [init_val,init_val,..,init_val,init_val]
2178 and when necessary (e.g. add/mult case) let the caller know
2179 that it needs to adjust the result by init_val.
2181 Option2: Initialize the vector as follows:
2182 add: [0,0,...,0,init_val]
2183 mult: [1,1,...,1,init_val]
2184 min/max: [init_val,init_val,...,init_val]
2185 bit and/or: [init_val,init_val,...,init_val]
2186 and no adjustments are needed.
2188 For example, for the following code:
2190 s = init_val;
2191 for (i=0;i<n;i++)
2192 s = s + a[i];
2194 STMT is 's = s + a[i]', and the reduction variable is 's'.
2195 For a vector of 4 units, we want to return either [0,0,0,init_val],
2196 or [0,0,0,0] and let the caller know that it needs to adjust
2197 the result at the end by 'init_val'.
2199 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2200 initialization vector is simpler (same element in all entries).
2201 A cost model should help decide between these two schemes. */
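/* Illustrative sketch only (assuming a PLUS reduction and 4 elements per
   vector) of what the 'adjust in epilog' scheme computes end to end:

     int sum_reduction (const int *a, int n, int init_val)
     {
       int vec_acc[4] = { 0, 0, 0, 0 };       the [0,0,...,0] initial def
       for (int i = 0; i < n; i += 4)         vectorized loop body
         for (int k = 0; k < 4; k++)
           vec_acc[k] += a[i + k];
       int s = vec_acc[0] + vec_acc[1] + vec_acc[2] + vec_acc[3];
       return s + init_val;                   the epilog adjustment
     }

   This assumes n is a multiple of 4; remainders are handled separately by
   loop peeling, not by this function.  */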
2203 static tree
2204 get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
2206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2208 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2209 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2210 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2211 enum tree_code code = gimple_assign_rhs_code (stmt);
2212 tree type = TREE_TYPE (init_val);
2213 tree vecdef;
2214 tree def_for_init;
2215 tree init_def;
2216 tree t = NULL_TREE;
2217 int i;
2218 tree vector_type;
2219 bool nested_in_vect_loop = false;
2221 gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2222 if (nested_in_vect_loop_p (loop, stmt))
2223 nested_in_vect_loop = true;
2224 else
2225 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
2227 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2229 switch (code)
2231 case WIDEN_SUM_EXPR:
2232 case DOT_PROD_EXPR:
2233 case PLUS_EXPR:
2234 if (nested_in_vect_loop)
2235 *adjustment_def = vecdef;
2236 else
2237 *adjustment_def = init_val;
2238 /* Create a vector of zeros for init_def. */
2239 if (SCALAR_FLOAT_TYPE_P (type))
2240 def_for_init = build_real (type, dconst0);
2241 else
2242 def_for_init = build_int_cst (type, 0);
2243 for (i = nunits - 1; i >= 0; --i)
2244 t = tree_cons (NULL_TREE, def_for_init, t);
2245 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2246 gcc_assert (vector_type);
2247 init_def = build_vector (vector_type, t);
2248 break;
2250 case MIN_EXPR:
2251 case MAX_EXPR:
2252 *adjustment_def = NULL_TREE;
2253 init_def = vecdef;
2254 break;
2256 default:
2257 gcc_unreachable ();
2260 return init_def;
2264 /* Function vect_create_epilog_for_reduction
2266 Create code at the loop-epilog to finalize the result of a reduction
2267 computation.
2269 VECT_DEF is a vector of partial results.
2270 REDUC_CODE is the tree-code for the epilog reduction.
2271 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
2272 number of elements that we can fit in a vectype (nunits). In this case
2273 we have to generate more than one vector stmt, i.e., we need to "unroll"
2274 the vector stmt by a factor of VF/nunits. For more details see the documentation
2275 in vectorizable_operation.
2276 STMT is the scalar reduction stmt that is being vectorized.
2277 REDUCTION_PHI is the phi-node that carries the reduction computation.
2279 This function:
2280 1. Creates the reduction def-use cycle: sets the arguments for
2281 REDUCTION_PHI:
2282 The loop-entry argument is the vectorized initial-value of the reduction.
2283 The loop-latch argument is VECT_DEF - the vector of partial sums.
2284 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2285 by applying the operation specified by REDUC_CODE if available, or by
2286 other means (whole-vector shifts or a scalar loop).
2287 The function also creates a new phi node at the loop exit to preserve
2288 loop-closed form, as illustrated below.
2290 The flow at the entry to this function:
2292 loop:
2293 vec_def = phi <null, null> # REDUCTION_PHI
2294 VECT_DEF = vector_stmt # vectorized form of STMT
2295 s_loop = scalar_stmt # (scalar) STMT
2296 loop_exit:
2297 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2298 use <s_out0>
2299 use <s_out0>
2301 The above is transformed by this function into:
2303 loop:
2304 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2305 VECT_DEF = vector_stmt # vectorized form of STMT
2306 s_loop = scalar_stmt # (scalar) STMT
2307 loop_exit:
2308 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2309 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2310 v_out2 = reduce <v_out1>
2311 s_out3 = extract_field <v_out2, 0>
2312 s_out4 = adjust_result <s_out3>
2313 use <s_out4>
2314 use <s_out4>
2317 static void
2318 vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
2319 int ncopies,
2320 enum tree_code reduc_code,
2321 gimple reduction_phi)
2323 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2324 stmt_vec_info prev_phi_info;
2325 tree vectype;
2326 enum machine_mode mode;
2327 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2328 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2329 basic_block exit_bb;
2330 tree scalar_dest;
2331 tree scalar_type;
2332 gimple new_phi = NULL, phi;
2333 gimple_stmt_iterator exit_gsi;
2334 tree vec_dest;
2335 tree new_temp = NULL_TREE;
2336 tree new_name;
2337 gimple epilog_stmt = NULL;
2338 tree new_scalar_dest, new_dest;
2339 gimple exit_phi;
2340 tree bitsize, bitpos, bytesize;
2341 enum tree_code code = gimple_assign_rhs_code (stmt);
2342 tree adjustment_def;
2343 tree vec_initial_def, def;
2344 tree orig_name;
2345 imm_use_iterator imm_iter;
2346 use_operand_p use_p;
2347 bool extract_scalar_result = false;
2348 tree reduction_op, expr;
2349 gimple orig_stmt;
2350 gimple use_stmt;
2351 bool nested_in_vect_loop = false;
2352 VEC(gimple,heap) *phis = NULL;
2353 enum vect_def_type dt = vect_unknown_def_type;
2354 int j, i;
2356 if (nested_in_vect_loop_p (loop, stmt))
2358 loop = loop->inner;
2359 nested_in_vect_loop = true;
2362 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2364 case GIMPLE_SINGLE_RHS:
2365 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2366 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2367 break;
2368 case GIMPLE_UNARY_RHS:
2369 reduction_op = gimple_assign_rhs1 (stmt);
2370 break;
2371 case GIMPLE_BINARY_RHS:
2372 reduction_op = gimple_assign_rhs2 (stmt);
2373 break;
2374 default:
2375 gcc_unreachable ();
2378 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2379 gcc_assert (vectype);
2380 mode = TYPE_MODE (vectype);
2382 /*** 1. Create the reduction def-use cycle ***/
2384 /* For the case of reduction, vect_get_vec_def_for_operand returns
2385 the scalar def before the loop, that defines the initial value
2386 of the reduction variable. */
2387 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2388 &adjustment_def);
2390 phi = reduction_phi;
2391 def = vect_def;
2392 for (j = 0; j < ncopies; j++)
2394 /* 1.1 set the loop-entry arg of the reduction-phi: */
2395 add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));
2397 /* 1.2 set the loop-latch arg for the reduction-phi: */
2398 if (j > 0)
2399 def = vect_get_vec_def_for_stmt_copy (dt, def);
2400 add_phi_arg (phi, def, loop_latch_edge (loop));
2402 if (vect_print_dump_info (REPORT_DETAILS))
2404 fprintf (vect_dump, "transform reduction: created def-use cycle: ");
2405 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
2406 fprintf (vect_dump, "\n");
2407 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
2410 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
2413 /*** 2. Create epilog code
2414 The reduction epilog code operates across the elements of the vector
2415 of partial results computed by the vectorized loop.
2416 The reduction epilog code consists of:
2417 step 1: compute the scalar result in a vector (v_out2)
2418 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2419 step 3: adjust the scalar result (s_out3) if needed.
2421 Step 1 can be accomplished using one of the following three schemes:
2422 (scheme 1) using reduc_code, if available.
2423 (scheme 2) using whole-vector shifts, if available.
2424 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2425 combined.
2427 The overall epilog code looks like this:
2429 s_out0 = phi <s_loop> # original EXIT_PHI
2430 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2431 v_out2 = reduce <v_out1> # step 1
2432 s_out3 = extract_field <v_out2, 0> # step 2
2433 s_out4 = adjust_result <s_out3> # step 3
2435 (step 3 is optional, and steps 1 and 2 may be combined).
2436 Lastly, the uses of s_out0 are replaced by s_out4.
2438 ***/
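/* Illustrative sketch only: for a 4-element vector and a PLUS reduction,
   scheme 2 (whole-vector shifts) performs log2(4) = 2 shift+add rounds
   before extracting element 0:

     v = { v0, v1, v2, v3 }
     v = v + (v shifted by 2 elements)   ->  { v0+v2, v1+v3, ?, ? }
     v = v + (v shifted by 1 element)    ->  { v0+v1+v2+v3, ?, ?, ? }
     s = v[0]

   The shifted-in values never reach element 0, so their contents do not
   matter.  As plain C, with an array standing in for the vector register:

     int reduce4 (int v[4])
     {
       int t[4];
       for (int k = 0; k < 4; k++)            shift by two elements
         t[k] = (k + 2 < 4) ? v[k + 2] : 0;
       for (int k = 0; k < 4; k++)
         v[k] += t[k];
       for (int k = 0; k < 4; k++)            shift by one element
         t[k] = (k + 1 < 4) ? v[k + 1] : 0;
       for (int k = 0; k < 4; k++)
         v[k] += t[k];
       return v[0];                           extract the scalar result
     }

   Other reduction codes use the corresponding operation instead of '+'.  */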
2440 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2441 v_out1 = phi <v_loop> */
2443 exit_bb = single_exit (loop)->dest;
2444 def = vect_def;
2445 prev_phi_info = NULL;
2446 for (j = 0; j < ncopies; j++)
2448 phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2449 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
2450 if (j == 0)
2451 new_phi = phi;
2452 else
2454 def = vect_get_vec_def_for_stmt_copy (dt, def);
2455 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
2457 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
2458 prev_phi_info = vinfo_for_stmt (phi);
2460 exit_gsi = gsi_after_labels (exit_bb);
2462 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2463 (i.e. when reduc_code is not available) and in the final adjustment
2464 code (if needed). Also get the original scalar reduction variable as
2465 defined in the loop. In case STMT is a "pattern-stmt" (i.e. it
2466 represents a reduction pattern), the tree-code and scalar-def are
2467 taken from the original stmt that the pattern-stmt (STMT) replaces.
2468 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2469 are taken from STMT. */
2471 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2472 if (!orig_stmt)
2474 /* Regular reduction */
2475 orig_stmt = stmt;
2477 else
2479 /* Reduction pattern */
2480 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2481 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2482 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2484 code = gimple_assign_rhs_code (orig_stmt);
2485 scalar_dest = gimple_assign_lhs (orig_stmt);
2486 scalar_type = TREE_TYPE (scalar_dest);
2487 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2488 bitsize = TYPE_SIZE (scalar_type);
2489 bytesize = TYPE_SIZE_UNIT (scalar_type);
2492 /* In case this is a reduction in an inner-loop while vectorizing an outer
2493 loop - we don't need to extract a single scalar result at the end of the
2494 inner-loop. The final vector of partial results will be used in the
2495 vectorized outer-loop, or reduced to a scalar result at the end of the
2496 outer-loop. */
2497 if (nested_in_vect_loop)
2498 goto vect_finalize_reduction;
2500 /* FORNOW */
2501 gcc_assert (ncopies == 1);
2503 /* 2.3 Create the reduction code, using one of the three schemes described
2504 above. */
2506 if (reduc_code < NUM_TREE_CODES)
2508 tree tmp;
2510 /*** Case 1: Create:
2511 v_out2 = reduc_expr <v_out1> */
2513 if (vect_print_dump_info (REPORT_DETAILS))
2514 fprintf (vect_dump, "Reduce using direct vector reduction.");
2516 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2517 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2518 epilog_stmt = gimple_build_assign (vec_dest, tmp);
2519 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2520 gimple_assign_set_lhs (epilog_stmt, new_temp);
2521 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2523 extract_scalar_result = true;
2525 else
2527 enum tree_code shift_code = 0;
2528 bool have_whole_vector_shift = true;
2529 int bit_offset;
2530 int element_bitsize = tree_low_cst (bitsize, 1);
2531 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2532 tree vec_temp;
2534 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2535 shift_code = VEC_RSHIFT_EXPR;
2536 else
2537 have_whole_vector_shift = false;
2539 /* Regardless of whether we have a whole vector shift, if we're
2540 emulating the operation via tree-vect-generic, we don't want
2541 to use it. Only the first round of the reduction is likely
2542 to still be profitable via emulation. */
2543 /* ??? It might be better to emit a reduction tree code here, so that
2544 tree-vect-generic can expand the first round via bit tricks. */
2545 if (!VECTOR_MODE_P (mode))
2546 have_whole_vector_shift = false;
2547 else
2549 optab optab = optab_for_tree_code (code, vectype, optab_default);
2550 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2551 have_whole_vector_shift = false;
2554 if (have_whole_vector_shift)
2556 /*** Case 2: Create:
2557 for (offset = VS/2; offset >= element_size; offset/=2)
2559 Create: va' = vec_shift <va, offset>
2560 Create: va = vop <va, va'>
2561 } */
2563 if (vect_print_dump_info (REPORT_DETAILS))
2564 fprintf (vect_dump, "Reduce using vector shifts");
2566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2567 new_temp = PHI_RESULT (new_phi);
2569 for (bit_offset = vec_size_in_bits/2;
2570 bit_offset >= element_bitsize;
2571 bit_offset /= 2)
2573 tree bitpos = size_int (bit_offset);
2574 epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
2575 new_temp, bitpos);
2576 new_name = make_ssa_name (vec_dest, epilog_stmt);
2577 gimple_assign_set_lhs (epilog_stmt, new_name);
2578 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2580 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
2581 new_name, new_temp);
2582 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2583 gimple_assign_set_lhs (epilog_stmt, new_temp);
2584 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2587 extract_scalar_result = true;
2589 else
2591 tree rhs;
2593 /*** Case 3: Create:
2594 s = extract_field <v_out2, 0>
2595 for (offset = element_size;
2596 offset < vector_size;
2597 offset += element_size;)
2599 Create: s' = extract_field <v_out2, offset>
2600 Create: s = op <s, s'>
2601 } */
2603 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "Reduce using scalar code. ");
2606 vec_temp = PHI_RESULT (new_phi);
2607 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2608 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2609 bitsize_zero_node);
2610 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2611 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2612 gimple_assign_set_lhs (epilog_stmt, new_temp);
2613 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2615 for (bit_offset = element_bitsize;
2616 bit_offset < vec_size_in_bits;
2617 bit_offset += element_bitsize)
2619 tree bitpos = bitsize_int (bit_offset);
2620 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2621 bitpos);
2623 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2624 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2625 gimple_assign_set_lhs (epilog_stmt, new_name);
2626 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2628 epilog_stmt = gimple_build_assign_with_ops (code,
2629 new_scalar_dest,
2630 new_name, new_temp);
2631 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2632 gimple_assign_set_lhs (epilog_stmt, new_temp);
2633 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2636 extract_scalar_result = false;
2640 /* 2.4 Extract the final scalar result. Create:
2641 s_out3 = extract_field <v_out2, bitpos> */
2643 if (extract_scalar_result)
2645 tree rhs;
2647 gcc_assert (!nested_in_vect_loop);
2648 if (vect_print_dump_info (REPORT_DETAILS))
2649 fprintf (vect_dump, "extract scalar result");
2651 if (BYTES_BIG_ENDIAN)
2652 bitpos = size_binop (MULT_EXPR,
2653 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2654 TYPE_SIZE (scalar_type));
2655 else
2656 bitpos = bitsize_zero_node;
2658 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2659 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2660 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2661 gimple_assign_set_lhs (epilog_stmt, new_temp);
2662 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2665 vect_finalize_reduction:
2667 /* 2.5 Adjust the final result by the initial value of the reduction
2668 variable. (When such adjustment is not needed, then
2669 'adjustment_def' is zero). For example, if code is PLUS we create:
2670 new_temp = loop_exit_def + adjustment_def */
2672 if (adjustment_def)
2674 if (nested_in_vect_loop)
2676 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2677 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2678 new_dest = vect_create_destination_var (scalar_dest, vectype);
2680 else
2682 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2683 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2684 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2686 epilog_stmt = gimple_build_assign (new_dest, expr);
2687 new_temp = make_ssa_name (new_dest, epilog_stmt);
2688 gimple_assign_set_lhs (epilog_stmt, new_temp);
2689 SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
2690 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2694 /* 2.6 Handle the loop-exit phi */
2696 /* Replace uses of s_out0 with uses of s_out3:
2697 Find the loop-closed-use at the loop exit of the original scalar result.
2698 (The reduction result is expected to have two immediate uses - one at the
2699 latch block, and one at the loop exit). */
2700 phis = VEC_alloc (gimple, heap, 10);
2701 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2703 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2705 exit_phi = USE_STMT (use_p);
2706 VEC_quick_push (gimple, phis, exit_phi);
2709 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2710 gcc_assert (!VEC_empty (gimple, phis));
2712 for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
2714 if (nested_in_vect_loop)
2716 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2718 /* FORNOW. We do not yet support the case in which an inner-loop
2719 reduction is not used in the outer-loop (i.e. is used only outside
2720 the outer-loop). */
2721 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2722 && !STMT_VINFO_LIVE_P (stmt_vinfo));
2724 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2725 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2726 set_vinfo_for_stmt (epilog_stmt,
2727 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2728 if (adjustment_def)
2729 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
2730 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
2731 continue;
2734 /* Replace the uses: */
2735 orig_name = PHI_RESULT (exit_phi);
2736 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2737 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2738 SET_USE (use_p, new_temp);
2740 VEC_free (gimple, heap, phis);
2744 /* Function vectorizable_reduction.
2746 Check if STMT performs a reduction operation that can be vectorized.
2747 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2748 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2749 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2751 This function also handles reduction idioms (patterns) that have been
2752 recognized in advance during vect_pattern_recog. In this case, STMT may be
2753 of this form:
2754 X = pattern_expr (arg0, arg1, ..., X)
2755 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2756 sequence that had been detected and replaced by the pattern-stmt (STMT).
2758 In some cases of reduction patterns, the type of the reduction variable X is
2759 different than the type of the other arguments of STMT.
2760 In such cases, the vectype that is used when transforming STMT into a vector
2761 stmt is different than the vectype that is used to determine the
2762 vectorization factor, because it consists of a different number of elements
2763 than the actual number of elements that are being operated upon in parallel.
2765 For example, consider an accumulation of shorts into an int accumulator.
2766 On some targets it's possible to vectorize this pattern operating on 8
2767 shorts at a time (hence, the vectype for purposes of determining the
2768 vectorization factor should be V8HI); on the other hand, the vectype that
2769 is used to create the vector form is actually V4SI (the type of the result).
2771 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2772 indicates what is the actual level of parallelism (V8HI in the example), so
2773 that the right vectorization factor would be derived. This vectype
2774 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2775 be used to create the vectorized stmt. The right vectype for the vectorized
2776 stmt is obtained from the type of the result X:
2777 get_vectype_for_scalar_type (TREE_TYPE (X))
2779 This means that, contrary to "regular" reductions (or "regular" stmts in
2780 general), the following equation:
2781 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2782 does *NOT* necessarily hold for reduction patterns. */
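/* Illustrative example only: the shorts-into-int accumulation mentioned
   above, written as scalar C (N stands for some array length):

     short a[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i];                           recognized as a widen_sum

   Here the vectype that determines the VF is that of the short arguments
   (e.g. V8HI, 8 elements per iteration), while the vectorized stmt itself
   produces int partial sums (e.g. V4SI), which is why the two vectypes can
   differ for pattern stmts.  */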
2784 bool
2785 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
2786 gimple *vec_stmt)
2788 tree vec_dest;
2789 tree scalar_dest;
2790 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2792 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2793 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2794 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2795 enum tree_code code, orig_code, epilog_reduc_code = 0;
2796 enum machine_mode vec_mode;
2797 int op_type;
2798 optab optab, reduc_optab;
2799 tree new_temp = NULL_TREE;
2800 tree def;
2801 gimple def_stmt;
2802 enum vect_def_type dt;
2803 gimple new_phi = NULL;
2804 tree scalar_type;
2805 bool is_simple_use;
2806 gimple orig_stmt;
2807 stmt_vec_info orig_stmt_info;
2808 tree expr = NULL_TREE;
2809 int i;
2810 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2811 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2812 int epilog_copies;
2813 stmt_vec_info prev_stmt_info, prev_phi_info;
2814 gimple first_phi = NULL;
2815 bool single_defuse_cycle = false;
2816 tree reduc_def;
2817 gimple new_stmt = NULL;
2818 int j;
2819 tree ops[3];
2821 if (nested_in_vect_loop_p (loop, stmt))
2822 loop = loop->inner;
2824 gcc_assert (ncopies >= 1);
2826 /* FORNOW: SLP not supported. */
2827 if (STMT_SLP_TYPE (stmt_info))
2828 return false;
2830 /* 1. Is vectorizable reduction? */
2832 /* Not supportable if the reduction variable is used in the loop. */
2833 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2834 return false;
2836 /* Reductions that are not used even in an enclosing outer-loop
2837 are expected to be "live" (used out of the loop).
2838 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2839 && !STMT_VINFO_LIVE_P (stmt_info))
2840 return false;
2842 /* Make sure it was already recognized as a reduction computation. */
2843 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2844 return false;
2846 /* 2. Has this been recognized as a reduction pattern?
2848 Check if STMT represents a pattern that has been recognized
2849 in earlier analysis stages. For stmts that represent a pattern,
2850 the STMT_VINFO_RELATED_STMT field records the last stmt in
2851 the original sequence that constitutes the pattern. */
2853 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2854 if (orig_stmt)
2856 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2857 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2858 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2859 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2862 /* 3. Check the operands of the operation. The first operands are defined
2863 inside the loop body. The last operand is the reduction variable,
2864 which is defined by the loop-header-phi. */
2866 gcc_assert (is_gimple_assign (stmt));
2868 /* Flatten RHS */
2869 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2871 case GIMPLE_SINGLE_RHS:
2872 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
2873 if (op_type == ternary_op)
2875 tree rhs = gimple_assign_rhs1 (stmt);
2876 ops[0] = TREE_OPERAND (rhs, 0);
2877 ops[1] = TREE_OPERAND (rhs, 1);
2878 ops[2] = TREE_OPERAND (rhs, 2);
2879 code = TREE_CODE (rhs);
2881 else
2882 return false;
2883 break;
2885 case GIMPLE_BINARY_RHS:
2886 code = gimple_assign_rhs_code (stmt);
2887 op_type = TREE_CODE_LENGTH (code);
2888 gcc_assert (op_type == binary_op);
2889 ops[0] = gimple_assign_rhs1 (stmt);
2890 ops[1] = gimple_assign_rhs2 (stmt);
2891 break;
2893 case GIMPLE_UNARY_RHS:
2894 return false;
2896 default:
2897 gcc_unreachable ();
2900 scalar_dest = gimple_assign_lhs (stmt);
2901 scalar_type = TREE_TYPE (scalar_dest);
2902 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
2903 && !SCALAR_FLOAT_TYPE_P (scalar_type))
2904 return false;
2906 /* All uses but the last are expected to be defined in the loop.
2907 The last use is the reduction variable. */
2908 for (i = 0; i < op_type-1; i++)
2910 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt,
2911 &def, &dt);
2912 gcc_assert (is_simple_use);
2913 if (dt != vect_loop_def
2914 && dt != vect_invariant_def
2915 && dt != vect_constant_def
2916 && dt != vect_induction_def)
2917 return false;
2920 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def, &dt);
2921 gcc_assert (is_simple_use);
2922 gcc_assert (dt == vect_reduction_def);
2923 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
2924 if (orig_stmt)
2925 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2926 else
2927 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2929 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2930 return false;
2932 /* 4. Supportable by target? */
2934 /* 4.1. check support for the operation in the loop */
2935 optab = optab_for_tree_code (code, vectype, optab_default);
2936 if (!optab)
2938 if (vect_print_dump_info (REPORT_DETAILS))
2939 fprintf (vect_dump, "no optab.");
2940 return false;
2942 vec_mode = TYPE_MODE (vectype);
2943 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "op not supported by target.");
2947 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2948 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2949 < vect_min_worthwhile_factor (code))
2950 return false;
2951 if (vect_print_dump_info (REPORT_DETAILS))
2952 fprintf (vect_dump, "proceeding using word mode.");
2955 /* Worthwhile without SIMD support? */
2956 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2957 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2958 < vect_min_worthwhile_factor (code))
2960 if (vect_print_dump_info (REPORT_DETAILS))
2961 fprintf (vect_dump, "not worthwhile without SIMD support.");
2962 return false;
2965 /* 4.2. Check support for the epilog operation.
2967 If STMT represents a reduction pattern, then the type of the
2968 reduction variable may be different than the type of the rest
2969 of the arguments. For example, consider the case of accumulation
2970 of shorts into an int accumulator; the original code:
2971 S1: int_a = (int) short_a;
2972 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2974 was replaced with:
2975 STMT: int_acc = widen_sum <short_a, int_acc>
2977 This means that:
2978 1. The tree-code that is used to create the vector operation in the
2979 epilog code (that reduces the partial results) is not the
2980 tree-code of STMT, but is rather the tree-code of the original
2981 stmt from the pattern that STMT is replacing. I.e., in the example
2982 above we want to use 'widen_sum' in the loop, but 'plus' in the
2983 epilog.
2984 2. The type (mode) we use to check available target support
2985 for the vector operation to be created in the *epilog*, is
2986 determined by the type of the reduction variable (in the example
2987 above we'd check this: plus_optab[vect_int_mode]).
2988 However the type (mode) we use to check available target support
2989 for the vector operation to be created *inside the loop*, is
2990 determined by the type of the other arguments to STMT (in the
2991 example we'd check this: widen_sum_optab[vect_short_mode]).
2993 This is contrary to "regular" reductions, in which the types of all
2994 the arguments are the same as the type of the reduction variable.
2995 For "regular" reductions we can therefore use the same vector type
2996 (and also the same tree-code) when generating the epilog code and
2997 when generating the code inside the loop. */
2999 if (orig_stmt)
3001 /* This is a reduction pattern: get the vectype from the type of the
3002 reduction variable, and get the tree-code from orig_stmt. */
3003 orig_code = gimple_assign_rhs_code (orig_stmt);
3004 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
3005 if (!vectype)
3007 if (vect_print_dump_info (REPORT_DETAILS))
3009 fprintf (vect_dump, "unsupported data-type ");
3010 print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
3012 return false;
3015 vec_mode = TYPE_MODE (vectype);
3017 else
3019 /* Regular reduction: the same vectype and tree-code that are used for
3020 the vector code inside the loop can also be used for the epilog code. */
3021 orig_code = code;
3024 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
3025 return false;
3026 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
3027 if (!reduc_optab)
3029 if (vect_print_dump_info (REPORT_DETAILS))
3030 fprintf (vect_dump, "no optab for reduction.");
3031 epilog_reduc_code = NUM_TREE_CODES;
3033 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "reduc op not supported by target.");
3037 epilog_reduc_code = NUM_TREE_CODES;
3040 if (!vec_stmt) /* transformation not required. */
3042 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3043 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
3044 return false;
3045 return true;
3048 /** Transform. **/
3050 if (vect_print_dump_info (REPORT_DETAILS))
3051 fprintf (vect_dump, "transform reduction.");
3053 /* Create the destination vector */
3054 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3056 /* In case the vectorization factor (VF) is bigger than the number
3057 of elements that we can fit in a vectype (nunits), we have to generate
3058 more than one vector stmt, i.e., we need to "unroll" the
3059 vector stmt by a factor of VF/nunits. For more details see the documentation
3060 in vectorizable_operation. */
3062 /* If the reduction is used in an outer loop we need to generate
3063 VF intermediate results, like so (e.g. for ncopies=2):
3064 r0 = phi (init, r0)
3065 r1 = phi (init, r1)
3066 r0 = x0 + r0;
3067 r1 = x1 + r1;
3068 (i.e. we generate VF results in 2 registers).
3069 In this case we have a separate def-use cycle for each copy, and therefore
3070 for each copy we get the vector def for the reduction variable from the
3071 respective phi node created for this copy.
3073 Otherwise (the reduction is unused in the loop nest), we can combine
3074 together intermediate results, like so (e.g. for ncopies=2):
3075 r = phi (init, r)
3076 r = x0 + r;
3077 r = x1 + r;
3078 (i.e. we generate VF/2 results in a single register).
3079 In this case for each copy we get the vector def for the reduction variable
3080 from the vectorized reduction operation generated in the previous iteration.
3083 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
3085 single_defuse_cycle = true;
3086 epilog_copies = 1;
3088 else
3089 epilog_copies = ncopies;
3091 prev_stmt_info = NULL;
3092 prev_phi_info = NULL;
3093 for (j = 0; j < ncopies; j++)
3095 if (j == 0 || !single_defuse_cycle)
3097 /* Create the reduction-phi that defines the reduction-operand. */
3098 new_phi = create_phi_node (vec_dest, loop->header);
3099 set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
3102 /* Handle uses. */
3103 if (j == 0)
3105 loop_vec_def0 = vect_get_vec_def_for_operand (ops[0], stmt, NULL);
3106 if (op_type == ternary_op)
3108 loop_vec_def1 = vect_get_vec_def_for_operand (ops[1], stmt, NULL);
3111 /* Get the vector def for the reduction variable from the phi node */
3112 reduc_def = PHI_RESULT (new_phi);
3113 first_phi = new_phi;
3115 else
3117 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
3118 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
3119 if (op_type == ternary_op)
3120 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
3122 if (single_defuse_cycle)
3123 reduc_def = gimple_assign_lhs (new_stmt);
3124 else
3125 reduc_def = PHI_RESULT (new_phi);
3127 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
3130 /* Arguments are ready. Create the new vector stmt. */
3131 if (op_type == binary_op)
3132 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
3133 else
3134 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
3135 reduc_def);
3136 new_stmt = gimple_build_assign (vec_dest, expr);
3137 new_temp = make_ssa_name (vec_dest, new_stmt);
3138 gimple_assign_set_lhs (new_stmt, new_temp);
3139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3141 if (j == 0)
3142 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3143 else
3144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3145 prev_stmt_info = vinfo_for_stmt (new_stmt);
3146 prev_phi_info = vinfo_for_stmt (new_phi);
3149 /* Finalize the reduction-phi (set its arguments) and create the
3150 epilog reduction code. */
3151 if (!single_defuse_cycle)
3152 new_temp = gimple_assign_lhs (*vec_stmt);
3153 vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
3154 epilog_reduc_code, first_phi);
3155 return true;
3158 /* Checks if CALL can be vectorized in type VECTYPE. Returns
3159 a function declaration if the target has a vectorized version
3160 of the function, or NULL_TREE if the function cannot be vectorized. */
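/* For instance (illustrative only): a loop such as

     for (int i = 0; i < n; i++)
       b[i] = copysign (a[i], c[i]);

   calls a const builtin; if the target hook used below returns a vectorized
   variant of that builtin for the requested vector types, the scalar call
   can be replaced by a call to it, otherwise NULL_TREE is returned and the
   call is not vectorized this way.  */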
3162 tree
3163 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
3165 tree fndecl = gimple_call_fndecl (call);
3166 enum built_in_function code;
3168 /* We only handle functions that do not read or clobber memory -- i.e.
3169 const or novops ones. */
3170 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
3171 return NULL_TREE;
3173 if (!fndecl
3174 || TREE_CODE (fndecl) != FUNCTION_DECL
3175 || !DECL_BUILT_IN (fndecl))
3176 return NULL_TREE;
3178 code = DECL_FUNCTION_CODE (fndecl);
3179 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
3180 vectype_in);
3183 /* Function vectorizable_call.
3185 Check if STMT performs a function call that can be vectorized.
3186 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3187 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3188 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3190 bool
3191 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
3193 tree vec_dest;
3194 tree scalar_dest;
3195 tree op, type;
3196 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3198 tree vectype_out, vectype_in;
3199 int nunits_in;
3200 int nunits_out;
3201 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3202 tree fndecl, new_temp, def, rhs_type, lhs_type;
3203 gimple def_stmt;
3204 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3205 gimple new_stmt;
3206 int ncopies, j;
3207 VEC(tree, heap) *vargs = NULL;
3208 enum { NARROW, NONE, WIDEN } modifier;
3209 size_t i, nargs;
3211 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3212 return false;
3214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3215 return false;
3217 /* FORNOW: SLP not supported. */
3218 if (STMT_SLP_TYPE (stmt_info))
3219 return false;
3221 /* Is STMT a vectorizable call? */
3222 if (!is_gimple_call (stmt))
3223 return false;
3225 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3226 return false;
3228 /* Process function arguments. */
3229 rhs_type = NULL_TREE;
3230 nargs = gimple_call_num_args (stmt);
3232 /* Bail out if the function has more than two arguments; we
3233 do not have interesting builtin functions to vectorize with
3234 more than two arguments. A call with no arguments is not useful either. */
3235 if (nargs == 0 || nargs > 2)
3236 return false;
3238 for (i = 0; i < nargs; i++)
3240 op = gimple_call_arg (stmt, i);
3242 /* We can only handle calls with arguments of the same type. */
3243 if (rhs_type
3244 && rhs_type != TREE_TYPE (op))
3246 if (vect_print_dump_info (REPORT_DETAILS))
3247 fprintf (vect_dump, "argument types differ.");
3248 return false;
3250 rhs_type = TREE_TYPE (op);
3252 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
3254 if (vect_print_dump_info (REPORT_DETAILS))
3255 fprintf (vect_dump, "use not simple.");
3256 return false;
3260 vectype_in = get_vectype_for_scalar_type (rhs_type);
3261 if (!vectype_in)
3262 return false;
3263 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3265 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
3266 vectype_out = get_vectype_for_scalar_type (lhs_type);
3267 if (!vectype_out)
3268 return false;
3269 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3271 /* FORNOW */
3272 if (nunits_in == nunits_out / 2)
3273 modifier = NARROW;
3274 else if (nunits_out == nunits_in)
3275 modifier = NONE;
3276 else if (nunits_out == nunits_in / 2)
3277 modifier = WIDEN;
3278 else
3279 return false;
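/* Worked example (illustrative types): assuming 4-element float vectors
   (V4SF) and 2-element double vectors (V2DF),

     float  = f (float)     nunits_in = 4, nunits_out = 4  ->  NONE
     float  = f (double)    nunits_in = 2, nunits_out = 4  ->  NARROW
     double = f (float)     nunits_in = 4, nunits_out = 2  ->  WIDEN

   In the NARROW case two input vectors are consumed per output vector
   (see the two operands pushed per argument below); the WIDEN case is
   currently rejected.  */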
3281 /* For now, we only vectorize functions if a target specific builtin
3282 is available. TODO -- in some cases, it might be profitable to
3283 insert the calls for pieces of the vector, in order to be able
3284 to vectorize other operations in the loop. */
3285 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
3286 if (fndecl == NULL_TREE)
3288 if (vect_print_dump_info (REPORT_DETAILS))
3289 fprintf (vect_dump, "function is not vectorizable.");
3291 return false;
3294 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3296 if (modifier == NARROW)
3297 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3298 else
3299 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3301 /* Sanity check: make sure that at least one copy of the vectorized stmt
3302 needs to be generated. */
3303 gcc_assert (ncopies >= 1);
3305 if (!vec_stmt) /* transformation not required. */
3307 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3308 if (vect_print_dump_info (REPORT_DETAILS))
3309 fprintf (vect_dump, "=== vectorizable_call ===");
3310 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3311 return true;
3314 /** Transform. **/
3316 if (vect_print_dump_info (REPORT_DETAILS))
3317 fprintf (vect_dump, "transform operation.");
3319 /* Handle def. */
3320 scalar_dest = gimple_call_lhs (stmt);
3321 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3323 prev_stmt_info = NULL;
3324 switch (modifier)
3326 case NONE:
3327 for (j = 0; j < ncopies; ++j)
3329 /* Build argument list for the vectorized call. */
3330 if (j == 0)
3331 vargs = VEC_alloc (tree, heap, nargs);
3332 else
3333 VEC_truncate (tree, vargs, 0);
3335 for (i = 0; i < nargs; i++)
3337 op = gimple_call_arg (stmt, i);
3338 if (j == 0)
3339 vec_oprnd0
3340 = vect_get_vec_def_for_operand (op, stmt, NULL);
3341 else
3342 vec_oprnd0
3343 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3345 VEC_quick_push (tree, vargs, vec_oprnd0);
3348 new_stmt = gimple_build_call_vec (fndecl, vargs);
3349 new_temp = make_ssa_name (vec_dest, new_stmt);
3350 gimple_call_set_lhs (new_stmt, new_temp);
3352 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3354 if (j == 0)
3355 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3356 else
3357 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3359 prev_stmt_info = vinfo_for_stmt (new_stmt);
3362 break;
3364 case NARROW:
3365 for (j = 0; j < ncopies; ++j)
3367 /* Build argument list for the vectorized call. */
3368 if (j == 0)
3369 vargs = VEC_alloc (tree, heap, nargs * 2);
3370 else
3371 VEC_truncate (tree, vargs, 0);
3373 for (i = 0; i < nargs; i++)
3375 op = gimple_call_arg (stmt, i);
3376 if (j == 0)
3378 vec_oprnd0
3379 = vect_get_vec_def_for_operand (op, stmt, NULL);
3380 vec_oprnd1
3381 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3383 else
3385 vec_oprnd0
3386 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3387 vec_oprnd1
3388 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3391 VEC_quick_push (tree, vargs, vec_oprnd0);
3392 VEC_quick_push (tree, vargs, vec_oprnd1);
3395 new_stmt = gimple_build_call_vec (fndecl, vargs);
3396 new_temp = make_ssa_name (vec_dest, new_stmt);
3397 gimple_call_set_lhs (new_stmt, new_temp);
3399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3401 if (j == 0)
3402 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3403 else
3404 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3406 prev_stmt_info = vinfo_for_stmt (new_stmt);
3409 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3411 break;
3413 case WIDEN:
3414 /* No current target implements this case. */
3415 return false;
3418 VEC_free (tree, heap, vargs);
3420 /* The call in STMT might prevent it from being removed in dce.
3421 We however cannot remove it here, due to the way the ssa name
3422 it defines is mapped to the new definition. So just replace
3423 rhs of the statement with something harmless. */
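      /* For example (FOO and the SSA names here are purely illustrative):
         an original stmt
             x_1 = foo (a_2);
         is left behind as the harmless copy
             x_1 = 0;
         while its vectorized counterparts were emitted above.  */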
3425 type = TREE_TYPE (scalar_dest);
3426 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3427 fold_convert (type, integer_zero_node));
3428 set_vinfo_for_stmt (new_stmt, stmt_info);
3429 set_vinfo_for_stmt (stmt, NULL);
3430 STMT_VINFO_STMT (stmt_info) = new_stmt;
3431 gsi_replace (gsi, new_stmt, false);
3432 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3434 return true;
3438 /* Function vect_gen_widened_results_half
3440    Create a vector stmt whose code, number of arguments, and result
3441    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3442 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3443 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3444 needs to be created (DECL is a function-decl of a target-builtin).
3445 STMT is the original scalar stmt that we are vectorizing. */
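      /* For example (illustrative only): when the target supports the widening
         operation through builtins, the two halves of the result are emitted as
         calls such as
             vx_hi = DECL1 <vx0>;   vx_lo = DECL2 <vx0>;
         whereas with generic support they are plain assignments using tree
         codes such as VEC_UNPACK_HI_EXPR / VEC_UNPACK_LO_EXPR.  */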
3447 static gimple
3448 vect_gen_widened_results_half (enum tree_code code,
3449 tree decl,
3450 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3451 tree vec_dest, gimple_stmt_iterator *gsi,
3452 gimple stmt)
3454 gimple new_stmt;
3455 tree new_temp;
3456 tree sym;
3457 ssa_op_iter iter;
3459 /* Generate half of the widened result: */
3460 if (code == CALL_EXPR)
3462 /* Target specific support */
3463 if (op_type == binary_op)
3464 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3465 else
3466 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3467 new_temp = make_ssa_name (vec_dest, new_stmt);
3468 gimple_call_set_lhs (new_stmt, new_temp);
3470 else
3472 /* Generic support */
3473 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3474 if (op_type != binary_op)
3475 vec_oprnd1 = NULL;
3476 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3477 vec_oprnd1);
3478 new_temp = make_ssa_name (vec_dest, new_stmt);
3479 gimple_assign_set_lhs (new_stmt, new_temp);
3481 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3483 if (code == CALL_EXPR)
3485 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3487 if (TREE_CODE (sym) == SSA_NAME)
3488 sym = SSA_NAME_VAR (sym);
3489 mark_sym_for_renaming (sym);
3493 return new_stmt;
3497 /* Check if STMT performs a conversion operation that can be vectorized.
3498 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3499 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3500 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3502 bool
3503 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3504 gimple *vec_stmt, slp_tree slp_node)
3506 tree vec_dest;
3507 tree scalar_dest;
3508 tree op0;
3509 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3510 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3511 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3512 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3513 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3514 tree new_temp;
3515 tree def;
3516 gimple def_stmt;
3517 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3518 gimple new_stmt = NULL;
3519 stmt_vec_info prev_stmt_info;
3520 int nunits_in;
3521 int nunits_out;
3522 tree vectype_out, vectype_in;
3523 int ncopies, j;
3524 tree expr;
3525 tree rhs_type, lhs_type;
3526 tree builtin_decl;
3527 enum { NARROW, NONE, WIDEN } modifier;
3528 int i;
3529 VEC(tree,heap) *vec_oprnds0 = NULL;
3530 tree vop0;
3531 tree integral_type;
3532 VEC(tree,heap) *dummy = NULL;
3533 int dummy_int;
3535 /* Is STMT a vectorizable conversion? */
3537 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3538 return false;
3540 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3541 return false;
3543 if (!is_gimple_assign (stmt))
3544 return false;
3546 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3547 return false;
3549 code = gimple_assign_rhs_code (stmt);
3550 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3551 return false;
3553 /* Check types of lhs and rhs. */
3554 op0 = gimple_assign_rhs1 (stmt);
3555 rhs_type = TREE_TYPE (op0);
3556 vectype_in = get_vectype_for_scalar_type (rhs_type);
3557 if (!vectype_in)
3558 return false;
3559 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3561 scalar_dest = gimple_assign_lhs (stmt);
3562 lhs_type = TREE_TYPE (scalar_dest);
3563 vectype_out = get_vectype_for_scalar_type (lhs_type);
3564 if (!vectype_out)
3565 return false;
3566 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3568 /* FORNOW */
3569 if (nunits_in == nunits_out / 2)
3570 modifier = NARROW;
3571 else if (nunits_out == nunits_in)
3572 modifier = NONE;
3573 else if (nunits_out == nunits_in / 2)
3574 modifier = WIDEN;
3575 else
3576 return false;
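      /* For example (assuming V4SI, V4SF and V2DF vector types on the target):
         int -> float keeps the number of elements (V4SI -> V4SF), giving
         modifier NONE; int -> double halves it (V4SI -> two V2DF), giving
         WIDEN; double -> int doubles it (two V2DF -> one V4SI), giving
         NARROW.  */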
3578 if (modifier == NONE)
3579 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3581 /* Bail out if the types are both integral or non-integral. */
3582 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3583 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
3584 return false;
3586 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
3588 if (modifier == NARROW)
3589 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3590 else
3591 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3593 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3594 this, so we can safely override NCOPIES with 1 here. */
3595 if (slp_node)
3596 ncopies = 1;
3598 /* Sanity check: make sure that at least one copy of the vectorized stmt
3599 needs to be generated. */
3600 gcc_assert (ncopies >= 1);
3602 /* Check the operands of the operation. */
3603 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3605 if (vect_print_dump_info (REPORT_DETAILS))
3606 fprintf (vect_dump, "use not simple.");
3607 return false;
3610 /* Supportable by target? */
3611 if ((modifier == NONE
3612 && !targetm.vectorize.builtin_conversion (code, integral_type))
3613 || (modifier == WIDEN
3614 && !supportable_widening_operation (code, stmt, vectype_in,
3615 &decl1, &decl2,
3616 &code1, &code2,
3617 &dummy_int, &dummy))
3618 || (modifier == NARROW
3619 && !supportable_narrowing_operation (code, stmt, vectype_in,
3620 &code1, &dummy_int, &dummy)))
3622 if (vect_print_dump_info (REPORT_DETAILS))
3623 fprintf (vect_dump, "conversion not supported by target.");
3624 return false;
3627 if (modifier != NONE)
3629 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3630 /* FORNOW: SLP not supported. */
3631 if (STMT_SLP_TYPE (stmt_info))
3632 return false;
3635 if (!vec_stmt) /* transformation not required. */
3637 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3638 return true;
3641 /** Transform. **/
3642 if (vect_print_dump_info (REPORT_DETAILS))
3643 fprintf (vect_dump, "transform conversion.");
3645 /* Handle def. */
3646 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3648 if (modifier == NONE && !slp_node)
3649 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3651 prev_stmt_info = NULL;
3652 switch (modifier)
3654 case NONE:
3655 for (j = 0; j < ncopies; j++)
3657 tree sym;
3658 ssa_op_iter iter;
3660 if (j == 0)
3661 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3662 else
3663 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3665 builtin_decl =
3666 targetm.vectorize.builtin_conversion (code, integral_type);
3667 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3669 /* Arguments are ready.  Create the new vector stmt.  */
3670 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
3671 new_temp = make_ssa_name (vec_dest, new_stmt);
3672 gimple_call_set_lhs (new_stmt, new_temp);
3673 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3674 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3675 SSA_OP_ALL_VIRTUALS)
3677 if (TREE_CODE (sym) == SSA_NAME)
3678 sym = SSA_NAME_VAR (sym);
3679 mark_sym_for_renaming (sym);
3681 if (slp_node)
3682 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3685 if (j == 0)
3686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3687 else
3688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3689 prev_stmt_info = vinfo_for_stmt (new_stmt);
3691 break;
3693 case WIDEN:
3694 /* In case the vectorization factor (VF) is bigger than the number
3695 of elements that we can fit in a vectype (nunits), we have to
3696 generate more than one vector stmt - i.e - we need to "unroll"
3697 the vector stmt by a factor VF/nunits. */
3698 for (j = 0; j < ncopies; j++)
3700 if (j == 0)
3701 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3702 else
3703 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3705 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3707 /* Generate first half of the widened result: */
3708 new_stmt
3709 = vect_gen_widened_results_half (code1, decl1,
3710 vec_oprnd0, vec_oprnd1,
3711 unary_op, vec_dest, gsi, stmt);
3712 if (j == 0)
3713 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3714 else
3715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3716 prev_stmt_info = vinfo_for_stmt (new_stmt);
3718 /* Generate second half of the widened result: */
3719 new_stmt
3720 = vect_gen_widened_results_half (code2, decl2,
3721 vec_oprnd0, vec_oprnd1,
3722 unary_op, vec_dest, gsi, stmt);
3723 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3724 prev_stmt_info = vinfo_for_stmt (new_stmt);
3726 break;
3728 case NARROW:
3729 /* In case the vectorization factor (VF) is bigger than the number
3730 of elements that we can fit in a vectype (nunits), we have to
3731 generate more than one vector stmt - i.e - we need to "unroll"
3732 the vector stmt by a factor VF/nunits. */
3733 for (j = 0; j < ncopies; j++)
3735 /* Handle uses. */
3736 if (j == 0)
3738 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3739 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3741 else
3743 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3744 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3747 /* Arguments are ready. Create the new vector stmt. */
3748 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3749 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
3750 vec_oprnd1);
3751 new_temp = make_ssa_name (vec_dest, new_stmt);
3752 gimple_assign_set_lhs (new_stmt, new_temp);
3753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3755 if (j == 0)
3756 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3757 else
3758 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3760 prev_stmt_info = vinfo_for_stmt (new_stmt);
3763 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3766 if (vec_oprnds0)
3767 VEC_free (tree, heap, vec_oprnds0);
3769 return true;
3773 /* Function vectorizable_assignment.
3775 Check if STMT performs an assignment (copy) that can be vectorized.
3776 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3777 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3778 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3780 bool
3781 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3782 gimple *vec_stmt, slp_tree slp_node)
3784 tree vec_dest;
3785 tree scalar_dest;
3786 tree op;
3787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3790 tree new_temp;
3791 tree def;
3792 gimple def_stmt;
3793 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3794 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3795 int ncopies;
3796 int i;
3797 VEC(tree,heap) *vec_oprnds = NULL;
3798 tree vop;
3800 /* Multiple types in SLP are handled by creating the appropriate number of
3801 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3802 case of SLP. */
3803 if (slp_node)
3804 ncopies = 1;
3805 else
3806 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3808 gcc_assert (ncopies >= 1);
3809 if (ncopies > 1)
3810 return false; /* FORNOW */
3812 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3813 return false;
3815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3816 return false;
3818 /* Is vectorizable assignment? */
3819 if (!is_gimple_assign (stmt))
3820 return false;
3822 scalar_dest = gimple_assign_lhs (stmt);
3823 if (TREE_CODE (scalar_dest) != SSA_NAME)
3824 return false;
3826 if (gimple_assign_single_p (stmt)
3827 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
3828 op = gimple_assign_rhs1 (stmt);
3829 else
3830 return false;
3832 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3834 if (vect_print_dump_info (REPORT_DETAILS))
3835 fprintf (vect_dump, "use not simple.");
3836 return false;
3839 if (!vec_stmt) /* transformation not required. */
3841 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3842 if (vect_print_dump_info (REPORT_DETAILS))
3843 fprintf (vect_dump, "=== vectorizable_assignment ===");
3844 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3845 return true;
3848 /** Transform. **/
3849 if (vect_print_dump_info (REPORT_DETAILS))
3850 fprintf (vect_dump, "transform assignment.");
3852 /* Handle def. */
3853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3855 /* Handle use. */
3856 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3858 /* Arguments are ready.  Create the new vector stmt.  */
3859 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3861 *vec_stmt = gimple_build_assign (vec_dest, vop);
3862 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3863 gimple_assign_set_lhs (*vec_stmt, new_temp);
3864 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3865 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3867 if (slp_node)
3868 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
3871 VEC_free (tree, heap, vec_oprnds);
3872 return true;
3876 /* Function vect_min_worthwhile_factor.
3878 For a loop where we could vectorize the operation indicated by CODE,
3879 return the minimum vectorization factor that makes it worthwhile
3880 to use generic vectors. */
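/* For example: a PLUS_EXPR emulated in word mode is only assumed to pay off
   once at least 4 elements are processed per word-mode operation, while
   bitwise AND/IOR/XOR already pay off at 2; any other code returns INT_MAX,
   i.e. emulation is never considered worthwhile.  These factors are
   heuristics rather than measured costs.  */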
3881 static int
3882 vect_min_worthwhile_factor (enum tree_code code)
3884 switch (code)
3886 case PLUS_EXPR:
3887 case MINUS_EXPR:
3888 case NEGATE_EXPR:
3889 return 4;
3891 case BIT_AND_EXPR:
3892 case BIT_IOR_EXPR:
3893 case BIT_XOR_EXPR:
3894 case BIT_NOT_EXPR:
3895 return 2;
3897 default:
3898 return INT_MAX;
3903 /* Function vectorizable_induction
3905 Check if PHI performs an induction computation that can be vectorized.
3906 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3907 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3908 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3910 bool
3911 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
3912 gimple *vec_stmt)
3914 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3915 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3916 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3918 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3919 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3920 tree vec_def;
3922 gcc_assert (ncopies >= 1);
3923 /* FORNOW. This restriction should be relaxed. */
3924 if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
3926 if (vect_print_dump_info (REPORT_DETAILS))
3927 fprintf (vect_dump, "multiple types in nested loop.");
3928 return false;
3931 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3932 return false;
3934 /* FORNOW: SLP not supported. */
3935 if (STMT_SLP_TYPE (stmt_info))
3936 return false;
3938 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3940 if (gimple_code (phi) != GIMPLE_PHI)
3941 return false;
3943 if (!vec_stmt) /* transformation not required. */
3945 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3946 if (vect_print_dump_info (REPORT_DETAILS))
3947 fprintf (vect_dump, "=== vectorizable_induction ===");
3948 vect_model_induction_cost (stmt_info, ncopies);
3949 return true;
3952 /** Transform. **/
3954 if (vect_print_dump_info (REPORT_DETAILS))
3955 fprintf (vect_dump, "transform induction phi.");
3957 vec_def = get_initial_def_for_induction (phi);
3958 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3959 return true;
3963 /* Function vectorizable_operation.
3965 Check if STMT performs a binary or unary operation that can be vectorized.
3966 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3967 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3968 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3970 bool
3971 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3972 gimple *vec_stmt, slp_tree slp_node)
3974 tree vec_dest;
3975 tree scalar_dest;
3976 tree op0, op1 = NULL;
3977 tree vec_oprnd1 = NULL_TREE;
3978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3981 enum tree_code code;
3982 enum machine_mode vec_mode;
3983 tree new_temp;
3984 int op_type;
3985 optab optab;
3986 int icode;
3987 enum machine_mode optab_op2_mode;
3988 tree def;
3989 gimple def_stmt;
3990 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3991 gimple new_stmt = NULL;
3992 stmt_vec_info prev_stmt_info;
3993 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3994 int nunits_out;
3995 tree vectype_out;
3996 int ncopies;
3997 int j, i;
3998 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3999 tree vop0, vop1;
4000 unsigned int k;
4001 bool shift_p = false;
4002 bool scalar_shift_arg = false;
4004 /* Multiple types in SLP are handled by creating the appropriate number of
4005 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4006 case of SLP. */
4007 if (slp_node)
4008 ncopies = 1;
4009 else
4010 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4012 gcc_assert (ncopies >= 1);
4014 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4015 return false;
4017 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4018 return false;
4020 /* Is STMT a vectorizable binary/unary operation? */
4021 if (!is_gimple_assign (stmt))
4022 return false;
4024 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4025 return false;
4027 scalar_dest = gimple_assign_lhs (stmt);
4028 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4029 if (!vectype_out)
4030 return false;
4031 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4032 if (nunits_out != nunits_in)
4033 return false;
4035 code = gimple_assign_rhs_code (stmt);
4037 /* For pointer addition, we should use the normal plus for
4038 the vector addition. */
4039 if (code == POINTER_PLUS_EXPR)
4040 code = PLUS_EXPR;
4042 /* Support only unary or binary operations. */
4043 op_type = TREE_CODE_LENGTH (code);
4044 if (op_type != unary_op && op_type != binary_op)
4046 if (vect_print_dump_info (REPORT_DETAILS))
4047 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
4048 return false;
4051 op0 = gimple_assign_rhs1 (stmt);
4052 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4054 if (vect_print_dump_info (REPORT_DETAILS))
4055 fprintf (vect_dump, "use not simple.");
4056 return false;
4059 if (op_type == binary_op)
4061 op1 = gimple_assign_rhs2 (stmt);
4062 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4064 if (vect_print_dump_info (REPORT_DETAILS))
4065 fprintf (vect_dump, "use not simple.");
4066 return false;
4070 /* If this is a shift/rotate, determine whether the shift amount is a vector,
4071 or scalar. If the shift/rotate amount is a vector, use the vector/vector
4072 shift optabs. */
4073 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4074 || code == RROTATE_EXPR)
4076 shift_p = true;
4078 /* vector shifted by vector */
4079 if (dt[1] == vect_loop_def)
4081 optab = optab_for_tree_code (code, vectype, optab_vector);
4082 if (vect_print_dump_info (REPORT_DETAILS))
4083 fprintf (vect_dump, "vector/vector shift/rotate found.");
4086 /* See if the machine has a vector shifted by scalar insn and if not
4087 then see if it has a vector shifted by vector insn */
4088 else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
4090 optab = optab_for_tree_code (code, vectype, optab_scalar);
4091 if (optab
4092 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
4093 != CODE_FOR_nothing))
4095 scalar_shift_arg = true;
4096 if (vect_print_dump_info (REPORT_DETAILS))
4097 fprintf (vect_dump, "vector/scalar shift/rotate found.");
4099 else
4101 optab = optab_for_tree_code (code, vectype, optab_vector);
4102 if (vect_print_dump_info (REPORT_DETAILS)
4103 && optab
4104 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
4105 != CODE_FOR_nothing))
4106 fprintf (vect_dump, "vector/vector shift/rotate found.");
4110 else
4112 if (vect_print_dump_info (REPORT_DETAILS))
4113 fprintf (vect_dump, "operand mode requires invariant argument.");
4114 return false;
4117 else
4118 optab = optab_for_tree_code (code, vectype, optab_default);
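      /* For example, for the shift handling above (illustrative):
             a[i] = b[i] << 2
         has a constant shift amount, so a vector-shifted-by-scalar pattern is
         preferred when the target provides one, whereas
             a[i] = b[i] << c[i]
         shifts by a loop def and therefore requires the vector/vector form.  */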
4120 /* Supportable by target? */
4121 if (!optab)
4123 if (vect_print_dump_info (REPORT_DETAILS))
4124 fprintf (vect_dump, "no optab.");
4125 return false;
4127 vec_mode = TYPE_MODE (vectype);
4128 icode = (int) optab_handler (optab, vec_mode)->insn_code;
4129 if (icode == CODE_FOR_nothing)
4131 if (vect_print_dump_info (REPORT_DETAILS))
4132 fprintf (vect_dump, "op not supported by target.");
4133 /* Check only during analysis. */
4134 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4135 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4136 < vect_min_worthwhile_factor (code)
4137 && !vec_stmt))
4138 return false;
4139 if (vect_print_dump_info (REPORT_DETAILS))
4140 fprintf (vect_dump, "proceeding using word mode.");
4143 /* Worthwhile without SIMD support? Check only during analysis. */
4144 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4145 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4146 < vect_min_worthwhile_factor (code)
4147 && !vec_stmt)
4149 if (vect_print_dump_info (REPORT_DETAILS))
4150 fprintf (vect_dump, "not worthwhile without SIMD support.");
4151 return false;
4154 if (!vec_stmt) /* transformation not required. */
4156 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4157 if (vect_print_dump_info (REPORT_DETAILS))
4158 fprintf (vect_dump, "=== vectorizable_operation ===");
4159 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4160 return true;
4163 /** Transform. **/
4165 if (vect_print_dump_info (REPORT_DETAILS))
4166 fprintf (vect_dump, "transform binary/unary operation.");
4168 /* Handle def. */
4169 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4171 /* Allocate VECs for vector operands. In case of SLP, vector operands are
4172 created in the previous stages of the recursion, so no allocation is
4173 needed, except for the case of shift with scalar shift argument. In that
4174 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
4175 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
4176 In case of loop-based vectorization we allocate VECs of size 1. We
4177 allocate VEC_OPRNDS1 only in case of binary operation. */
4178 if (!slp_node)
4180 vec_oprnds0 = VEC_alloc (tree, heap, 1);
4181 if (op_type == binary_op)
4182 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4184 else if (scalar_shift_arg)
4185 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
4187 /* In case the vectorization factor (VF) is bigger than the number
4188 of elements that we can fit in a vectype (nunits), we have to generate
4189 more than one vector stmt - i.e - we need to "unroll" the
4190 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4191 from one copy of the vector stmt to the next, in the field
4192 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4193 stages to find the correct vector defs to be used when vectorizing
4194 stmts that use the defs of the current stmt. The example below illustrates
4195 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4196 4 vectorized stmts):
4198 before vectorization:
4199 RELATED_STMT VEC_STMT
4200 S1: x = memref - -
4201 S2: z = x + 1 - -
4203 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4204 there):
4205 RELATED_STMT VEC_STMT
4206 VS1_0: vx0 = memref0 VS1_1 -
4207 VS1_1: vx1 = memref1 VS1_2 -
4208 VS1_2: vx2 = memref2 VS1_3 -
4209 VS1_3: vx3 = memref3 - -
4210 S1: x = load - VS1_0
4211 S2: z = x + 1 - -
4213 step2: vectorize stmt S2 (done here):
4214 To vectorize stmt S2 we first need to find the relevant vector
4215 def for the first operand 'x'. This is, as usual, obtained from
4216 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4217 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4218 relevant vector def 'vx0'. Having found 'vx0' we can generate
4219 the vector stmt VS2_0, and as usual, record it in the
4220 STMT_VINFO_VEC_STMT of stmt S2.
4221 When creating the second copy (VS2_1), we obtain the relevant vector
4222 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4223 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4224 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4225 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4226 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4227 chain of stmts and pointers:
4228 RELATED_STMT VEC_STMT
4229 VS1_0: vx0 = memref0 VS1_1 -
4230 VS1_1: vx1 = memref1 VS1_2 -
4231 VS1_2: vx2 = memref2 VS1_3 -
4232 VS1_3: vx3 = memref3 - -
4233 S1: x = load - VS1_0
4234 VS2_0: vz0 = vx0 + v1 VS2_1 -
4235 VS2_1: vz1 = vx1 + v1 VS2_2 -
4236 VS2_2: vz2 = vx2 + v1 VS2_3 -
4237 VS2_3: vz3 = vx3 + v1 - -
4238 S2: z = x + 1 - VS2_0 */
4240 prev_stmt_info = NULL;
4241 for (j = 0; j < ncopies; j++)
4243 /* Handle uses. */
4244 if (j == 0)
4246 if (op_type == binary_op && scalar_shift_arg)
4248 /* Vector shl and shr insn patterns can be defined with scalar
4249 operand 2 (shift operand). In this case, use constant or loop
4250 invariant op1 directly, without extending it to vector mode
4251 first. */
4252 optab_op2_mode = insn_data[icode].operand[2].mode;
4253 if (!VECTOR_MODE_P (optab_op2_mode))
4255 if (vect_print_dump_info (REPORT_DETAILS))
4256 fprintf (vect_dump, "operand 1 using scalar mode.");
4257 vec_oprnd1 = op1;
4258 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4259 if (slp_node)
4261 /* Store vec_oprnd1 for every vector stmt to be created
4262 for SLP_NODE. We check during the analysis that all the
4263 shift arguments are the same.
4264 TODO: Allow different constants for different vector
4265 stmts generated for an SLP instance. */
4266 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4267 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4272 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4273 (a special case for certain kinds of vector shifts); otherwise,
4274 operand 1 should be of a vector type (the usual case). */
4275 if (op_type == binary_op && !vec_oprnd1)
4276 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4277 slp_node);
4278 else
4279 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4280 slp_node);
4282 else
4283 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4285 /* Arguments are ready. Create the new vector stmt. */
4286 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
4288 vop1 = ((op_type == binary_op)
4289 ? VEC_index (tree, vec_oprnds1, i) : NULL);
4290 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4291 new_temp = make_ssa_name (vec_dest, new_stmt);
4292 gimple_assign_set_lhs (new_stmt, new_temp);
4293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4294 if (slp_node)
4295 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4298 if (slp_node)
4299 continue;
4301 if (j == 0)
4302 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4303 else
4304 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4305 prev_stmt_info = vinfo_for_stmt (new_stmt);
4308 VEC_free (tree, heap, vec_oprnds0);
4309 if (vec_oprnds1)
4310 VEC_free (tree, heap, vec_oprnds1);
4312 return true;
4316 /* Get vectorized definitions for loop-based vectorization. For the first
4317 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4318 scalar operand), and for the rest we get a copy with
4319 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4320 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4321 The vectors are collected into VEC_OPRNDS. */
4323 static void
4324 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
4325 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
4327 tree vec_oprnd;
4329 /* Get first vector operand. */
4330 /* All the vector operands except the very first one (that is scalar oprnd)
4331 are stmt copies. */
4332 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4333 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
4334 else
4335 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4337 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
4339 /* Get second vector operand. */
4340 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4341 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
4343 *oprnd = vec_oprnd;
4345 /* For conversion in multiple steps, continue to get operands
4346 recursively. */
4347 if (multi_step_cvt)
4348 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4352 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4353 For multi-step conversions store the resulting vectors and call the function
4354 recursively. */
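/* For example (illustrative): if VEC_OPRNDS holds four vector defs, they are
   paired as (0,1) and (2,3); each pair feeds one narrowing stmt (for instance
   a VEC_PACK_TRUNC_EXPR), and for a multi-step conversion the two results are
   stored back into VEC_OPRNDS for the next recursive step.  */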
4356 static void
4357 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
4358 int multi_step_cvt, gimple stmt,
4359 VEC (tree, heap) *vec_dsts,
4360 gimple_stmt_iterator *gsi,
4361 slp_tree slp_node, enum tree_code code,
4362 stmt_vec_info *prev_stmt_info)
4364 unsigned int i;
4365 tree vop0, vop1, new_tmp, vec_dest;
4366 gimple new_stmt;
4367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4369 vec_dest = VEC_pop (tree, vec_dsts);
4371 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
4373 /* Create demotion operation. */
4374 vop0 = VEC_index (tree, *vec_oprnds, i);
4375 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
4376 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4377 new_tmp = make_ssa_name (vec_dest, new_stmt);
4378 gimple_assign_set_lhs (new_stmt, new_tmp);
4379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4381 if (multi_step_cvt)
4382 /* Store the resulting vector for next recursive call. */
4383 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
4384 else
4386 /* This is the last step of the conversion sequence. Store the
4387 vectors in SLP_NODE or in vector info of the scalar statement
4388 (or in STMT_VINFO_RELATED_STMT chain). */
4389 if (slp_node)
4390 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4391 else
4393 if (!*prev_stmt_info)
4394 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4395 else
4396 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4398 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4403 /* For multi-step demotion operations we first generate demotion operations
4404 from the source type to the intermediate types, and then combine the
4405 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4406 type. */
4407 if (multi_step_cvt)
4409 /* At each level of recursion we have half of the operands we had at the
4410 previous level.  */
4411 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
4412 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4413 stmt, vec_dsts, gsi, slp_node,
4414 code, prev_stmt_info);
4419 /* Function vectorizable_type_demotion
4421 Check if STMT performs a binary or unary operation that involves
4422 type demotion, and if it can be vectorized.
4423 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4424 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4425 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4427 bool
4428 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
4429 gimple *vec_stmt, slp_tree slp_node)
4431 tree vec_dest;
4432 tree scalar_dest;
4433 tree op0;
4434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4435 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4436 enum tree_code code, code1 = ERROR_MARK;
4437 tree def;
4438 gimple def_stmt;
4439 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4440 stmt_vec_info prev_stmt_info;
4441 int nunits_in;
4442 int nunits_out;
4443 tree vectype_out;
4444 int ncopies;
4445 int j, i;
4446 tree vectype_in;
4447 int multi_step_cvt = 0;
4448 VEC (tree, heap) *vec_oprnds0 = NULL;
4449 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
4450 tree last_oprnd, intermediate_type;
4452 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4453 return false;
4455 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4456 return false;
4458 /* Is STMT a vectorizable type-demotion operation? */
4459 if (!is_gimple_assign (stmt))
4460 return false;
4462 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4463 return false;
4465 code = gimple_assign_rhs_code (stmt);
4466 if (!CONVERT_EXPR_CODE_P (code))
4467 return false;
4469 op0 = gimple_assign_rhs1 (stmt);
4470 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4471 if (!vectype_in)
4472 return false;
4473 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4475 scalar_dest = gimple_assign_lhs (stmt);
4476 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4477 if (!vectype_out)
4478 return false;
4479 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4480 if (nunits_in >= nunits_out)
4481 return false;
4483 /* Multiple types in SLP are handled by creating the appropriate number of
4484 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4485 case of SLP. */
4486 if (slp_node)
4487 ncopies = 1;
4488 else
4489 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4491 gcc_assert (ncopies >= 1);
4493 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4494 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4495 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4496 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4497 && CONVERT_EXPR_CODE_P (code))))
4498 return false;
4500 /* Check the operands of the operation. */
4501 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4503 if (vect_print_dump_info (REPORT_DETAILS))
4504 fprintf (vect_dump, "use not simple.");
4505 return false;
4508 /* Supportable by target? */
4509 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
4510 &multi_step_cvt, &interm_types))
4511 return false;
4513 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4515 if (!vec_stmt) /* transformation not required. */
4517 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4518 if (vect_print_dump_info (REPORT_DETAILS))
4519 fprintf (vect_dump, "=== vectorizable_demotion ===");
4520 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4521 return true;
4524 /** Transform. **/
4525 if (vect_print_dump_info (REPORT_DETAILS))
4526 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
4527 ncopies);
4529 /* In case of multi-step demotion, we first generate demotion operations to
4530 the intermediate types, and then from those types to the final one.
4531 We create vector destinations for the intermediate types (TYPES) received
4532 from supportable_narrowing_operation, and store them in the correct order
4533 for future use in vect_create_vectorized_demotion_stmts(). */
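      /* For example (assuming V4SI, V8HI and V16QI vectypes on the target):
         an int -> char demotion is performed as int -> short -> char, so
         VEC_DSTS holds the final V16QI destination (pushed first) with the
         intermediate V8HI destination on top, matching the pop order used by
         vect_create_vectorized_demotion_stmts().  */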
4534 if (multi_step_cvt)
4535 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
4536 else
4537 vec_dsts = VEC_alloc (tree, heap, 1);
4539 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4540 VEC_quick_push (tree, vec_dsts, vec_dest);
4542 if (multi_step_cvt)
4544 for (i = VEC_length (tree, interm_types) - 1;
4545 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
4547 vec_dest = vect_create_destination_var (scalar_dest,
4548 intermediate_type);
4549 VEC_quick_push (tree, vec_dsts, vec_dest);
4553 /* In case the vectorization factor (VF) is bigger than the number
4554 of elements that we can fit in a vectype (nunits), we have to generate
4555 more than one vector stmt - i.e - we need to "unroll" the
4556 vector stmt by a factor VF/nunits. */
4557 last_oprnd = op0;
4558 prev_stmt_info = NULL;
4559 for (j = 0; j < ncopies; j++)
4561 /* Handle uses. */
4562 if (slp_node)
4563 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
4564 else
4566 VEC_free (tree, heap, vec_oprnds0);
4567 vec_oprnds0 = VEC_alloc (tree, heap,
4568 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
4569 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4570 vect_pow2 (multi_step_cvt) - 1);
4573 /* Arguments are ready. Create the new vector stmts. */
4574 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
4575 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
4576 multi_step_cvt, stmt, tmp_vec_dsts,
4577 gsi, slp_node, code1,
4578 &prev_stmt_info);
4581 VEC_free (tree, heap, vec_oprnds0);
4582 VEC_free (tree, heap, vec_dsts);
4583 VEC_free (tree, heap, tmp_vec_dsts);
4584 VEC_free (tree, heap, interm_types);
4586 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4587 return true;
4591 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4592 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4593 the resulting vectors and call the function recursively. */
4595 static void
4596 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
4597 VEC (tree, heap) **vec_oprnds1,
4598 int multi_step_cvt, gimple stmt,
4599 VEC (tree, heap) *vec_dsts,
4600 gimple_stmt_iterator *gsi,
4601 slp_tree slp_node, enum tree_code code1,
4602 enum tree_code code2, tree decl1,
4603 tree decl2, int op_type,
4604 stmt_vec_info *prev_stmt_info)
4606 int i;
4607 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
4608 gimple new_stmt1, new_stmt2;
4609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4610 VEC (tree, heap) *vec_tmp;
4612 vec_dest = VEC_pop (tree, vec_dsts);
4613 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
4615 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
4617 if (op_type == binary_op)
4618 vop1 = VEC_index (tree, *vec_oprnds1, i);
4619 else
4620 vop1 = NULL_TREE;
4622 /* Generate the two halves of promotion operation. */
4623 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4624 op_type, vec_dest, gsi, stmt);
4625 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4626 op_type, vec_dest, gsi, stmt);
4627 if (is_gimple_call (new_stmt1))
4629 new_tmp1 = gimple_call_lhs (new_stmt1);
4630 new_tmp2 = gimple_call_lhs (new_stmt2);
4632 else
4634 new_tmp1 = gimple_assign_lhs (new_stmt1);
4635 new_tmp2 = gimple_assign_lhs (new_stmt2);
4638 if (multi_step_cvt)
4640 /* Store the results for the recursive call. */
4641 VEC_quick_push (tree, vec_tmp, new_tmp1);
4642 VEC_quick_push (tree, vec_tmp, new_tmp2);
4644 else
4646 /* Last step of promotion sequence - store the results.  */
4647 if (slp_node)
4649 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
4650 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
4652 else
4654 if (!*prev_stmt_info)
4655 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
4656 else
4657 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
4659 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
4660 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
4661 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
4666 if (multi_step_cvt)
4668 /* For a multi-step promotion operation we call the function
4669 recursively for every stage.  We start from the input type,
4670 create promotion operations to the intermediate types, and then
4671 create promotions to the output type. */
4672 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
4673 VEC_free (tree, heap, vec_tmp);
4674 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
4675 multi_step_cvt - 1, stmt,
4676 vec_dsts, gsi, slp_node, code1,
4677 code2, decl1, decl2, op_type,
4678 prev_stmt_info);
4683 /* Function vectorizable_type_promotion
4685 Check if STMT performs a binary or unary operation that involves
4686 type promotion, and if it can be vectorized.
4687 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4688 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4689 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4691 bool
4692 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
4693 gimple *vec_stmt, slp_tree slp_node)
4695 tree vec_dest;
4696 tree scalar_dest;
4697 tree op0, op1 = NULL;
4698 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4701 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4702 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4703 int op_type;
4704 tree def;
4705 gimple def_stmt;
4706 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4707 stmt_vec_info prev_stmt_info;
4708 int nunits_in;
4709 int nunits_out;
4710 tree vectype_out;
4711 int ncopies;
4712 int j, i;
4713 tree vectype_in;
4714 tree intermediate_type = NULL_TREE;
4715 int multi_step_cvt = 0;
4716 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4717 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
4719 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4720 return false;
4722 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4723 return false;
4725 /* Is STMT a vectorizable type-promotion operation? */
4726 if (!is_gimple_assign (stmt))
4727 return false;
4729 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4730 return false;
4732 code = gimple_assign_rhs_code (stmt);
4733 if (!CONVERT_EXPR_CODE_P (code)
4734 && code != WIDEN_MULT_EXPR)
4735 return false;
4737 op0 = gimple_assign_rhs1 (stmt);
4738 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4739 if (!vectype_in)
4740 return false;
4741 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4743 scalar_dest = gimple_assign_lhs (stmt);
4744 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4745 if (!vectype_out)
4746 return false;
4747 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4748 if (nunits_in <= nunits_out)
4749 return false;
4751 /* Multiple types in SLP are handled by creating the appropriate number of
4752 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4753 case of SLP. */
4754 if (slp_node)
4755 ncopies = 1;
4756 else
4757 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4759 gcc_assert (ncopies >= 1);
4761 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4762 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4763 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4764 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4765 && CONVERT_EXPR_CODE_P (code))))
4766 return false;
4768 /* Check the operands of the operation. */
4769 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4771 if (vect_print_dump_info (REPORT_DETAILS))
4772 fprintf (vect_dump, "use not simple.");
4773 return false;
4776 op_type = TREE_CODE_LENGTH (code);
4777 if (op_type == binary_op)
4779 op1 = gimple_assign_rhs2 (stmt);
4780 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4782 if (vect_print_dump_info (REPORT_DETAILS))
4783 fprintf (vect_dump, "use not simple.");
4784 return false;
4788 /* Supportable by target? */
4789 if (!supportable_widening_operation (code, stmt, vectype_in,
4790 &decl1, &decl2, &code1, &code2,
4791 &multi_step_cvt, &interm_types))
4792 return false;
4794 /* Binary widening operation can only be supported directly by the
4795 architecture. */
4796 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4798 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4800 if (!vec_stmt) /* transformation not required. */
4802 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4803 if (vect_print_dump_info (REPORT_DETAILS))
4804 fprintf (vect_dump, "=== vectorizable_promotion ===");
4805 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4806 return true;
4809 /** Transform. **/
4811 if (vect_print_dump_info (REPORT_DETAILS))
4812 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4813 ncopies);
4815 /* Handle def. */
4816 /* In case of multi-step promotion, we first generate promotion operations
4817 to the intermediate types, and then from those types to the final one.
4818 We store the vector destinations in VEC_DSTS in the correct order for
4819 recursive creation of promotion operations in
4820 vect_create_vectorized_promotion_stmts(). Vector destinations are created
4821 according to TYPES received from supportable_widening_operation(). */
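      /* For example (assuming V16QI, V8HI and V4SI vectypes on the target):
         a char -> int promotion is performed as char -> short -> int, so
         VEC_DSTS holds the final V4SI destination (pushed first) with the
         intermediate V8HI destination on top, matching the pop order used by
         vect_create_vectorized_promotion_stmts().  */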
4822 if (multi_step_cvt)
4823 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
4824 else
4825 vec_dsts = VEC_alloc (tree, heap, 1);
4827 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4828 VEC_quick_push (tree, vec_dsts, vec_dest);
4830 if (multi_step_cvt)
4832 for (i = VEC_length (tree, interm_types) - 1;
4833 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
4835 vec_dest = vect_create_destination_var (scalar_dest,
4836 intermediate_type);
4837 VEC_quick_push (tree, vec_dsts, vec_dest);
4841 if (!slp_node)
4843 vec_oprnds0 = VEC_alloc (tree, heap,
4844 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4845 if (op_type == binary_op)
4846 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4849 /* In case the vectorization factor (VF) is bigger than the number
4850 of elements that we can fit in a vectype (nunits), we have to generate
4851 more than one vector stmt - i.e - we need to "unroll" the
4852 vector stmt by a factor VF/nunits. */
4854 prev_stmt_info = NULL;
4855 for (j = 0; j < ncopies; j++)
4857 /* Handle uses. */
4858 if (j == 0)
4860 if (slp_node)
4861 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
4862 else
4864 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4865 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
4866 if (op_type == binary_op)
4868 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4869 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4873 else
4875 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4876 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
4877 if (op_type == binary_op)
4879 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4880 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
4884 /* Arguments are ready. Create the new vector stmts. */
4885 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
4886 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
4887 multi_step_cvt, stmt,
4888 tmp_vec_dsts,
4889 gsi, slp_node, code1, code2,
4890 decl1, decl2, op_type,
4891 &prev_stmt_info);
4894 VEC_free (tree, heap, vec_dsts);
4895 VEC_free (tree, heap, tmp_vec_dsts);
4896 VEC_free (tree, heap, interm_types);
4897 VEC_free (tree, heap, vec_oprnds0);
4898 VEC_free (tree, heap, vec_oprnds1);
4900 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4901 return true;
4905 /* Function vect_strided_store_supported.
4907 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4908 and FALSE otherwise. */
4910 static bool
4911 vect_strided_store_supported (tree vectype)
4913 optab interleave_high_optab, interleave_low_optab;
4914 int mode;
4916 mode = (int) TYPE_MODE (vectype);
4918 /* Check that the operation is supported. */
4919 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4920 vectype, optab_default);
4921 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4922 vectype, optab_default);
4923 if (!interleave_high_optab || !interleave_low_optab)
4925 if (vect_print_dump_info (REPORT_DETAILS))
4926 fprintf (vect_dump, "no optab for interleave.");
4927 return false;
4930 if (optab_handler (interleave_high_optab, mode)->insn_code
4931 == CODE_FOR_nothing
4932 || optab_handler (interleave_low_optab, mode)->insn_code
4933 == CODE_FOR_nothing)
4935 if (vect_print_dump_info (REPORT_DETAILS))
4936 fprintf (vect_dump, "interleave op not supported by target.");
4937 return false;
4940 return true;
4944 /* Function vect_permute_store_chain.
4946 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4947 a power of 2, generate interleave_high/low stmts to reorder the data
4948 correctly for the stores. Return the final references for stores in
4949 RESULT_CHAIN.
4951 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4952 The input is 4 vectors each containing 8 elements. We assign a number to each
4953 element; the input sequence is:
4955 1st vec: 0 1 2 3 4 5 6 7
4956 2nd vec: 8 9 10 11 12 13 14 15
4957 3rd vec: 16 17 18 19 20 21 22 23
4958 4th vec: 24 25 26 27 28 29 30 31
4960 The output sequence should be:
4962 1st vec: 0 8 16 24 1 9 17 25
4963 2nd vec: 2 10 18 26 3 11 19 27
4964 3rd vec: 4 12 20 28 5 13 21 29
4965 4th vec: 6 14 22 30 7 15 23 31
4967 i.e., we interleave the contents of the four vectors in their order.
4969 We use interleave_high/low instructions to create such output. The input of
4970 each interleave_high/low operation is two vectors:
4971 1st vec 2nd vec
4972 0 1 2 3 4 5 6 7
4973 the even elements of the result vector are obtained left-to-right from the
4974 high/low elements of the first vector. The odd elements of the result are
4975 obtained left-to-right from the high/low elements of the second vector.
4976 The output of interleave_high will be: 0 4 1 5
4977 and of interleave_low: 2 6 3 7
4980 The permutation is done in log2 (LENGTH) stages.  In each stage interleave_high
4981 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4982 where the first argument is taken from the first half of DR_CHAIN and the
4983 second argument from its second half.
4984 In our example,
4986 I1: interleave_high (1st vec, 3rd vec)
4987 I2: interleave_low (1st vec, 3rd vec)
4988 I3: interleave_high (2nd vec, 4th vec)
4989 I4: interleave_low (2nd vec, 4th vec)
4991 The output for the first stage is:
4993 I1: 0 16 1 17 2 18 3 19
4994 I2: 4 20 5 21 6 22 7 23
4995 I3: 8 24 9 25 10 26 11 27
4996 I4: 12 28 13 29 14 30 15 31
4998 The output of the second stage, i.e. the final result is:
5000 I1: 0 8 16 24 1 9 17 25
5001 I2: 2 10 18 26 3 11 19 27
5002 I3: 4 12 20 28 5 13 21 29
5003 I4: 6 14 22 30 7 15 23 31. */
5005 static bool
5006 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
5007 unsigned int length,
5008 gimple stmt,
5009 gimple_stmt_iterator *gsi,
5010 VEC(tree,heap) **result_chain)
5012 tree perm_dest, vect1, vect2, high, low;
5013 gimple perm_stmt;
5014 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5015 tree scalar_dest;
5016 int i;
5017 unsigned int j;
5018 enum tree_code high_code, low_code;
5020 scalar_dest = gimple_assign_lhs (stmt);
5022 /* Check that the operation is supported. */
5023 if (!vect_strided_store_supported (vectype))
5024 return false;
5026 *result_chain = VEC_copy (tree, heap, dr_chain);
5028 for (i = 0; i < exact_log2 (length); i++)
5030 for (j = 0; j < length/2; j++)
5032 vect1 = VEC_index (tree, dr_chain, j);
5033 vect2 = VEC_index (tree, dr_chain, j+length/2);
5035 /* Create interleaving stmt:
5036 in the case of big endian:
5037 high = interleave_high (vect1, vect2)
5038 and in the case of little endian:
5039 high = interleave_low (vect1, vect2). */
5040 perm_dest = create_tmp_var (vectype, "vect_inter_high");
5041 DECL_GIMPLE_REG_P (perm_dest) = 1;
5042 add_referenced_var (perm_dest);
5043 if (BYTES_BIG_ENDIAN)
5045 high_code = VEC_INTERLEAVE_HIGH_EXPR;
5046 low_code = VEC_INTERLEAVE_LOW_EXPR;
5048 else
5050 low_code = VEC_INTERLEAVE_HIGH_EXPR;
5051 high_code = VEC_INTERLEAVE_LOW_EXPR;
5053 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
5054 vect1, vect2);
5055 high = make_ssa_name (perm_dest, perm_stmt);
5056 gimple_assign_set_lhs (perm_stmt, high);
5057 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5058 VEC_replace (tree, *result_chain, 2*j, high);
5060 /* Create interleaving stmt:
5061 in the case of big endian:
5062 low = interleave_low (vect1, vect2)
5063 and in the case of little endian:
5064 low = interleave_high (vect1, vect2). */
5065 perm_dest = create_tmp_var (vectype, "vect_inter_low");
5066 DECL_GIMPLE_REG_P (perm_dest) = 1;
5067 add_referenced_var (perm_dest);
5068 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
5069 vect1, vect2);
5070 low = make_ssa_name (perm_dest, perm_stmt);
5071 gimple_assign_set_lhs (perm_stmt, low);
5072 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5073 VEC_replace (tree, *result_chain, 2*j+1, low);
5075 dr_chain = VEC_copy (tree, heap, *result_chain);
5077 return true;
5081 /* Function vectorizable_store.
5083 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5084 can be vectorized.
5085 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5086 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5087 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5089 bool
5090 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5091 slp_tree slp_node)
5093 tree scalar_dest;
5094 tree data_ref;
5095 tree op;
5096 tree vec_oprnd = NULL_TREE;
5097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5099 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5101 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5102 enum machine_mode vec_mode;
5103 tree dummy;
5104 enum dr_alignment_support alignment_support_scheme;
5105 tree def;
5106 gimple def_stmt;
5107 enum vect_def_type dt;
5108 stmt_vec_info prev_stmt_info = NULL;
5109 tree dataref_ptr = NULL_TREE;
5110 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5111 int ncopies;
5112 int j;
5113 gimple next_stmt, first_stmt = NULL;
5114 bool strided_store = false;
5115 unsigned int group_size, i;
5116 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
5117 bool inv_p;
5118 VEC(tree,heap) *vec_oprnds = NULL;
5119 bool slp = (slp_node != NULL);
5120 stmt_vec_info first_stmt_vinfo;
5121 unsigned int vec_num;
5123 /* Multiple types in SLP are handled by creating the appropriate number of
5124 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5125 case of SLP. */
5126 if (slp)
5127 ncopies = 1;
5128 else
5129 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5131 gcc_assert (ncopies >= 1);
5133 /* FORNOW. This restriction should be relaxed. */
5134 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5136 if (vect_print_dump_info (REPORT_DETAILS))
5137 fprintf (vect_dump, "multiple types in nested loop.");
5138 return false;
5141 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5142 return false;
5144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5145 return false;
5147 /* Is vectorizable store? */
5149 if (!is_gimple_assign (stmt))
5150 return false;
5152 scalar_dest = gimple_assign_lhs (stmt);
5153 if (TREE_CODE (scalar_dest) != ARRAY_REF
5154 && TREE_CODE (scalar_dest) != INDIRECT_REF
5155 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5156 return false;
5158 gcc_assert (gimple_assign_single_p (stmt));
5159 op = gimple_assign_rhs1 (stmt);
5160 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5162 if (vect_print_dump_info (REPORT_DETAILS))
5163 fprintf (vect_dump, "use not simple.");
5164 return false;
5167 /* If accesses through a pointer to vectype do not alias the original
5168 memory reference we have a problem. */
5169 if (get_alias_set (vectype) != get_alias_set (TREE_TYPE (scalar_dest))
5170 && !alias_set_subset_of (get_alias_set (vectype),
5171 get_alias_set (TREE_TYPE (scalar_dest))))
5173 if (vect_print_dump_info (REPORT_DETAILS))
5174 fprintf (vect_dump, "vector type does not alias scalar type");
5175 return false;
5178 if (!useless_type_conversion_p (TREE_TYPE (op), TREE_TYPE (scalar_dest)))
5180 if (vect_print_dump_info (REPORT_DETAILS))
5181 fprintf (vect_dump, "operands of different types");
5182 return false;
5185 vec_mode = TYPE_MODE (vectype);
5186 /* FORNOW.  In some cases we can vectorize even if the data-type is not supported
5187 (e.g. - array initialization with 0). */
5188 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
5189 return false;
5191 if (!STMT_VINFO_DATA_REF (stmt_info))
5192 return false;
5194 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5196 strided_store = true;
5197 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5198 if (!vect_strided_store_supported (vectype)
5199 && !PURE_SLP_STMT (stmt_info) && !slp)
5200 return false;
5202 if (first_stmt == stmt)
5204 /* STMT is the leader of the group. Check the operands of all the
5205 stmts of the group. */
5206 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
5207 while (next_stmt)
5209 gcc_assert (gimple_assign_single_p (next_stmt));
5210 op = gimple_assign_rhs1 (next_stmt);
5211 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5213 if (vect_print_dump_info (REPORT_DETAILS))
5214 fprintf (vect_dump, "use not simple.");
5215 return false;
5217 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5222 if (!vec_stmt) /* transformation not required. */
5224 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5225 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
5226 return true;
5229 /** Transform. **/
5231 if (strided_store)
5233 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5234 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5236 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5238 /* FORNOW */
5239 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5241 /* We vectorize all the stmts of the interleaving group when we
5242 reach the last stmt in the group. */
5243 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5244 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
5245 && !slp)
5247 *vec_stmt = NULL;
5248 return true;
5251 if (slp)
5252 strided_store = false;
5254 /* VEC_NUM is the number of vect stmts to be created for this group. */
5255 if (slp)
5256 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5257 else
5258 vec_num = group_size;
5260 else
5262 first_stmt = stmt;
5263 first_dr = dr;
5264 group_size = vec_num = 1;
5265 first_stmt_vinfo = stmt_info;
5268 if (vect_print_dump_info (REPORT_DETAILS))
5269 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
5271 dr_chain = VEC_alloc (tree, heap, group_size);
5272 oprnds = VEC_alloc (tree, heap, group_size);
5274 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5275 gcc_assert (alignment_support_scheme);
5276 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
5278 /* In case the vectorization factor (VF) is bigger than the number
5279 of elements that we can fit in a vectype (nunits), we have to generate
5280 more than one vector stmt - i.e - we need to "unroll" the
5281 vector stmt by a factor VF/nunits. For more details see documentation in
5282 vect_get_vec_def_for_copy_stmt. */
5284 /* In case of interleaving (non-unit strided access):
5286 S1: &base + 2 = x2
5287 S2: &base = x0
5288 S3: &base + 1 = x1
5289 S4: &base + 3 = x3
5291 We create vectorized stores starting from the base address (the access of
5292 the first stmt in the chain, S2 in the above example) when the last store
5293 stmt of the chain (S4) is reached:
5295 VS1: &base = vx2
5296 VS2: &base + vec_size*1 = vx0
5297 VS3: &base + vec_size*2 = vx1
5298 VS4: &base + vec_size*3 = vx3
5300 Then permutation statements are generated:
5302 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
5303 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
5306 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5307 (the order of the data-refs in the output of vect_permute_store_chain
5308 corresponds to the order of scalar stmts in the interleaving chain - see
5309 the documentation of vect_permute_store_chain()).
5311 In case of both multiple types and interleaving, above vector stores and
5312 permutation stmts are created for every copy. The result vector stmts are
5313 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5314 STMT_VINFO_RELATED_STMT for the next copies.
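/* Illustrative sketch (added; hypothetical example, not generated code):
   for an interleaving group of two scalar stores a[2*i] = x; a[2*i+1] = y;
   with V4SF vectors, the collected defs vx = {x0,x1,x2,x3} and
   vy = {y0,y1,y2,y3} are permuted by vect_permute_store_chain into
   {x0,y0,x1,y1} and {x2,y2,x3,y3}, and the two results are stored to
   consecutive vector-sized locations starting at the group's base address.  */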
5317 prev_stmt_info = NULL;
5318 for (j = 0; j < ncopies; j++)
5320 gimple new_stmt;
5321 gimple ptr_incr;
5323 if (j == 0)
5325 if (slp)
5327 /* Get vectorized arguments for SLP_NODE. */
5328 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
5330 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
5332 else
5334 /* For interleaved stores we collect vectorized defs for all the
5335 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5336 used as an input to vect_permute_store_chain(), and OPRNDS as
5337 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5339 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
5340 OPRNDS are of size 1. */
5341 next_stmt = first_stmt;
5342 for (i = 0; i < group_size; i++)
5344 /* Since gaps are not supported for interleaved stores,
5345 GROUP_SIZE is the exact number of stmts in the chain.
5346 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5347 there is no interleaving, GROUP_SIZE is 1, and only one
5348 iteration of the loop will be executed. */
5349 gcc_assert (next_stmt);
5350 gcc_assert (gimple_assign_single_p (next_stmt));
5351 op = gimple_assign_rhs1 (next_stmt);
5353 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5354 NULL);
5355 VEC_quick_push(tree, dr_chain, vec_oprnd);
5356 VEC_quick_push(tree, oprnds, vec_oprnd);
5357 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5361 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
5362 &dummy, &ptr_incr, false,
5363 &inv_p);
5364 gcc_assert (!inv_p);
5366 else
5368 /* For interleaved stores we created vectorized defs for all the
5369 defs stored in OPRNDS in the previous iteration (previous copy).
5370 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5371 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5372 next copy.
5373 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
5374 OPRNDS are of size 1. */
5375 for (i = 0; i < group_size; i++)
5377 op = VEC_index (tree, oprnds, i);
5378 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
5379 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5380 VEC_replace(tree, dr_chain, i, vec_oprnd);
5381 VEC_replace(tree, oprnds, i, vec_oprnd);
5383 dataref_ptr =
5384 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
5387 if (strided_store)
5389 result_chain = VEC_alloc (tree, heap, group_size);
5390 /* Permute. */
5391 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5392 &result_chain))
5393 return false;
5396 next_stmt = first_stmt;
5397 for (i = 0; i < vec_num; i++)
5399 if (i > 0)
5400 /* Bump the vector pointer. */
5401 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5402 NULL_TREE);
5404 if (slp)
5405 vec_oprnd = VEC_index (tree, vec_oprnds, i);
5406 else if (strided_store)
5407 /* For strided stores vectorized defs are interleaved in
5408 vect_permute_store_chain(). */
5409 vec_oprnd = VEC_index (tree, result_chain, i);
5411 data_ref = build_fold_indirect_ref (dataref_ptr);
5412 /* Arguments are ready. Create the new vector stmt. */
5413 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5415 mark_symbols_for_renaming (new_stmt);
5417 if (slp)
5418 continue;
5420 if (j == 0)
5421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5422 else
5423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5425 prev_stmt_info = vinfo_for_stmt (new_stmt);
5426 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5427 if (!next_stmt)
5428 break;
5432 VEC_free (tree, heap, dr_chain);
5433 VEC_free (tree, heap, oprnds);
5434 if (result_chain)
5435 VEC_free (tree, heap, result_chain);
5437 return true;
5441 /* Function vect_setup_realignment
5443 This function is called when vectorizing an unaligned load using
5444 the dr_explicit_realign[_optimized] scheme.
5445 This function generates the following code at the loop prolog:
5447 p = initial_addr;
5448 x msq_init = *(floor(p)); # prolog load
5449 realignment_token = call target_builtin;
5450 loop:
5451 x msq = phi (msq_init, ---)
5453 The stmts marked with x are generated only for the case of
5454 dr_explicit_realign_optimized.
5456 The code above sets up a new (vector) pointer, pointing to the first
5457 location accessed by STMT, and a "floor-aligned" load using that pointer.
5458 It also generates code to compute the "realignment-token" (if the relevant
5459 target hook was defined), and creates a phi-node at the loop-header bb
5460 whose arguments are the result of the prolog-load (created by this
5461 function) and the result of a load that takes place in the loop (to be
5462 created by the caller to this function).
5464 For the case of dr_explicit_realign_optimized:
5465 The caller to this function uses the phi-result (msq) to create the
5466 realignment code inside the loop, and sets up the missing phi argument,
5467 as follows:
5468 loop:
5469 msq = phi (msq_init, lsq)
5470 lsq = *(floor(p')); # load in loop
5471 result = realign_load (msq, lsq, realignment_token);
5473 For the case of dr_explicit_realign:
5474 loop:
5475 msq = *(floor(p)); # load in loop
5476 p' = p + (VS-1);
5477 lsq = *(floor(p')); # load in loop
5478 result = realign_load (msq, lsq, realignment_token);
5480 Input:
5481 STMT - (scalar) load stmt to be vectorized. This load accesses
5482 a memory location that may be unaligned.
5483 BSI - place where new code is to be inserted.
5484 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
5485 is used.
5487 Output:
5488 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5489 target hook, if defined.
5490 Return value - the result of the loop-header phi node. */
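/* Illustrative note (added; hypothetical numbers): with 16-byte vectors of
   4-byte floats, if p points to a[2] of a 16-byte aligned array, then
   *(floor(p)) loads a[0..3], *(floor(p + VS - 1)) loads a[4..7], and
   realign_load, guided by the realignment token, combines the two into the
   desired a[2..5].  */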
5492 static tree
5493 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
5494 tree *realignment_token,
5495 enum dr_alignment_support alignment_support_scheme,
5496 tree init_addr,
5497 struct loop **at_loop)
5499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5500 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5501 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5502 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5503 edge pe;
5504 tree scalar_dest = gimple_assign_lhs (stmt);
5505 tree vec_dest;
5506 gimple inc;
5507 tree ptr;
5508 tree data_ref;
5509 gimple new_stmt;
5510 basic_block new_bb;
5511 tree msq_init = NULL_TREE;
5512 tree new_temp;
5513 gimple phi_stmt;
5514 tree msq = NULL_TREE;
5515 gimple_seq stmts = NULL;
5516 bool inv_p;
5517 bool compute_in_loop = false;
5518 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5519 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5520 struct loop *loop_for_initial_load;
5522 gcc_assert (alignment_support_scheme == dr_explicit_realign
5523 || alignment_support_scheme == dr_explicit_realign_optimized);
5525 /* We need to generate three things:
5526 1. the misalignment computation
5527 2. the extra vector load (for the optimized realignment scheme).
5528 3. the phi node for the two vectors from which the realignment is
5529 done (for the optimized realignment scheme).
5532 /* 1. Determine where to generate the misalignment computation.
5534 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5535 calculation will be generated by this function, outside the loop (in the
5536 preheader). Otherwise, INIT_ADDR had already been computed for us by the
5537 caller, inside the loop.
5539 Background: If the misalignment remains fixed throughout the iterations of
5540 the loop, then both realignment schemes are applicable, and also the
5541 misalignment computation can be done outside LOOP. This is because we are
5542 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5543 are a multiple of VS (the Vector Size), and therefore the misalignment in
5544 different vectorized LOOP iterations is always the same.
5545 The problem arises only if the memory access is in an inner-loop nested
5546 inside LOOP, which is now being vectorized using outer-loop vectorization.
5547 This is the only case when the misalignment of the memory access may not
5548 remain fixed throughout the iterations of the inner-loop (as explained in
5549 detail in vect_supportable_dr_alignment). In this case, not only is the
5550 optimized realignment scheme not applicable, but also the misalignment
5551 computation (and generation of the realignment token that is passed to
5552 REALIGN_LOAD) have to be done inside the loop.
5554 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5555 or not, which in turn determines if the misalignment is computed inside
5556 the inner-loop, or outside LOOP. */
5558 if (init_addr != NULL_TREE)
5560 compute_in_loop = true;
5561 gcc_assert (alignment_support_scheme == dr_explicit_realign);
5565 /* 2. Determine where to generate the extra vector load.
5567 For the optimized realignment scheme, instead of generating two vector
5568 loads in each iteration, we generate a single extra vector load in the
5569 preheader of the loop, and in each iteration reuse the result of the
5570 vector load from the previous iteration. In case the memory access is in
5571 an inner-loop nested inside LOOP, which is now being vectorized using
5572 outer-loop vectorization, we need to determine whether this initial vector
5573 load should be generated at the preheader of the inner-loop, or can be
5574 generated at the preheader of LOOP. If the memory access has no evolution
5575 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5576 to be generated inside LOOP (in the preheader of the inner-loop). */
5578 if (nested_in_vect_loop)
5580 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5581 bool invariant_in_outerloop =
5582 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5583 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5585 else
5586 loop_for_initial_load = loop;
5587 if (at_loop)
5588 *at_loop = loop_for_initial_load;
5590 /* 3. For the case of the optimized realignment, create the first vector
5591 load at the loop preheader. */
5593 if (alignment_support_scheme == dr_explicit_realign_optimized)
5595 /* Create msq_init = *(floor(p1)) in the loop preheader */
5597 gcc_assert (!compute_in_loop);
5598 pe = loop_preheader_edge (loop_for_initial_load);
5599 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5600 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5601 &init_addr, &inc, true, &inv_p);
5602 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5603 new_stmt = gimple_build_assign (vec_dest, data_ref);
5604 new_temp = make_ssa_name (vec_dest, new_stmt);
5605 gimple_assign_set_lhs (new_stmt, new_temp);
5606 mark_symbols_for_renaming (new_stmt);
5607 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5608 gcc_assert (!new_bb);
5609 msq_init = gimple_assign_lhs (new_stmt);
5612 /* 4. Create realignment token using a target builtin, if available.
5613 It is done either inside the containing loop, or before LOOP (as
5614 determined above). */
5616 if (targetm.vectorize.builtin_mask_for_load)
5618 tree builtin_decl;
5620 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5621 if (compute_in_loop)
5622 gcc_assert (init_addr); /* already computed by the caller. */
5623 else
5625 /* Generate the INIT_ADDR computation outside LOOP. */
5626 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5627 NULL_TREE, loop);
5628 pe = loop_preheader_edge (loop);
5629 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
5630 gcc_assert (!new_bb);
5633 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5634 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
5635 vec_dest =
5636 vect_create_destination_var (scalar_dest,
5637 gimple_call_return_type (new_stmt));
5638 new_temp = make_ssa_name (vec_dest, new_stmt);
5639 gimple_call_set_lhs (new_stmt, new_temp);
5641 if (compute_in_loop)
5642 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5643 else
5645 /* Generate the misalignment computation outside LOOP. */
5646 pe = loop_preheader_edge (loop);
5647 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5648 gcc_assert (!new_bb);
5651 *realignment_token = gimple_call_lhs (new_stmt);
5653 /* The result of the CALL_EXPR to this builtin is determined from
5654 the value of the parameter and no global variables are touched
5655 which makes the builtin a "const" function. Requiring the
5656 builtin to have the "const" attribute makes it unnecessary
5657 to call mark_call_clobbered. */
5658 gcc_assert (TREE_READONLY (builtin_decl));
5661 if (alignment_support_scheme == dr_explicit_realign)
5662 return msq;
5664 gcc_assert (!compute_in_loop);
5665 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5668 /* 5. Create msq = phi <msq_init, lsq> in loop */
5670 pe = loop_preheader_edge (containing_loop);
5671 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5672 msq = make_ssa_name (vec_dest, NULL);
5673 phi_stmt = create_phi_node (msq, containing_loop->header);
5674 SSA_NAME_DEF_STMT (msq) = phi_stmt;
5675 add_phi_arg (phi_stmt, msq_init, pe);
5677 return msq;
5681 /* Function vect_strided_load_supported.
5683 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5684 and FALSE otherwise. */
5686 static bool
5687 vect_strided_load_supported (tree vectype)
5689 optab perm_even_optab, perm_odd_optab;
5690 int mode;
5692 mode = (int) TYPE_MODE (vectype);
5694 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
5695 optab_default);
5696 if (!perm_even_optab)
5698 if (vect_print_dump_info (REPORT_DETAILS))
5699 fprintf (vect_dump, "no optab for perm_even.");
5700 return false;
5703 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5705 if (vect_print_dump_info (REPORT_DETAILS))
5706 fprintf (vect_dump, "perm_even op not supported by target.");
5707 return false;
5710 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
5711 optab_default);
5712 if (!perm_odd_optab)
5714 if (vect_print_dump_info (REPORT_DETAILS))
5715 fprintf (vect_dump, "no optab for perm_odd.");
5716 return false;
5719 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5721 if (vect_print_dump_info (REPORT_DETAILS))
5722 fprintf (vect_dump, "perm_odd op not supported by target.");
5723 return false;
5725 return true;
5729 /* Function vect_permute_load_chain.
5731 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5732 a power of 2, generate extract_even/odd stmts to reorder the input data
5733 correctly. Return the final references for loads in RESULT_CHAIN.
5735 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5736 The input is 4 vectors each containing 8 elements. We assign a number to each
5737 element; the input sequence is:
5739 1st vec: 0 1 2 3 4 5 6 7
5740 2nd vec: 8 9 10 11 12 13 14 15
5741 3rd vec: 16 17 18 19 20 21 22 23
5742 4th vec: 24 25 26 27 28 29 30 31
5744 The output sequence should be:
5746 1st vec: 0 4 8 12 16 20 24 28
5747 2nd vec: 1 5 9 13 17 21 25 29
5748 3rd vec: 2 6 10 14 18 22 26 30
5749 4th vec: 3 7 11 15 19 23 27 31
5751 i.e., the first output vector should contain the first elements of each
5752 interleaving group, etc.
5754 We use extract_even/odd instructions to create such output. The input of each
5755 extract_even/odd operation is two vectors
5756 1st vec 2nd vec
5757 0 1 2 3 4 5 6 7
5759 and the output is the vector of extracted even/odd elements. The output of
5760 extract_even will be: 0 2 4 6
5761 and of extract_odd: 1 3 5 7
5764 The permutation is done in log LENGTH stages. In each stage extract_even and
5765 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5766 order. In our example,
5768 E1: extract_even (1st vec, 2nd vec)
5769 E2: extract_odd (1st vec, 2nd vec)
5770 E3: extract_even (3rd vec, 4th vec)
5771 E4: extract_odd (3rd vec, 4th vec)
5773 The output for the first stage will be:
5775 E1: 0 2 4 6 8 10 12 14
5776 E2: 1 3 5 7 9 11 13 15
5777 E3: 16 18 20 22 24 26 28 30
5778 E4: 17 19 21 23 25 27 29 31
5780 In order to proceed and create the correct sequence for the next stage (or
5781 for the correct output, if the second stage is the last one, as in our
5782 example), we first put the output of the extract_even operation and then the
5783 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5784 The input for the second stage is:
5786 1st vec (E1): 0 2 4 6 8 10 12 14
5787 2nd vec (E3): 16 18 20 22 24 26 28 30
5788 3rd vec (E2): 1 3 5 7 9 11 13 15
5789 4th vec (E4): 17 19 21 23 25 27 29 31
5791 The output of the second stage:
5793 E1: 0 4 8 12 16 20 24 28
5794 E2: 2 6 10 14 18 22 26 30
5795 E3: 1 5 9 13 17 21 25 29
5796 E4: 3 7 11 15 19 23 27 31
5798 And RESULT_CHAIN after reordering:
5800 1st vec (E1): 0 4 8 12 16 20 24 28
5801 2nd vec (E3): 1 5 9 13 17 21 25 29
5802 3rd vec (E2): 2 6 10 14 18 22 26 30
5803 4th vec (E4): 3 7 11 15 19 23 27 31. */
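/* Note added for clarity: the loop below realizes the reordering described
   above by placing the even-extract result of pair J at index J/2 of
   RESULT_CHAIN and the odd-extract result at index J/2 + LENGTH/2, which
   yields exactly the E1,E3,E2,E4 order used as input to the next stage.  */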
5805 static bool
5806 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5807 unsigned int length,
5808 gimple stmt,
5809 gimple_stmt_iterator *gsi,
5810 VEC(tree,heap) **result_chain)
5812 tree perm_dest, data_ref, first_vect, second_vect;
5813 gimple perm_stmt;
5814 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5815 int i;
5816 unsigned int j;
5818 /* Check that the operation is supported. */
5819 if (!vect_strided_load_supported (vectype))
5820 return false;
5822 *result_chain = VEC_copy (tree, heap, dr_chain);
5823 for (i = 0; i < exact_log2 (length); i++)
5825 for (j = 0; j < length; j +=2)
5827 first_vect = VEC_index (tree, dr_chain, j);
5828 second_vect = VEC_index (tree, dr_chain, j+1);
5830 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5831 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5832 DECL_GIMPLE_REG_P (perm_dest) = 1;
5833 add_referenced_var (perm_dest);
5835 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
5836 perm_dest, first_vect,
5837 second_vect);
5839 data_ref = make_ssa_name (perm_dest, perm_stmt);
5840 gimple_assign_set_lhs (perm_stmt, data_ref);
5841 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5842 mark_symbols_for_renaming (perm_stmt);
5844 VEC_replace (tree, *result_chain, j/2, data_ref);
5846 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5847 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5848 DECL_GIMPLE_REG_P (perm_dest) = 1;
5849 add_referenced_var (perm_dest);
5851 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
5852 perm_dest, first_vect,
5853 second_vect);
5854 data_ref = make_ssa_name (perm_dest, perm_stmt);
5855 gimple_assign_set_lhs (perm_stmt, data_ref);
5856 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5857 mark_symbols_for_renaming (perm_stmt);
5859 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
5861 dr_chain = VEC_copy (tree, heap, *result_chain);
5863 return true;
5867 /* Function vect_transform_strided_load.
5869 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5870 to perform their permutation and assign the resulting vectorized statements to
5871 the scalar statements.
5874 static bool
5875 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
5876 gimple_stmt_iterator *gsi)
5878 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5879 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5880 gimple next_stmt, new_stmt;
5881 VEC(tree,heap) *result_chain = NULL;
5882 unsigned int i, gap_count;
5883 tree tmp_data_ref;
5885 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5886 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5887 vectors, that are ready for vector computation. */
5888 result_chain = VEC_alloc (tree, heap, size);
5889 /* Permute. */
5890 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
5891 return false;
5893 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5894 Since we scan the chain starting from its first node, their order
5895 corresponds to the order of data-refs in RESULT_CHAIN. */
5896 next_stmt = first_stmt;
5897 gap_count = 1;
5898 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5900 if (!next_stmt)
5901 break;
5903 /* Skip the gaps. Loads created for the gaps will be removed by dead
5904 code elimination pass later. No need to check for the first stmt in
5905 the group, since it always exists.
5906 DR_GROUP_GAP is the number of steps in elements from the previous
5907 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5908 correspond to the gaps.
5910 if (next_stmt != first_stmt
5911 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5913 gap_count++;
5914 continue;
5917 while (next_stmt)
5919 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5920 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5921 copies, and we put the new vector statement in the first available
5922 RELATED_STMT. */
5923 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5924 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5925 else
5927 gimple prev_stmt =
5928 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5929 gimple rel_stmt =
5930 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
5931 while (rel_stmt)
5933 prev_stmt = rel_stmt;
5934 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5936 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5938 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5939 gap_count = 1;
5940 /* If NEXT_STMT accesses the same DR as the previous statement,
5941 put the same TMP_DATA_REF as its vectorized statement; otherwise
5942 get the next data-ref from RESULT_CHAIN. */
5943 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5944 break;
5948 VEC_free (tree, heap, result_chain);
5949 return true;
5953 /* vectorizable_load.
5955 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5956 can be vectorized.
5957 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5958 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5959 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5961 bool
5962 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5963 slp_tree slp_node)
5965 tree scalar_dest;
5966 tree vec_dest = NULL;
5967 tree data_ref = NULL;
5968 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5969 stmt_vec_info prev_stmt_info;
5970 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5971 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5972 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5973 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5974 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5975 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5976 tree new_temp;
5977 int mode;
5978 gimple new_stmt = NULL;
5979 tree dummy;
5980 enum dr_alignment_support alignment_support_scheme;
5981 tree dataref_ptr = NULL_TREE;
5982 gimple ptr_incr;
5983 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5984 int ncopies;
5985 int i, j, group_size;
5986 tree msq = NULL_TREE, lsq;
5987 tree offset = NULL_TREE;
5988 tree realignment_token = NULL_TREE;
5989 gimple phi = NULL;
5990 VEC(tree,heap) *dr_chain = NULL;
5991 bool strided_load = false;
5992 gimple first_stmt;
5993 tree scalar_type;
5994 bool inv_p;
5995 bool compute_in_loop = false;
5996 struct loop *at_loop;
5997 int vec_num;
5998 bool slp = (slp_node != NULL);
5999 enum tree_code code;
6001 /* Multiple types in SLP are handled by creating the appropriate number of
6002 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6003 case of SLP. */
6004 if (slp)
6005 ncopies = 1;
6006 else
6007 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6009 gcc_assert (ncopies >= 1);
6011 /* FORNOW. This restriction should be relaxed. */
6012 if (nested_in_vect_loop && ncopies > 1)
6014 if (vect_print_dump_info (REPORT_DETAILS))
6015 fprintf (vect_dump, "multiple types in nested loop.");
6016 return false;
6019 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6020 return false;
6022 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
6023 return false;
6025 /* Is vectorizable load? */
6026 if (!is_gimple_assign (stmt))
6027 return false;
6029 scalar_dest = gimple_assign_lhs (stmt);
6030 if (TREE_CODE (scalar_dest) != SSA_NAME)
6031 return false;
6033 code = gimple_assign_rhs_code (stmt);
6034 if (code != ARRAY_REF
6035 && code != INDIRECT_REF
6036 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
6037 return false;
6039 if (!STMT_VINFO_DATA_REF (stmt_info))
6040 return false;
6042 scalar_type = TREE_TYPE (DR_REF (dr));
6043 mode = (int) TYPE_MODE (vectype);
6045 /* FORNOW. In some cases can vectorize even if data-type not supported
6046 (e.g. - data copies). */
6047 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
6049 if (vect_print_dump_info (REPORT_DETAILS))
6050 fprintf (vect_dump, "Aligned load, but unsupported type.");
6051 return false;
6054 /* If accesses through a pointer to vectype do not alias the original
6055 memory reference we have a problem. */
6056 if (get_alias_set (vectype) != get_alias_set (scalar_type)
6057 && !alias_set_subset_of (get_alias_set (vectype),
6058 get_alias_set (scalar_type)))
6060 if (vect_print_dump_info (REPORT_DETAILS))
6061 fprintf (vect_dump, "vector type does not alias scalar type");
6062 return false;
6065 /* Check if the load is a part of an interleaving chain. */
6066 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6068 strided_load = true;
6069 /* FORNOW */
6070 gcc_assert (! nested_in_vect_loop);
6072 /* Check if interleaving is supported. */
6073 if (!vect_strided_load_supported (vectype)
6074 && !PURE_SLP_STMT (stmt_info) && !slp)
6075 return false;
6078 if (!vec_stmt) /* transformation not required. */
6080 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6081 vect_model_load_cost (stmt_info, ncopies, NULL);
6082 return true;
6085 if (vect_print_dump_info (REPORT_DETAILS))
6086 fprintf (vect_dump, "transform load.");
6088 /** Transform. **/
6090 if (strided_load)
6092 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
6093 /* Check if the chain of loads is already vectorized. */
6094 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
6096 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6097 return true;
6099 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6100 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
6102 /* VEC_NUM is the number of vect stmts to be created for this group. */
6103 if (slp)
6105 strided_load = false;
6106 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6108 else
6109 vec_num = group_size;
6111 dr_chain = VEC_alloc (tree, heap, vec_num);
6113 else
6115 first_stmt = stmt;
6116 first_dr = dr;
6117 group_size = vec_num = 1;
6120 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
6121 gcc_assert (alignment_support_scheme);
6123 /* In case the vectorization factor (VF) is bigger than the number
6124 of elements that we can fit in a vectype (nunits), we have to generate
6125 more than one vector stmt - i.e - we need to "unroll" the
6126 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6127 from one copy of the vector stmt to the next, in the field
6128 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6129 stages to find the correct vector defs to be used when vectorizing
6130 stmts that use the defs of the current stmt. The example below illustrates
6131 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
6132 4 vectorized stmts):
6134 before vectorization:
6135 RELATED_STMT VEC_STMT
6136 S1: x = memref - -
6137 S2: z = x + 1 - -
6139 step 1: vectorize stmt S1:
6140 We first create the vector stmt VS1_0, and, as usual, record a
6141 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6142 Next, we create the vector stmt VS1_1, and record a pointer to
6143 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6144 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6145 stmts and pointers:
6146 RELATED_STMT VEC_STMT
6147 VS1_0: vx0 = memref0 VS1_1 -
6148 VS1_1: vx1 = memref1 VS1_2 -
6149 VS1_2: vx2 = memref2 VS1_3 -
6150 VS1_3: vx3 = memref3 - -
6151 S1: x = load - VS1_0
6152 S2: z = x + 1 - -
6154 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6155 information we recorded in RELATED_STMT field is used to vectorize
6156 stmt S2. */
6158 /* In case of interleaving (non-unit strided access):
6160 S1: x2 = &base + 2
6161 S2: x0 = &base
6162 S3: x1 = &base + 1
6163 S4: x3 = &base + 3
6165 Vectorized loads are created in the order of memory accesses
6166 starting from the access of the first stmt of the chain:
6168 VS1: vx0 = &base
6169 VS2: vx1 = &base + vec_size*1
6170 VS3: vx2 = &base + vec_size*2
6171 VS4: vx3 = &base + vec_size*3
6173 Then permutation statements are generated:
6175 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
6176 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
6179 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6180 (the order of the data-refs in the output of vect_permute_load_chain
6181 corresponds to the order of scalar stmts in the interleaving chain - see
6182 the documentation of vect_permute_load_chain()).
6183 The generation of permutation stmts and recording them in
6184 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
6186 In case of both multiple types and interleaving, the vector loads and
6187 permutation stmts above are created for every copy. The result vector stmts
6188 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6189 STMT_VINFO_RELATED_STMT for the next copies. */
6191 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6192 on a target that supports unaligned accesses (dr_unaligned_supported)
6193 we generate the following code:
6194 p = initial_addr;
6195 indx = 0;
6196 loop {
6197 p = p + indx * vectype_size;
6198 vec_dest = *(p);
6199 indx = indx + 1;
6202 Otherwise, the data reference is potentially unaligned on a target that
6203 does not support unaligned accesses (dr_explicit_realign_optimized) -
6204 then generate the following code, in which the data in each iteration is
6205 obtained by two vector loads, one from the previous iteration, and one
6206 from the current iteration:
6207 p1 = initial_addr;
6208 msq_init = *(floor(p1))
6209 p2 = initial_addr + VS - 1;
6210 realignment_token = call target_builtin;
6211 indx = 0;
6212 loop {
6213 p2 = p2 + indx * vectype_size
6214 lsq = *(floor(p2))
6215 vec_dest = realign_load (msq, lsq, realignment_token)
6216 indx = indx + 1;
6217 msq = lsq;
6218 } */
6220 /* If the misalignment remains the same throughout the execution of the
6221 loop, we can create the init_addr and permutation mask at the loop
6222 preheader. Otherwise, it needs to be created inside the loop.
6223 This can only occur when vectorizing memory accesses in the inner-loop
6224 nested within an outer-loop that is being vectorized. */
6226 if (nested_in_vect_loop_p (loop, stmt)
6227 && (TREE_INT_CST_LOW (DR_STEP (dr))
6228 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6230 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6231 compute_in_loop = true;
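/* Illustrative note (added; hypothetical numbers): an inner-loop access
   whose outer-loop DR_STEP is 4 bytes, with 16-byte vectors, changes its
   misalignment on every outer-loop iteration, so the realignment token must
   be recomputed inside the loop, as selected here.  */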
6234 if ((alignment_support_scheme == dr_explicit_realign_optimized
6235 || alignment_support_scheme == dr_explicit_realign)
6236 && !compute_in_loop)
6238 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6239 alignment_support_scheme, NULL_TREE,
6240 &at_loop);
6241 if (alignment_support_scheme == dr_explicit_realign_optimized)
6243 phi = SSA_NAME_DEF_STMT (msq);
6244 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6247 else
6248 at_loop = loop;
6250 prev_stmt_info = NULL;
6251 for (j = 0; j < ncopies; j++)
6253 /* 1. Create the vector pointer update chain. */
6254 if (j == 0)
6255 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
6256 at_loop, offset,
6257 &dummy, &ptr_incr, false,
6258 &inv_p);
6259 else
6260 dataref_ptr =
6261 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
6263 for (i = 0; i < vec_num; i++)
6265 if (i > 0)
6266 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6267 NULL_TREE);
6269 /* 2. Create the vector-load in the loop. */
6270 switch (alignment_support_scheme)
6272 case dr_aligned:
6273 gcc_assert (aligned_access_p (first_dr));
6274 data_ref = build_fold_indirect_ref (dataref_ptr);
6275 break;
6276 case dr_unaligned_supported:
6278 int mis = DR_MISALIGNMENT (first_dr);
6279 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
6281 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
6282 data_ref =
6283 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
6284 break;
6286 case dr_explicit_realign:
6288 tree ptr, bump;
6289 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6291 if (compute_in_loop)
6292 msq = vect_setup_realignment (first_stmt, gsi,
6293 &realignment_token,
6294 dr_explicit_realign,
6295 dataref_ptr, NULL);
6297 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
6298 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6299 new_stmt = gimple_build_assign (vec_dest, data_ref);
6300 new_temp = make_ssa_name (vec_dest, new_stmt);
6301 gimple_assign_set_lhs (new_stmt, new_temp);
6302 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6303 copy_virtual_operands (new_stmt, stmt);
6304 mark_symbols_for_renaming (new_stmt);
6305 msq = new_temp;
6307 bump = size_binop (MULT_EXPR, vs_minus_1,
6308 TYPE_SIZE_UNIT (scalar_type));
6309 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6310 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
6311 break;
6313 case dr_explicit_realign_optimized:
6314 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
6315 break;
6316 default:
6317 gcc_unreachable ();
6319 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6320 new_stmt = gimple_build_assign (vec_dest, data_ref);
6321 new_temp = make_ssa_name (vec_dest, new_stmt);
6322 gimple_assign_set_lhs (new_stmt, new_temp);
6323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6324 mark_symbols_for_renaming (new_stmt);
6326 /* 3. Handle explicit realignment if necessary/supported. Create in
6327 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
6328 if (alignment_support_scheme == dr_explicit_realign_optimized
6329 || alignment_support_scheme == dr_explicit_realign)
6331 tree tmp;
6333 lsq = gimple_assign_lhs (new_stmt);
6334 if (!realignment_token)
6335 realignment_token = dataref_ptr;
6336 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6337 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
6338 realignment_token);
6339 new_stmt = gimple_build_assign (vec_dest, tmp);
6340 new_temp = make_ssa_name (vec_dest, new_stmt);
6341 gimple_assign_set_lhs (new_stmt, new_temp);
6342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6344 if (alignment_support_scheme == dr_explicit_realign_optimized)
6346 gcc_assert (phi);
6347 if (i == vec_num - 1 && j == ncopies - 1)
6348 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
6349 msq = lsq;
6353 /* 4. Handle invariant-load. */
6354 if (inv_p)
6356 gcc_assert (!strided_load);
6357 gcc_assert (nested_in_vect_loop_p (loop, stmt));
6358 if (j == 0)
6360 int k;
6361 tree t = NULL_TREE;
6362 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
6364 /* CHECKME: bitpos depends on endianness? */
6365 bitpos = bitsize_zero_node;
6366 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
6367 bitsize, bitpos);
6368 vec_dest =
6369 vect_create_destination_var (scalar_dest, NULL_TREE);
6370 new_stmt = gimple_build_assign (vec_dest, vec_inv);
6371 new_temp = make_ssa_name (vec_dest, new_stmt);
6372 gimple_assign_set_lhs (new_stmt, new_temp);
6373 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6375 for (k = nunits - 1; k >= 0; --k)
6376 t = tree_cons (NULL_TREE, new_temp, t);
6377 /* FIXME: use build_constructor directly. */
6378 vec_inv = build_constructor_from_list (vectype, t);
6379 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6380 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6382 else
6383 gcc_unreachable (); /* FORNOW. */
6386 /* Collect vector loads and later create their permutation in
6387 vect_transform_strided_load (). */
6388 if (strided_load)
6389 VEC_quick_push (tree, dr_chain, new_temp);
6391 /* Store vector loads in the corresponding SLP_NODE. */
6392 if (slp)
6393 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
6396 if (slp)
6397 continue;
6399 if (strided_load)
6401 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
6402 return false;
6403 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6404 VEC_free (tree, heap, dr_chain);
6405 dr_chain = VEC_alloc (tree, heap, group_size);
6407 else
6409 if (j == 0)
6410 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6411 else
6412 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6413 prev_stmt_info = vinfo_for_stmt (new_stmt);
6417 if (dr_chain)
6418 VEC_free (tree, heap, dr_chain);
6420 return true;
6424 /* Function vectorizable_live_operation.
6426 STMT computes a value that is used outside the loop. Check if
6427 it can be supported. */
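/* Illustrative note (added; hypothetical example): a stmt such as
   last = n + 5 inside the loop, whose result LAST is read after the loop,
   is "live".  As checked below, only the case where all operands are
   loop-invariant or constant is supported, so the scalar stmt can simply be
   left in place.  */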
6429 bool
6430 vectorizable_live_operation (gimple stmt,
6431 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6432 gimple *vec_stmt ATTRIBUTE_UNUSED)
6434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6435 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6436 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6437 int i;
6438 int op_type;
6439 tree op;
6440 tree def;
6441 gimple def_stmt;
6442 enum vect_def_type dt;
6443 enum tree_code code;
6444 enum gimple_rhs_class rhs_class;
6446 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6448 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6449 return false;
6451 if (!is_gimple_assign (stmt))
6452 return false;
6454 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6455 return false;
6457 /* FORNOW. CHECKME. */
6458 if (nested_in_vect_loop_p (loop, stmt))
6459 return false;
6461 code = gimple_assign_rhs_code (stmt);
6462 op_type = TREE_CODE_LENGTH (code);
6463 rhs_class = get_gimple_rhs_class (code);
6464 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
6465 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
6467 /* FORNOW: support only if all uses are invariant. This means
6468 that the scalar operations can remain in place, unvectorized.
6469 The original last scalar value that they compute will be used. */
6471 for (i = 0; i < op_type; i++)
6473 if (rhs_class == GIMPLE_SINGLE_RHS)
6474 op = TREE_OPERAND (gimple_op (stmt, 1), i);
6475 else
6476 op = gimple_op (stmt, i + 1);
6477 if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
6479 if (vect_print_dump_info (REPORT_DETAILS))
6480 fprintf (vect_dump, "use not simple.");
6481 return false;
6484 if (dt != vect_invariant_def && dt != vect_constant_def)
6485 return false;
6488 /* No transformation is required for the cases we currently support. */
6489 return true;
6493 /* Function vect_is_simple_cond.
6495 Input:
6496 LOOP - the loop that is being vectorized.
6497 COND - Condition that is checked for simple use.
6499 Returns whether a COND can be vectorized. Checks whether
6500 condition operands are supportable using vect_is_simple_use.
6502 static bool
6503 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
6505 tree lhs, rhs;
6506 tree def;
6507 enum vect_def_type dt;
6509 if (!COMPARISON_CLASS_P (cond))
6510 return false;
6512 lhs = TREE_OPERAND (cond, 0);
6513 rhs = TREE_OPERAND (cond, 1);
6515 if (TREE_CODE (lhs) == SSA_NAME)
6517 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6518 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
6519 return false;
6521 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6522 && TREE_CODE (lhs) != FIXED_CST)
6523 return false;
6525 if (TREE_CODE (rhs) == SSA_NAME)
6527 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6528 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
6529 return false;
6531 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6532 && TREE_CODE (rhs) != FIXED_CST)
6533 return false;
6535 return true;
6538 /* vectorizable_condition.
6540 Check if STMT is conditional modify expression that can be vectorized.
6541 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6542 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6543 at BSI.
6545 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
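/* Illustrative sketch (added; hypothetical source, not generated code): for
   a scalar stmt x = a < b ? c : d; the transformation below builds a vector
   compare va < vb and a VEC_COND_EXPR <va < vb, vc, vd> that selects, per
   element, between the vectorized THEN and ELSE values.  */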
6547 bool
6548 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6549 gimple *vec_stmt)
6551 tree scalar_dest = NULL_TREE;
6552 tree vec_dest = NULL_TREE;
6553 tree op = NULL_TREE;
6554 tree cond_expr, then_clause, else_clause;
6555 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6557 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
6558 tree vec_compare, vec_cond_expr;
6559 tree new_temp;
6560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6561 enum machine_mode vec_mode;
6562 tree def;
6563 enum vect_def_type dt;
6564 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6565 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6566 enum tree_code code;
6568 gcc_assert (ncopies >= 1);
6569 if (ncopies > 1)
6570 return false; /* FORNOW */
6572 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6573 return false;
6575 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
6576 return false;
6578 /* FORNOW: SLP not supported. */
6579 if (STMT_SLP_TYPE (stmt_info))
6580 return false;
6582 /* FORNOW: not yet supported. */
6583 if (STMT_VINFO_LIVE_P (stmt_info))
6585 if (vect_print_dump_info (REPORT_DETAILS))
6586 fprintf (vect_dump, "value used after loop.");
6587 return false;
6590 /* Is vectorizable conditional operation? */
6591 if (!is_gimple_assign (stmt))
6592 return false;
6594 code = gimple_assign_rhs_code (stmt);
6596 if (code != COND_EXPR)
6597 return false;
6599 gcc_assert (gimple_assign_single_p (stmt));
6600 op = gimple_assign_rhs1 (stmt);
6601 cond_expr = TREE_OPERAND (op, 0);
6602 then_clause = TREE_OPERAND (op, 1);
6603 else_clause = TREE_OPERAND (op, 2);
6605 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
6606 return false;
6608 /* We do not handle two different vector types for the condition
6609 and the values. */
6610 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
6611 return false;
6613 if (TREE_CODE (then_clause) == SSA_NAME)
6615 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6616 if (!vect_is_simple_use (then_clause, loop_vinfo,
6617 &then_def_stmt, &def, &dt))
6618 return false;
6620 else if (TREE_CODE (then_clause) != INTEGER_CST
6621 && TREE_CODE (then_clause) != REAL_CST
6622 && TREE_CODE (then_clause) != FIXED_CST)
6623 return false;
6625 if (TREE_CODE (else_clause) == SSA_NAME)
6627 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6628 if (!vect_is_simple_use (else_clause, loop_vinfo,
6629 &else_def_stmt, &def, &dt))
6630 return false;
6632 else if (TREE_CODE (else_clause) != INTEGER_CST
6633 && TREE_CODE (else_clause) != REAL_CST
6634 && TREE_CODE (else_clause) != FIXED_CST)
6635 return false;
6638 vec_mode = TYPE_MODE (vectype);
6640 if (!vec_stmt)
6642 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6643 return expand_vec_cond_expr_p (op, vec_mode);
6646 /* Transform */
6648 /* Handle def. */
6649 scalar_dest = gimple_assign_lhs (stmt);
6650 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6652 /* Handle cond expr. */
6653 vec_cond_lhs =
6654 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6655 vec_cond_rhs =
6656 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6657 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6658 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6660 /* Arguments are ready. Create the new vector stmt. */
6661 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6662 vec_cond_lhs, vec_cond_rhs);
6663 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6664 vec_compare, vec_then_clause, vec_else_clause);
6666 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6667 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6668 gimple_assign_set_lhs (*vec_stmt, new_temp);
6669 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
6671 return true;
6675 /* Function vect_transform_stmt.
6677 Create a vectorized stmt to replace STMT, and insert it at BSI. */
6679 static bool
6680 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
6681 bool *strided_store, slp_tree slp_node)
6683 bool is_store = false;
6684 gimple vec_stmt = NULL;
6685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6686 gimple orig_stmt_in_pattern;
6687 bool done;
6689 switch (STMT_VINFO_TYPE (stmt_info))
6691 case type_demotion_vec_info_type:
6692 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
6693 gcc_assert (done);
6694 break;
6696 case type_promotion_vec_info_type:
6697 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
6698 gcc_assert (done);
6699 break;
6701 case type_conversion_vec_info_type:
6702 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
6703 gcc_assert (done);
6704 break;
6706 case induc_vec_info_type:
6707 gcc_assert (!slp_node);
6708 done = vectorizable_induction (stmt, gsi, &vec_stmt);
6709 gcc_assert (done);
6710 break;
6712 case op_vec_info_type:
6713 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
6714 gcc_assert (done);
6715 break;
6717 case assignment_vec_info_type:
6718 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
6719 gcc_assert (done);
6720 break;
6722 case load_vec_info_type:
6723 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node);
6724 gcc_assert (done);
6725 break;
6727 case store_vec_info_type:
6728 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
6729 gcc_assert (done);
6730 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
6732 /* In case of interleaving, the whole chain is vectorized when the
6733 last store in the chain is reached. Store stmts before the last
6734 one are skipped, and their vec_stmt_info shouldn't be freed
6735 meanwhile. */
6736 *strided_store = true;
6737 if (STMT_VINFO_VEC_STMT (stmt_info))
6738 is_store = true;
6740 else
6741 is_store = true;
6742 break;
6744 case condition_vec_info_type:
6745 gcc_assert (!slp_node);
6746 done = vectorizable_condition (stmt, gsi, &vec_stmt);
6747 gcc_assert (done);
6748 break;
6750 case call_vec_info_type:
6751 gcc_assert (!slp_node);
6752 done = vectorizable_call (stmt, gsi, &vec_stmt);
6753 break;
6755 case reduc_vec_info_type:
6756 gcc_assert (!slp_node);
6757 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
6758 gcc_assert (done);
6759 break;
6761 default:
6762 if (!STMT_VINFO_LIVE_P (stmt_info))
6764 if (vect_print_dump_info (REPORT_DETAILS))
6765 fprintf (vect_dump, "stmt not supported.");
6766 gcc_unreachable ();
6770 if (STMT_VINFO_LIVE_P (stmt_info)
6771 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6773 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6774 gcc_assert (done);
6777 if (vec_stmt)
6779 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6780 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6781 if (orig_stmt_in_pattern)
6783 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6784 /* STMT was inserted by the vectorizer to replace a computation idiom.
6785 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6786 computed this idiom. We need to record a pointer to VEC_STMT in
6787 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6788 documentation of vect_pattern_recog. */
6789 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6791 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6792 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6797 return is_store;
6801 /* This function builds ni_name = the number of iterations the loop executes,
6802 inserting any needed computation on the loop preheader edge. */
6804 static tree
6805 vect_build_loop_niters (loop_vec_info loop_vinfo)
6807 tree ni_name, var;
6808 gimple_seq stmts = NULL;
6809 edge pe;
6810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6811 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6813 var = create_tmp_var (TREE_TYPE (ni), "niters");
6814 add_referenced_var (var);
6815 ni_name = force_gimple_operand (ni, &stmts, false, var);
6817 pe = loop_preheader_edge (loop);
6818 if (stmts)
6820 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6821 gcc_assert (!new_bb);
6824 return ni_name;
6828 /* This function generates the following statements:
6830 ni_name = number of iterations loop executes
6831 ratio = ni_name / vf
6832 ratio_mult_vf_name = ratio * vf
6834 and places them at the loop preheader edge. */
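/* Illustrative note (added; hypothetical numbers): with ni = 1003 and
   vf = 4, the code below yields ratio = 1003 >> 2 = 250 and
   ratio_mult_vf = 250 << 2 = 1000; the remaining 3 iterations are left for
   the scalar epilog loop.  */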
6836 static void
6837 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6838 tree *ni_name_ptr,
6839 tree *ratio_mult_vf_name_ptr,
6840 tree *ratio_name_ptr)
6843 edge pe;
6844 basic_block new_bb;
6845 gimple_seq stmts;
6846 tree ni_name;
6847 tree var;
6848 tree ratio_name;
6849 tree ratio_mult_vf_name;
6850 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6851 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6852 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6853 tree log_vf;
6855 pe = loop_preheader_edge (loop);
6857 /* Generate a temporary variable that contains
6858 the number of iterations the loop executes. */
6860 ni_name = vect_build_loop_niters (loop_vinfo);
6861 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6863 /* Create: ratio = ni >> log2(vf) */
6865 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6866 if (!is_gimple_val (ratio_name))
6868 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6869 add_referenced_var (var);
6871 stmts = NULL;
6872 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
6873 pe = loop_preheader_edge (loop);
6874 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6875 gcc_assert (!new_bb);
6878 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6880 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6881 ratio_name, log_vf);
6882 if (!is_gimple_val (ratio_mult_vf_name))
6884 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6885 add_referenced_var (var);
6887 stmts = NULL;
6888 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
6889 true, var);
6890 pe = loop_preheader_edge (loop);
6891 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6892 gcc_assert (!new_bb);
6895 *ni_name_ptr = ni_name;
6896 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6897 *ratio_name_ptr = ratio_name;
6899 return;
6903 /* Function vect_update_ivs_after_vectorizer.
6905 "Advance" the induction variables of LOOP to the value they should take
6906 after the execution of LOOP. This is currently necessary because the
6907 vectorizer does not handle induction variables that are used after the
6908 loop. Such a situation occurs when the last iterations of LOOP are
6909 peeled, because:
6910 1. We introduced new uses after LOOP for IVs that were not originally used
6911 after LOOP: the IVs of LOOP are now used by an epilog loop.
6912 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6913 times, whereas the loop IVs should be bumped N times.
6915 Input:
6916 - LOOP - a loop that is going to be vectorized. The last few iterations
6917 of LOOP were peeled.
6918 - NITERS - the number of iterations that LOOP executes (before it is
6919 vectorized), i.e., the number of times the ivs should be bumped.
6920 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6921 coming out from LOOP on which there are uses of the LOOP ivs
6922 (this is the path from LOOP->exit to epilog_loop->preheader).
6924 The new definitions of the ivs are placed in LOOP->exit.
6925 The phi args associated with the edge UPDATE_E in the bb
6926 UPDATE_E->dest are updated accordingly.
6928 Assumption 1: Like the rest of the vectorizer, this function assumes
6929 a single loop exit that has a single predecessor.
6931 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6932 organized in the same order.
6934 Assumption 3: The access function of the ivs is simple enough (see
6935 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6937 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6938 coming out of LOOP on which the ivs of LOOP are used (this is the path
6939 that leads to the epilog loop; other paths skip the epilog loop). This
6940 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6941 needs to have its phis updated. */
6944 static void
6945 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6946 edge update_e)
6948 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6949 basic_block exit_bb = single_exit (loop)->dest;
6950 gimple phi, phi1;
6951 gimple_stmt_iterator gsi, gsi1;
6952 basic_block update_bb = update_e->dest;
6954 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6956 /* Make sure there exists a single-predecessor exit bb: */
6957 gcc_assert (single_pred_p (exit_bb));
6959 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
6960 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
6961 gsi_next (&gsi), gsi_next (&gsi1))
6963 tree access_fn = NULL;
6964 tree evolution_part;
6965 tree init_expr;
6966 tree step_expr;
6967 tree var, ni, ni_name;
6968 gimple_stmt_iterator last_gsi;
6970 phi = gsi_stmt (gsi);
6971 phi1 = gsi_stmt (gsi1);
6972 if (vect_print_dump_info (REPORT_DETAILS))
6974 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6975 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
6978 /* Skip virtual phi's. */
6979 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6981 if (vect_print_dump_info (REPORT_DETAILS))
6982 fprintf (vect_dump, "virtual phi. skip.");
6983 continue;
6986 /* Skip reduction phis. */
6987 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6989 if (vect_print_dump_info (REPORT_DETAILS))
6990 fprintf (vect_dump, "reduc phi. skip.");
6991 continue;
6994 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6995 gcc_assert (access_fn);
6996 evolution_part =
6997 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6998 gcc_assert (evolution_part != NULL_TREE);
7000 /* FORNOW: We do not support IVs whose evolution function is a polynomial
7001 of degree >= 2 or exponential. */
7002 gcc_assert (!tree_is_chrec (evolution_part));
7004 step_expr = evolution_part;
7005 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
7006 loop->num));
7008 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
7009 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
7010 init_expr,
7011 fold_convert (sizetype,
7012 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
7013 niters, step_expr)));
7014 else
7015 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
7016 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
7017 fold_convert (TREE_TYPE (init_expr),
7018 niters),
7019 step_expr),
7020 init_expr);
7024 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
7025 add_referenced_var (var);
7027 last_gsi = gsi_last_bb (exit_bb);
7028 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
7029 true, GSI_SAME_STMT);
7031 /* Fix phi expressions in the successor bb. */
7032 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
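/* Illustrative sketch (not part of the vectorizer): the value each
   non-reduction IV is advanced to above, for a simple integer IV with
   initial value INIT and per-iteration step STEP.  Hypothetical names,
   plain C only.  */
static long
example_advanced_iv_value (long init, long step, long niters)
{
  /* ni = init + niters * step: the value the IV reaches after NITERS scalar
     iterations; this is the value handed to the epilog loop's phis.  */
  return init + niters * step;
}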
7036 /* Return the more conservative threshold between the
7037 min_profitable_iters returned by the cost model and the user-specified
7038 threshold, if provided. */
7040 static unsigned int
7041 conservative_cost_threshold (loop_vec_info loop_vinfo,
7042 int min_profitable_iters)
7044 unsigned int th;
7045 int min_scalar_loop_bound;
7047 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
7048 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
7050 /* Use the cost model only if it is more conservative than the user-specified
7051 threshold. */
7052 th = (unsigned) min_scalar_loop_bound;
7053 if (min_profitable_iters
7054 && (!min_scalar_loop_bound
7055 || min_profitable_iters > min_scalar_loop_bound))
7056 th = (unsigned) min_profitable_iters;
7058 if (th && vect_print_dump_info (REPORT_COST))
7059 fprintf (vect_dump, "Vectorization may not be profitable.");
7061 return th;
7064 /* Function vect_do_peeling_for_loop_bound
7066 Peel the last iterations of the loop represented by LOOP_VINFO.
7067 The peeled iterations form a new epilog loop. Given that the loop now
7068 iterates NITERS times, the new epilog loop iterates
7069 NITERS % VECTORIZATION_FACTOR times.
7071 The original loop will later be made to iterate
7072 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
7074 static void
7075 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
7077 tree ni_name, ratio_mult_vf_name;
7078 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7079 struct loop *new_loop;
7080 edge update_e;
7081 basic_block preheader;
7082 int loop_num;
7083 bool check_profitability = false;
7084 unsigned int th = 0;
7085 int min_profitable_iters;
7087 if (vect_print_dump_info (REPORT_DETAILS))
7088 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
7090 initialize_original_copy_tables ();
7092 /* Generate the following variables on the preheader of original loop:
7094 ni_name = number of iterations the original loop executes
7095 ratio = ni_name / vf
7096 ratio_mult_vf_name = ratio * vf */
7097 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
7098 &ratio_mult_vf_name, ratio);
7100 loop_num = loop->num;
7102 /* If the cost model check was not done during versioning or
7103 peeling for alignment, do it here. */
7104 if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7105 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
7106 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7108 check_profitability = true;
7110 /* Get profitability threshold for vectorized loop. */
7111 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
7113 th = conservative_cost_threshold (loop_vinfo,
7114 min_profitable_iters);
7117 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
7118 ratio_mult_vf_name, ni_name, false,
7119 th, check_profitability);
7120 gcc_assert (new_loop);
7121 gcc_assert (loop_num == loop->num);
7122 #ifdef ENABLE_CHECKING
7123 slpeel_verify_cfg_after_peeling (loop, new_loop);
7124 #endif
7126 /* A guard that controls whether the new_loop is to be executed or skipped
7127 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
7128 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
7129 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
7130 is on the path where the LOOP IVs are used and need to be updated. */
7132 preheader = loop_preheader_edge (new_loop)->src;
7133 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
7134 update_e = EDGE_PRED (preheader, 0);
7135 else
7136 update_e = EDGE_PRED (preheader, 1);
7138 /* Update IVs of original loop as if they were advanced
7139 by ratio_mult_vf_name steps. */
7140 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
7142 /* After peeling we have to reset scalar evolution analyzer. */
7143 scev_reset ();
7145 free_original_copy_tables ();
7149 /* Function vect_gen_niters_for_prolog_loop
7151 Set the number of iterations for the loop represented by LOOP_VINFO
7152 to the minimum between LOOP_NITERS (the original iteration count of the loop)
7153 and the misalignment of DR - the data reference recorded in
7154 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
7155 this loop, the data reference DR will refer to an aligned location.
7157 The following computation is generated:
7159 If the misalignment of DR is known at compile time:
7160 addr_mis = int mis = DR_MISALIGNMENT (dr);
7161 Else, compute address misalignment in bytes:
7162 addr_mis = addr & (vectype_size - 1)
7164 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
7166 (elem_size = element type size; an element is the scalar element whose type
7167 is the inner type of the vectype)
7169 When the step of the data-ref in the loop is not 1 (as in interleaved data
7170 and SLP), the number of iterations of the prolog must be divided by the step
7171 (which is equal to the size of the interleaved group).
7173 The above formulas assume that VF == number of elements in the vector. This
7174 may not hold when there are multiple types in the loop.
7175 In this case, for some data-references in the loop the VF does not represent
7176 the number of elements that fit in the vector. Therefore, instead of VF we
7177 use TYPE_VECTOR_SUBPARTS. */
7179 static tree
7180 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
7182 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
7183 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7184 tree var;
7185 gimple_seq stmts;
7186 tree iters, iters_name;
7187 edge pe;
7188 basic_block new_bb;
7189 gimple dr_stmt = DR_STMT (dr);
7190 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
7191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7192 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
7193 tree niters_type = TREE_TYPE (loop_niters);
7194 int step = 1;
7195 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
7196 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
7198 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7199 step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
7201 pe = loop_preheader_edge (loop);
7203 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
7205 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
7206 int elem_misalign = byte_misalign / element_size;
7208 if (vect_print_dump_info (REPORT_DETAILS))
7209 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
7211 iters = build_int_cst (niters_type,
7212 (((nelements - elem_misalign) & (nelements - 1)) / step));
7214 else
7216 gimple_seq new_stmts = NULL;
7217 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
7218 &new_stmts, NULL_TREE, loop);
7219 tree ptr_type = TREE_TYPE (start_addr);
7220 tree size = TYPE_SIZE (ptr_type);
7221 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
7222 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
7223 tree elem_size_log =
7224 build_int_cst (type, exact_log2 (vectype_align/nelements));
7225 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
7226 tree nelements_tree = build_int_cst (type, nelements);
7227 tree byte_misalign;
7228 tree elem_misalign;
7230 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
7231 gcc_assert (!new_bb);
7233 /* Create: byte_misalign = addr & (vectype_size - 1) */
7234 byte_misalign =
7235 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
7237 /* Create: elem_misalign = byte_misalign / element_size */
7238 elem_misalign =
7239 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
7241 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
7242 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
7243 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
7244 iters = fold_convert (niters_type, iters);
7247 /* Create: prolog_loop_niters = min (iters, loop_niters) */
7248 /* If the loop bound is known at compile time we already verified that it is
7249 greater than vf; since the misalignment ('iters') is at most vf, there's
7250 no need to generate the MIN_EXPR in this case. */
7251 if (TREE_CODE (loop_niters) != INTEGER_CST)
7252 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
7254 if (vect_print_dump_info (REPORT_DETAILS))
7256 fprintf (vect_dump, "niters for prolog loop: ");
7257 print_generic_expr (vect_dump, iters, TDF_SLIM);
7260 var = create_tmp_var (niters_type, "prolog_loop_niters");
7261 add_referenced_var (var);
7262 stmts = NULL;
7263 iters_name = force_gimple_operand (iters, &stmts, false, var);
7265 /* Insert stmt on loop preheader edge. */
7266 if (stmts)
7268 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
7269 gcc_assert (!new_bb);
7272 return iters_name;
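/* Illustrative sketch (not part of the vectorizer): the prolog iteration
   count computed above when the misalignment is known at compile time, in
   plain C.  The parameter names are hypothetical and mirror the local
   variables used in the function.  */
static unsigned
example_prolog_niters_known (unsigned byte_misalign, unsigned element_size,
                             unsigned nelements, unsigned step)
{
  /* elem_misalign = byte_misalign / element_size.  */
  unsigned elem_misalign = byte_misalign / element_size;
  /* ((nelements - elem_misalign) & (nelements - 1)) / step; zero when the
     access is already aligned.  */
  return ((nelements - elem_misalign) & (nelements - 1)) / step;
}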
7276 /* Function vect_update_init_of_dr
7278 NITERS iterations were peeled from LOOP. DR represents a data reference
7279 in LOOP. This function updates the information recorded in DR to
7280 account for the fact that the first NITERS iterations had already been
7281 executed. Specifically, it updates the OFFSET field of DR. */
7283 static void
7284 vect_update_init_of_dr (struct data_reference *dr, tree niters)
7286 tree offset = DR_OFFSET (dr);
7288 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
7289 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
7290 DR_OFFSET (dr) = offset;
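/* Illustrative sketch (not part of the vectorizer): the offset adjustment
   performed above, in plain C.  After NITERS iterations are peeled, the
   data-ref effectively starts NITERS * STEP bytes further along.
   Hypothetical names.  */
static long
example_updated_dr_offset (long offset, long step, long niters)
{
  return offset + niters * step;
}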
7294 /* Function vect_update_inits_of_drs
7296 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
7297 This function updates the information recorded for the data references in
7298 the loop to account for the fact that the first NITERS iterations had
7299 already been executed. Specifically, it updates the initial_condition of
7300 the access_function of all the data_references in the loop. */
7302 static void
7303 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
7305 unsigned int i;
7306 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
7307 struct data_reference *dr;
7309 if (vect_print_dump_info (REPORT_DETAILS))
7310 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
7312 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
7313 vect_update_init_of_dr (dr, niters);
7317 /* Function vect_do_peeling_for_alignment
7319 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
7320 'niters' is set to the misalignment of one of the data references in the
7321 loop, thereby forcing it to refer to an aligned location at the beginning
7322 of the execution of this loop. The data reference for which we are
7323 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
7325 static void
7326 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
7328 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7329 tree niters_of_prolog_loop, ni_name;
7330 tree n_iters;
7331 struct loop *new_loop;
7332 bool check_profitability = false;
7333 unsigned int th = 0;
7334 int min_profitable_iters;
7336 if (vect_print_dump_info (REPORT_DETAILS))
7337 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
7339 initialize_original_copy_tables ();
7341 ni_name = vect_build_loop_niters (loop_vinfo);
7342 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
7345 /* If the cost model check was not done during versioning, do it here. */
7346 if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7347 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7349 check_profitability = true;
7351 /* Get profitability threshold for vectorized loop. */
7352 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
7354 th = conservative_cost_threshold (loop_vinfo,
7355 min_profitable_iters);
7358 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
7359 new_loop =
7360 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
7361 niters_of_prolog_loop, ni_name, true,
7362 th, check_profitability);
7364 gcc_assert (new_loop);
7365 #ifdef ENABLE_CHECKING
7366 slpeel_verify_cfg_after_peeling (new_loop, loop);
7367 #endif
7369 /* Update the number of times the loop executes. */
7370 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
7371 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
7372 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
7374 /* Update the init conditions of the access functions of all data refs. */
7375 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
7377 /* After peeling we have to reset scalar evolution analyzer. */
7378 scev_reset ();
7380 free_original_copy_tables ();
7384 /* Function vect_create_cond_for_align_checks.
7386 Create a conditional expression that represents the alignment checks for
7387 all of the data references (array element references) whose alignment must be
7388 checked at runtime.
7390 Input:
7391 COND_EXPR - input conditional expression. New conditions will be chained
7392 with logical AND operation.
7393 LOOP_VINFO - two fields of the loop information are used.
7394 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
7395 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
7397 Output:
7398 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7399 expression.
7400 The returned value is the conditional expression to be used in the if
7401 statement that controls which version of the loop gets executed at runtime.
7403 The algorithm makes two assumptions:
7404 1) The number of bytes "n" in a vector is a power of 2.
7405 2) An address "a" is aligned if a%n is zero, and this
7406 test can be done as a&(n-1) == 0. For example, for 16
7407 byte vectors the test is a&0xf == 0. */
7409 static void
7410 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
7411 tree *cond_expr,
7412 gimple_seq *cond_expr_stmt_list)
7414 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7415 VEC(gimple,heap) *may_misalign_stmts
7416 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
7417 gimple ref_stmt;
7418 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
7419 tree mask_cst;
7420 unsigned int i;
7421 tree psize;
7422 tree int_ptrsize_type;
7423 char tmp_name[20];
7424 tree or_tmp_name = NULL_TREE;
7425 tree and_tmp, and_tmp_name;
7426 gimple and_stmt;
7427 tree ptrsize_zero;
7428 tree part_cond_expr;
7430 /* Check that mask is one less than a power of 2, i.e., mask is
7431 all zeros followed by all ones. */
7432 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
7434 /* CHECKME: what is the best integer or unsigned type to use to hold a
7435 cast from a pointer value? */
7436 psize = TYPE_SIZE (ptr_type_node);
7437 int_ptrsize_type
7438 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
7440 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
7441 of the first vector of the i'th data reference. */
7443 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
7445 gimple_seq new_stmt_list = NULL;
7446 tree addr_base;
7447 tree addr_tmp, addr_tmp_name;
7448 tree or_tmp, new_or_tmp_name;
7449 gimple addr_stmt, or_stmt;
7451 /* create: addr_tmp = (int)(address_of_first_vector) */
7452 addr_base =
7453 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
7454 NULL_TREE, loop);
7455 if (new_stmt_list != NULL)
7456 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
7458 sprintf (tmp_name, "%s%d", "addr2int", i);
7459 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
7460 add_referenced_var (addr_tmp);
7461 addr_tmp_name = make_ssa_name (addr_tmp, NULL);
7462 addr_stmt = gimple_build_assign (addr_tmp_name, addr_base);
7463 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
7464 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
7466 /* The addresses are ORed together. */
7468 if (or_tmp_name != NULL_TREE)
7470 /* create: or_tmp = or_tmp | addr_tmp */
7471 sprintf (tmp_name, "%s%d", "orptrs", i);
7472 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
7473 add_referenced_var (or_tmp);
7474 new_or_tmp_name = make_ssa_name (or_tmp, NULL);
7475 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
7476 new_or_tmp_name,
7477 or_tmp_name, addr_tmp_name);
7478 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
7479 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
7480 or_tmp_name = new_or_tmp_name;
7482 else
7483 or_tmp_name = addr_tmp_name;
7485 } /* end for i */
7487 mask_cst = build_int_cst (int_ptrsize_type, mask);
7489 /* create: and_tmp = or_tmp & mask */
7490 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
7491 add_referenced_var (and_tmp);
7492 and_tmp_name = make_ssa_name (and_tmp, NULL);
7494 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
7495 or_tmp_name, mask_cst);
7496 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
7497 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
7499 /* Make and_tmp the left operand of the conditional test against zero.
7500 if and_tmp has a nonzero bit then some address is unaligned. */
7501 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
7502 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
7503 and_tmp_name, ptrsize_zero);
7504 if (*cond_expr)
7505 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7506 *cond_expr, part_cond_expr);
7507 else
7508 *cond_expr = part_cond_expr;
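/* Illustrative sketch (not part of the vectorizer): the runtime alignment
   test built above, in plain C, for an array of pointer-sized addresses.
   MASK is the vector size minus one (a power of two minus one), so the test
   succeeds only if every address is a multiple of the vector size.
   Hypothetical names.  */
static int
example_all_addresses_aligned (const unsigned long *addrs, unsigned n,
                               unsigned long mask)
{
  unsigned long or_tmp = 0;
  unsigned i;

  /* OR all addresses together, then test the low bits in one go:
     ((addr_1 | ... | addr_n) & mask) == 0.  */
  for (i = 0; i < n; i++)
    or_tmp |= addrs[i];
  return (or_tmp & mask) == 0;
}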
7511 /* Function vect_vfa_segment_size.
7513 Create an expression that computes the size of the segment
7514 that will be accessed for a data reference. The function takes into
7515 account that realignment loads may access one more vector.
7517 Input:
7518 DR: The data reference.
7519 VECT_FACTOR: vectorization factor.
7521 Return an expression whose value is the size of segment which will be
7522 accessed by DR. */
7524 static tree
7525 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
7527 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
7528 DR_STEP (dr), vect_factor);
7530 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
7532 tree vector_size = TYPE_SIZE_UNIT
7533 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
7535 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
7536 segment_length, vector_size);
7538 return fold_convert (sizetype, segment_length);
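/* Illustrative sketch (not part of the vectorizer): the segment size
   computed above, in plain C.  REALIGN is nonzero when the data-ref uses
   the optimized explicit-realignment scheme, which may touch one extra
   vector.  Hypothetical names.  */
static long
example_segment_size (long step, long vect_factor, long vector_size,
                      int realign)
{
  long length = step * vect_factor;
  if (realign)
    length += vector_size;
  return length;
}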
7541 /* Function vect_create_cond_for_alias_checks.
7543 Create a conditional expression that represents the run-time checks for
7544 overlapping of address ranges represented by a list of data reference
7545 relations passed as input.
7547 Input:
7548 COND_EXPR - input conditional expression. New conditions will be chained
7549 with logical AND operation.
7550 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
7551 to be checked.
7553 Output:
7554 COND_EXPR - conditional expression.
7555 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7556 expression.
7559 The returned value is the conditional expression to be used in the if
7560 statement that controls which version of the loop gets executed at runtime. */
7563 static void
7564 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
7565 tree * cond_expr,
7566 gimple_seq * cond_expr_stmt_list)
7568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7569 VEC (ddr_p, heap) * may_alias_ddrs =
7570 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
7571 tree vect_factor =
7572 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
7574 ddr_p ddr;
7575 unsigned int i;
7576 tree part_cond_expr;
7578 /* Create expression
7579 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
7580 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
7581 &&
7582 ...
7583 &&
7584 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
7585 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
7587 if (VEC_empty (ddr_p, may_alias_ddrs))
7588 return;
7590 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
7592 struct data_reference *dr_a, *dr_b;
7593 gimple dr_group_first_a, dr_group_first_b;
7594 tree addr_base_a, addr_base_b;
7595 tree segment_length_a, segment_length_b;
7596 gimple stmt_a, stmt_b;
7598 dr_a = DDR_A (ddr);
7599 stmt_a = DR_STMT (DDR_A (ddr));
7600 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
7601 if (dr_group_first_a)
7603 stmt_a = dr_group_first_a;
7604 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
7607 dr_b = DDR_B (ddr);
7608 stmt_b = DR_STMT (DDR_B (ddr));
7609 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
7610 if (dr_group_first_b)
7612 stmt_b = dr_group_first_b;
7613 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
7616 addr_base_a =
7617 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
7618 NULL_TREE, loop);
7619 addr_base_b =
7620 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
7621 NULL_TREE, loop);
7623 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
7624 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
7626 if (vect_print_dump_info (REPORT_DR_DETAILS))
7628 fprintf (vect_dump,
7629 "create runtime check for data references ");
7630 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
7631 fprintf (vect_dump, " and ");
7632 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
7636 part_cond_expr =
7637 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
7638 fold_build2 (LT_EXPR, boolean_type_node,
7639 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
7640 addr_base_a,
7641 segment_length_a),
7642 addr_base_b),
7643 fold_build2 (LT_EXPR, boolean_type_node,
7644 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
7645 addr_base_b,
7646 segment_length_b),
7647 addr_base_a));
7649 if (*cond_expr)
7650 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7651 *cond_expr, part_cond_expr);
7652 else
7653 *cond_expr = part_cond_expr;
7655 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7656 fprintf (vect_dump, "created %u versioning for alias checks.\n",
7657 VEC_length (ddr_p, may_alias_ddrs));
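/* Illustrative sketch (not part of the vectorizer): the per-pair runtime
   disambiguation test chained together above, in plain C.  Two accessed
   segments are treated as independent if one ends before the other begins;
   the vectorized loop version runs only when this holds for every may-alias
   pair.  Hypothetical names.  */
static int
example_segments_do_not_overlap (unsigned long addr_a, unsigned long len_a,
                                 unsigned long addr_b, unsigned long len_b)
{
  return (addr_a + len_a < addr_b) || (addr_b + len_b < addr_a);
}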
7661 /* Function vect_loop_versioning.
7663 If the loop has data references that may or may not be aligned and/or
7664 has data reference relations whose independence was not proven, then
7665 two versions of the loop need to be generated, one which is vectorized
7666 and one which isn't. A test is then generated to control which of the
7667 loops is executed. The test checks for the alignment of all of the
7668 data references that may or may not be aligned. An additional
7669 sequence of runtime tests is generated for each pair of DDRs whose
7670 independence was not proven. The vectorized version of the loop is
7671 executed only if both alias and alignment tests are passed.
7673 The test generated to check which version of the loop is executed
7674 is modified to also check for profitability as indicated by the
7675 cost model initially. */
7677 static void
7678 vect_loop_versioning (loop_vec_info loop_vinfo)
7680 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7681 struct loop *nloop;
7682 tree cond_expr = NULL_TREE;
7683 gimple_seq cond_expr_stmt_list = NULL;
7684 basic_block condition_bb;
7685 gimple_stmt_iterator gsi, cond_exp_gsi;
7686 basic_block merge_bb;
7687 basic_block new_exit_bb;
7688 edge new_exit_e, e;
7689 gimple orig_phi, new_phi;
7690 tree arg;
7691 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7692 gimple_seq gimplify_stmt_list = NULL;
7693 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
7694 int min_profitable_iters = 0;
7695 unsigned int th;
7697 /* Get profitability threshold for vectorized loop. */
7698 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
7700 th = conservative_cost_threshold (loop_vinfo,
7701 min_profitable_iters);
7703 cond_expr =
7704 build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
7705 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
7707 cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
7708 false, NULL_TREE);
7710 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7711 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
7712 &cond_expr_stmt_list);
7714 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7715 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
7716 &cond_expr_stmt_list);
7718 cond_expr =
7719 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7720 cond_expr =
7721 force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
7722 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
7724 initialize_original_copy_tables ();
7725 nloop = loop_version (loop, cond_expr, &condition_bb,
7726 prob, prob, REG_BR_PROB_BASE - prob, true);
7727 free_original_copy_tables();
7729 /* Loop versioning violates an assumption we try to maintain during
7730 vectorization - that the loop exit block has a single predecessor.
7731 After versioning, the exit block of both loop versions is the same
7732 basic block (i.e. it has two predecessors). Just in order to simplify
7733 the following transformations in the vectorizer, we fix this situation
7734 here by adding a new (empty) block on the exit-edge of the loop,
7735 with the proper loop-exit phis to maintain loop-closed-form. */
7737 merge_bb = single_exit (loop)->dest;
7738 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7739 new_exit_bb = split_edge (single_exit (loop));
7740 new_exit_e = single_exit (loop);
7741 e = EDGE_SUCC (new_exit_bb, 0);
7743 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
7745 orig_phi = gsi_stmt (gsi);
7746 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7747 new_exit_bb);
7748 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7749 add_phi_arg (new_phi, arg, new_exit_e);
7750 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7753 /* End loop-exit-fixes after versioning. */
7755 update_ssa (TODO_update_ssa);
7756 if (cond_expr_stmt_list)
7758 cond_exp_gsi = gsi_last_bb (condition_bb);
7759 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
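/* Illustrative sketch (not part of the vectorizer): the shape of the
   versioning condition assembled above, in plain C.  The vectorized copy of
   the loop is entered only when the scalar iteration count exceeds the
   profitability threshold and the optional alignment and alias checks all
   pass.  Hypothetical names.  */
static int
example_take_vectorized_version (long scalar_loop_iters, unsigned th,
                                 int align_checks_ok, int alias_checks_ok)
{
  return scalar_loop_iters > (long) th && align_checks_ok && alias_checks_ok;
}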
7763 /* Remove a group of stores (for SLP or interleaving), free their
7764 stmt_vec_info. */
7766 static void
7767 vect_remove_stores (gimple first_stmt)
7769 gimple next = first_stmt;
7770 gimple tmp;
7771 gimple_stmt_iterator next_si;
7773 while (next)
7775 /* Free the attached stmt_vec_info and remove the stmt. */
7776 next_si = gsi_for_stmt (next);
7777 gsi_remove (&next_si, true);
7778 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
7779 free_stmt_vec_info (next);
7780 next = tmp;
7785 /* Vectorize SLP instance tree in postorder. */
7787 static bool
7788 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
7789 unsigned int vectorization_factor)
7791 gimple stmt;
7792 bool strided_store, is_store;
7793 gimple_stmt_iterator si;
7794 stmt_vec_info stmt_info;
7795 unsigned int vec_stmts_size, nunits, group_size;
7796 tree vectype;
7798 if (!node)
7799 return false;
7801 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
7802 vectorization_factor);
7803 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
7804 vectorization_factor);
7806 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
7807 stmt_info = vinfo_for_stmt (stmt);
7808 /* VECTYPE is the type of the destination. */
7809 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
7810 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
7811 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7813 /* For each SLP instance calculate number of vector stmts to be created
7814 for the scalar stmts in each node of the SLP tree. Number of vector
7815 elements in one vector iteration is the number of scalar elements in
7816 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
7817 size. */
7818 vec_stmts_size = (vectorization_factor * group_size) / nunits;
7820 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
7821 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7823 if (vect_print_dump_info (REPORT_DETAILS))
7825 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7826 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
7829 si = gsi_for_stmt (stmt);
7830 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7831 if (is_store)
7833 if (DR_GROUP_FIRST_DR (stmt_info))
7834 /* If IS_STORE is TRUE, the vectorization of the
7835 interleaving chain was completed - free all the stores in
7836 the chain. */
7837 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7838 else
7839 /* FORNOW: SLP originates only from strided stores. */
7840 gcc_unreachable ();
7842 return true;
7845 /* FORNOW: SLP originates only from strided stores. */
7846 return false;
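/* Illustrative sketch (not part of the vectorizer): the number of vector
   stmts allocated per SLP node above, in plain C.  One vector holds NUNITS
   scalar elements and each scalar iteration of the group contributes
   GROUP_SIZE elements, so VF iterations need (VF * GROUP_SIZE) / NUNITS
   vectors.  Hypothetical names.  */
static unsigned
example_slp_vec_stmts (unsigned vectorization_factor, unsigned group_size,
                       unsigned nunits)
{
  return (vectorization_factor * group_size) / nunits;
}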
7850 static bool
7851 vect_schedule_slp (loop_vec_info loop_vinfo)
7853 VEC (slp_instance, heap) *slp_instances =
7854 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7855 slp_instance instance;
7856 unsigned int i;
7857 bool is_store = false;
7859 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7861 /* Schedule the tree of INSTANCE. */
7862 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
7863 instance,
7864 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
7866 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7867 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7868 fprintf (vect_dump, "vectorizing stmts using SLP.");
7871 return is_store;
7874 /* Function vect_transform_loop.
7876 The analysis phase has determined that the loop is vectorizable.
7877 Vectorize the loop - create vectorized stmts to replace the scalar
7878 stmts in the loop, and update the loop exit condition. */
7880 void
7881 vect_transform_loop (loop_vec_info loop_vinfo)
7883 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7884 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7885 int nbbs = loop->num_nodes;
7886 gimple_stmt_iterator si;
7887 int i;
7888 tree ratio = NULL;
7889 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7890 bool strided_store;
7891 bool slp_scheduled = false;
7892 unsigned int nunits;
7894 if (vect_print_dump_info (REPORT_DETAILS))
7895 fprintf (vect_dump, "=== vec_transform_loop ===");
7897 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7898 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7899 vect_loop_versioning (loop_vinfo);
7901 /* CHECKME: we wouldn't need this if we called update_ssa once
7902 for all loops. */
7903 bitmap_zero (vect_memsyms_to_rename);
7905 /* Peel the loop if there are data refs with unknown alignment.
7906 Only one data ref with unknown store is allowed. */
7908 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7909 vect_do_peeling_for_alignment (loop_vinfo);
7911 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7912 compile time constant), or it is a constant that doesn't divide by the
7913 vectorization factor, then an epilog loop needs to be created.
7914 We therefore duplicate the loop: the original loop will be vectorized,
7915 and will compute the first (n/VF) iterations. The second copy of the loop
7916 will remain scalar and will compute the remaining (n%VF) iterations.
7917 (VF is the vectorization factor). */
7919 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7920 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7921 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7922 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
7923 else
7924 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7925 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7927 /* 1) Make sure the loop header has exactly two entries
7928 2) Make sure we have a preheader basic block. */
7930 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7932 split_edge (loop_preheader_edge (loop));
7934 /* FORNOW: the vectorizer supports only loops whose body consists
7935 of one basic block (header + empty latch). When the vectorizer
7936 supports more involved loop forms, the order in which the BBs are
7937 traversed needs to be reconsidered. */
7939 for (i = 0; i < nbbs; i++)
7941 basic_block bb = bbs[i];
7942 stmt_vec_info stmt_info;
7943 gimple phi;
7945 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
7947 phi = gsi_stmt (si);
7948 if (vect_print_dump_info (REPORT_DETAILS))
7950 fprintf (vect_dump, "------>vectorizing phi: ");
7951 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
7953 stmt_info = vinfo_for_stmt (phi);
7954 if (!stmt_info)
7955 continue;
7957 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7958 && !STMT_VINFO_LIVE_P (stmt_info))
7959 continue;
7961 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7962 != (unsigned HOST_WIDE_INT) vectorization_factor)
7963 && vect_print_dump_info (REPORT_DETAILS))
7964 fprintf (vect_dump, "multiple-types.");
7966 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7968 if (vect_print_dump_info (REPORT_DETAILS))
7969 fprintf (vect_dump, "transform phi.");
7970 vect_transform_stmt (phi, NULL, NULL, NULL);
7974 for (si = gsi_start_bb (bb); !gsi_end_p (si);)
7976 gimple stmt = gsi_stmt (si);
7977 bool is_store;
7979 if (vect_print_dump_info (REPORT_DETAILS))
7981 fprintf (vect_dump, "------>vectorizing statement: ");
7982 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
7985 stmt_info = vinfo_for_stmt (stmt);
7987 /* vector stmts created in the outer-loop during vectorization of
7988 stmts in an inner-loop may not have a stmt_info, and do not
7989 need to be vectorized. */
7990 if (!stmt_info)
7992 gsi_next (&si);
7993 continue;
7996 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7997 && !STMT_VINFO_LIVE_P (stmt_info))
7999 gsi_next (&si);
8000 continue;
8003 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
8004 nunits =
8005 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
8006 if (!STMT_SLP_TYPE (stmt_info)
8007 && nunits != (unsigned int) vectorization_factor
8008 && vect_print_dump_info (REPORT_DETAILS))
8009 /* For SLP, VF is set according to the unrolling factor, and not to the
8010 vector size, hence for SLP this print is not valid. */
8011 fprintf (vect_dump, "multiple-types.");
8013 /* SLP. Schedule all the SLP instances when the first SLP stmt is
8014 reached. */
8015 if (STMT_SLP_TYPE (stmt_info))
8017 if (!slp_scheduled)
8019 slp_scheduled = true;
8021 if (vect_print_dump_info (REPORT_DETAILS))
8022 fprintf (vect_dump, "=== scheduling SLP instances ===");
8024 is_store = vect_schedule_slp (loop_vinfo);
8026 /* IS_STORE is true if STMT is a store. Stores cannot be of
8027 hybrid SLP type. They are removed in
8028 vect_schedule_slp_instance and their vinfo is destroyed. */
8029 if (is_store)
8031 gsi_next (&si);
8032 continue;
8036 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
8037 if (PURE_SLP_STMT (stmt_info))
8039 gsi_next (&si);
8040 continue;
8044 /* -------- vectorize statement ------------ */
8045 if (vect_print_dump_info (REPORT_DETAILS))
8046 fprintf (vect_dump, "transform statement.");
8048 strided_store = false;
8049 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
8050 if (is_store)
8052 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
8054 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
8055 interleaving chain was completed - free all the stores in
8056 the chain. */
8057 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
8058 gsi_remove (&si, true);
8059 continue;
8061 else
8063 /* Free the attached stmt_vec_info and remove the stmt. */
8064 free_stmt_vec_info (stmt);
8065 gsi_remove (&si, true);
8066 continue;
8069 gsi_next (&si);
8070 } /* stmts in BB */
8071 } /* BBs in loop */
8073 slpeel_make_loop_iterate_ntimes (loop, ratio);
8075 mark_set_for_renaming (vect_memsyms_to_rename);
8077 /* The memory tags and pointers in vectorized statements need to
8078 have their SSA forms updated. FIXME, why can't this be delayed
8079 until all the loops have been transformed? */
8080 update_ssa (TODO_update_ssa);
8082 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
8083 fprintf (vect_dump, "LOOP VECTORIZED.");
8084 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
8085 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");