1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
/* Utility functions for the code transformation. */

/* Vectorize a single statement; emits the vector stmt(s) at the iterator.  */
static bool vect_transform_stmt (gimple, gimple_stmt_iterator *, bool *,
                                 slp_tree, slp_instance);
/* Create an SSA destination variable for a vectorized computation.  */
static tree vect_create_destination_var (tree, tree);
/* Build the vector pointer (and its update chain) for a data reference.  */
static tree vect_create_data_ref_ptr
  (gimple, struct loop *, tree, tree *, gimple *, bool, bool *, tree);
/* Compute the address of the first location accessed by a data-ref.  */
static tree vect_create_addr_base_for_vector_ref
  (gimple, gimple_seq *, tree, struct loop *);
/* Fresh vectorizer temporary with a kind-specific name prefix.  */
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
/* Fetch/create the vector def for a scalar operand.  */
static tree vect_get_vec_def_for_operand (tree, gimple, tree *);
/* Initialize a vector variable before the loop.  */
static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
/* Insert a generated vector stmt and record its stmt-info.  */
static void vect_finish_stmt_generation
  (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
/* Check whether a COND expression is vectorizable.  */
static bool vect_is_simple_cond (tree, loop_vec_info);
/* Emit the epilogue code that reduces a vector accumulator to a scalar.  */
static void vect_create_epilog_for_reduction
  (tree, gimple, int, enum tree_code, gimple);
/* Build the initial vector def for a reduction.  */
static tree get_initial_def_for_reduction (gimple, tree, tree *);

/* Utility function dealing with loop peeling (not peeling itself). */

/* Emit niters/ratio temporaries on the loop preheader.  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
/* Build an expression for the loop iteration count.  */
static tree vect_build_loop_niters (loop_vec_info);
/* Fix up induction variables after the vector loop is created.  */
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
/* Compute how many scalar iterations the alignment prologue needs.  */
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
/* Shift one data-ref's initial offset by NITERS peeled iterations.  */
static void vect_update_init_of_dr (struct data_reference *, tree niters);
/* Apply vect_update_init_of_dr to every data-ref in the loop.  */
static void vect_update_inits_of_drs (loop_vec_info, tree);
/* Smallest vectorization factor for which an operation is worthwhile.  */
static int vect_min_worthwhile_factor (enum tree_code);
78 cost_for_stmt (gimple stmt
)
80 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
82 switch (STMT_VINFO_TYPE (stmt_info
))
84 case load_vec_info_type
:
85 return TARG_SCALAR_LOAD_COST
;
86 case store_vec_info_type
:
87 return TARG_SCALAR_STORE_COST
;
88 case op_vec_info_type
:
89 case condition_vec_info_type
:
90 case assignment_vec_info_type
:
91 case reduc_vec_info_type
:
92 case induc_vec_info_type
:
93 case type_promotion_vec_info_type
:
94 case type_demotion_vec_info_type
:
95 case type_conversion_vec_info_type
:
96 case call_vec_info_type
:
97 return TARG_SCALAR_STMT_COST
;
98 case undef_vec_info_type
:
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.

   NOTE(review): this definition is an incomplete extraction.  The return
   type, braces, `int i;', the early returns, `runtime_test = true;', the
   `vec_outside_cost +=' targets of the versioning-check costs, several
   else-branches, `continue;', `+ peel_guard_costs;', the denominator tail
   `- vec_inside_cost);', two fprintf arguments and the enclosing
   conditionals around the runtime-check section were all dropped.  The
   surviving tokens are kept verbatim below, with review notes where text
   is missing; restore from the original tree-vect-transform.c before
   compiling.  */

vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)

  /* "inside" = per-iteration cost of the loop body; "outside" = one-time
     cost outside the loop (prologue, epilogue, guards, versioning).  */
  int min_profitable_iters;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  int vec_inside_cost = 0;
  int vec_outside_cost = 0;
  int scalar_single_iter_cost = 0;
  int scalar_outside_cost = 0;
  bool runtime_test = false;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  int peel_guard_costs = 0;
  int innerloop_iters = 0, factor;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  /* Cost model disabled.  (NOTE(review): the early-return body of this
     guard was dropped.)  */
  if (!flag_vect_cost_model)
    if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "cost model disabled.");

  /* If the number of iterations is unknown, or the
     peeling-for-misalignment amount is unknown, we will have to generate
     a runtime test to test the loop count against the threshold.
     (NOTE(review): the body -- presumably `runtime_test = true;' -- was
     dropped.)  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (byte_misalign < 0))

  /* Requires loop versioning tests to handle misalignment.
     (NOTE(review): the `vec_outside_cost += ... *' head of the statement
     below was dropped, leaving an orphaned VEC_length expression.)  */
  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    /* FIXME: Make cost depend on complexity of individual check.  */
    VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "cost model: Adding cost of checks for loop "
             "versioning to treat misalignment.\n");

  /* Same for the may-alias runtime checks.  (NOTE(review): orphaned
     VEC_length expression, as above.)  */
  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    /* FIXME: Make cost depend on complexity of individual check.  */
    VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "cost model: Adding cost of checks for loop "
             "versioning aliasing.\n");

  /* A versioned loop pays one taken branch on the vector path.  */
  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;

  /* Count statements in scalar loop.  Using this as scalar cost for a
     single iteration (comment truncated by extraction).
     TODO: Add outer loop support.
     TODO: Consider assigning different costs to different scalar
     statements.  */

  innerloop_iters = 50; /* FIXME */

  /* NOTE(review): `i' is used below but its declaration was dropped;
     the `else factor = 1;' branch and the `continue;' that skips
     irrelevant stmts were dropped as well.  */
  for (i = 0; i < nbbs; i++)

      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      /* Statements in an inner loop execute innerloop_iters times per
         outer iteration; weight them accordingly.  */
      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))

          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
          scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);

  /* Add additional cost for the peeled instructions in prologue and epilogue
     loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  if (byte_misalign < 0)

      peel_iters_prologue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "prologue peel iters set to vf/2.");

      /* If peeling for alignment is unknown, loop bound of main loop
         becomes unknown (comment truncated by extraction).  */
      peel_iters_epilogue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "epilogue peel iters set to vf/2 because "
                 "peeling for alignment is unknown .");

      /* If peeled iterations are unknown, count a taken branch and a not taken
         branch per peeled loop.  Even if scalar loop iterations are known,
         vector iterations are not known since peeled prologue iterations are
         not known.  Hence guards remain the same.  */
      peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
                               + TARG_COND_NOT_TAKEN_BRANCH_COST);

  /* NOTE(review): the `else { if (byte_misalign) ... else ... }' structure
     around the two prologue computations below was dropped; as written the
     second assignment would clobber the first.  */
      struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
      int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      peel_iters_prologue = nelements - (byte_misalign / element_size);
      peel_iters_prologue = 0;

      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))

          peel_iters_epilogue = vf/2;
          if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "cost model: "
                     "epilogue peel iters set to vf/2 because "
                     "loop iterations are unknown .");

          /* If peeled iterations are known but number of scalar loop
             iterations are unknown, count a taken branch per peeled loop.  */
          peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;

      /* Compile-time-known iteration count: clamp the prologue and derive
         the epilogue remainder exactly.  (NOTE(review): the `else' that
         introduced this branch was dropped.)  */
          int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
          peel_iters_prologue = niters < peel_iters_prologue ?
                                niters : peel_iters_prologue;
          peel_iters_epilogue = (niters - peel_iters_prologue) % vf;

  /* Peeled iterations run the scalar body.  (NOTE(review): the trailing
     `+ peel_guard_costs;' terminator of this statement was dropped.)  */
  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                      + (peel_iters_epilogue * scalar_single_iter_cost)

  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDED with the versioning condition.  Hence scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)

     Hence run-time scalar is incremented by not-taken branch cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       prologue = scalar_iters
       ...
       if (prologue == num_iters)

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during prologue check, it
     has to be done with the epilogue check.

       if (prologue == num_iters)
       ...
       if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBS's differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.

     (NOTE(review): the `if (runtime_test)' guard around this whole
     section appears to have been dropped.)  */

  /* Cost model check occurs at versioning.  */
  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;

  /* Cost model occurs at prologue generation.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
        + TARG_COND_NOT_TAKEN_BRANCH_COST;
  /* Cost model check occurs at epilogue generation.
     (NOTE(review): the `else' before this statement was dropped.)  */
    scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;

  /* Fold the per-SLP-instance costs into the totals.  */
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)

      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);

  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold:

     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC

     where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost, VF = vectorization factor,
     PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
     SOC = scalar outside cost for run time cost model check.  */

  if ((scalar_single_iter_cost * vf) > vec_inside_cost)

      if (vec_outside_cost <= 0)
        min_profitable_iters = 1;
      /* NOTE(review): the `else' and the denominator tail
         `- vec_inside_cost);' of this division were dropped.  */
        min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
                                - vec_inside_cost * peel_iters_prologue
                                - vec_inside_cost * peel_iters_epilogue)
                               / ((scalar_single_iter_cost * vf)

          /* Integer division rounds down; bump by one if the computed
             count does not actually satisfy the inequality.  */
          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= ((vec_inside_cost * min_profitable_iters)
                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
            min_profitable_iters++;

  /* vector version will never be profitable.
     (NOTE(review): the `else' branch structure and its `return -1;' or
     similar were dropped.)  */
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: vector iteration cost = %d "
                 "is divisible by scalar iteration cost = %d by a factor "
                 "greater than or equal to the vectorization factor = %d .",
                 vec_inside_cost, scalar_single_iter_cost, vf);

  if (vect_print_dump_info (REPORT_COST))

      fprintf (vect_dump, "Cost model analysis: \n");
      /* NOTE(review): the argument lines (vec_inside_cost,
         vec_outside_cost) of the next two calls were dropped.  */
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
               scalar_single_iter_cost);
      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
      fprintf (vect_dump, "  prologue iterations: %d\n",
               peel_iters_prologue);
      fprintf (vect_dump, "  epilogue iterations: %d\n",
               peel_iters_epilogue);
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
               min_profitable_iters);

  /* The threshold can never be smaller than one full vector iteration.  */
  min_profitable_iters =
        min_profitable_iters < vf ? vf : min_profitable_iters;

  /* Because the condition we create is:
     if (niters <= min_profitable_iters)
     then skip the vectorized loop.  */
  min_profitable_iters--;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "  Profitability threshold = %d\n",
             min_profitable_iters);

  return min_profitable_iters;
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.

   NOTE(review): incomplete extraction.  The return type, the trailing
   parameter(s) of the signature (an `int ncopies' at least, judging by
   its use below), local declarations (reduction_op, vectype, code,
   outer_cost, bitsize, optab), braces, `break's and several else-branches
   were dropped.  Surviving tokens kept verbatim.  */

vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,

  gimple stmt, orig_stmt;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Cost of reduction op inside loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;

  stmt = STMT_VINFO_STMT (stmt_info);

  /* Locate the reduction operand; its position depends on the shape of
     the statement's RHS.  (NOTE(review): `break's between cases were
     dropped.)  */
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
    case GIMPLE_BINARY_RHS:
      reduction_op = gimple_assign_rhs2 (stmt);

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  /* NOTE(review): the diagnostic below only makes sense on failure --
     presumably guarded by `if (!vectype)' with an early return in the
     original; confirm against the unmangled source.  */
  if (vect_print_dump_info (REPORT_COST))

      fprintf (vect_dump, "unsupported data-type ");
      print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  /* NOTE(review): this second assignment most likely sat under an
     `if (!orig_stmt)' in the original (fall back to the stmt itself when
     there is no related pattern stmt); the guard was dropped.  */
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);

  /* Add in cost for initial definition.  */
  outer_cost += TARG_SCALAR_TO_VEC_COST;

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one
     statement.  Also requires scalar extract.  */

  if (!nested_in_vect_loop_p (loop, orig_stmt))

      if (reduc_code < NUM_TREE_CODES)
        outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;

          int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          /* NOTE(review): orphaned expression -- its assignment head
             (`tree bitsize =') was dropped.  */
          TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
          int element_bitsize = tree_low_cst (bitsize, 1);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype, optab_default);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
              && optab_handler (vec_shr_optab, mode)->insn_code
                 != CODE_FOR_nothing)
            /* Final reduction via vector shifts and the reduction operator.
               Also requires scalar extract.  */
            outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
                           + TARG_VEC_TO_SCALAR_COST);
          /* Use extracts and reduction op for final reduction.  For N
             elements, we have N extracts and N-1 reduction ops.
             (NOTE(review): the `else' introducing this alternative was
             dropped.)  */
            outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);

  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
551 /* Function vect_model_induction_cost.
553 Models cost for induction operations. */
556 vect_model_induction_cost (stmt_vec_info stmt_info
, int ncopies
)
558 /* loop cost for vec_loop. */
559 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) = ncopies
* TARG_VEC_STMT_COST
;
560 /* prologue cost for vec_init and vec_step. */
561 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
) = 2 * TARG_SCALAR_TO_VEC_COST
;
563 if (vect_print_dump_info (REPORT_COST
))
564 fprintf (vect_dump
, "vect_model_induction_cost: inside_cost = %d, "
565 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
566 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
570 /* Function vect_model_simple_cost.
572 Models cost for simple operations, i.e. those that only emit ncopies of a
573 single op. Right now, this does not account for multiple insns that could
574 be generated for the single vector op. We will handle that shortly. */
577 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
578 enum vect_def_type
*dt
, slp_tree slp_node
)
581 int inside_cost
= 0, outside_cost
= 0;
583 /* The SLP costs were already calculated during SLP tree build. */
584 if (PURE_SLP_STMT (stmt_info
))
587 inside_cost
= ncopies
* TARG_VEC_STMT_COST
;
589 /* FORNOW: Assuming maximum 2 args per stmts. */
590 for (i
= 0; i
< 2; i
++)
592 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_invariant_def
)
593 outside_cost
+= TARG_SCALAR_TO_VEC_COST
;
596 if (vect_print_dump_info (REPORT_COST
))
597 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
598 "outside_cost = %d .", inside_cost
, outside_cost
);
600 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
601 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
602 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
606 /* Function vect_cost_strided_group_size
608 For strided load or store, return the group_size only if it is the first
609 load or store of a group, else return 1. This ensures that group size is
610 only returned once per group. */
613 vect_cost_strided_group_size (stmt_vec_info stmt_info
)
615 gimple first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
617 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
618 return DR_GROUP_SIZE (stmt_info
);
624 /* Function vect_model_store_cost
626 Models cost for stores. In the case of strided accesses, one access
627 has the overhead of the strided access attributed to it. */
630 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
631 enum vect_def_type dt
, slp_tree slp_node
)
634 int inside_cost
= 0, outside_cost
= 0;
636 /* The SLP costs were already calculated during SLP tree build. */
637 if (PURE_SLP_STMT (stmt_info
))
640 if (dt
== vect_constant_def
|| dt
== vect_invariant_def
)
641 outside_cost
= TARG_SCALAR_TO_VEC_COST
;
643 /* Strided access? */
644 if (DR_GROUP_FIRST_DR (stmt_info
) && !slp_node
)
645 group_size
= vect_cost_strided_group_size (stmt_info
);
646 /* Not a strided access. */
650 /* Is this an access in a group of stores, which provide strided access?
651 If so, add in the cost of the permutes. */
654 /* Uses a high and low interleave operation for each needed permute. */
655 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
656 * TARG_VEC_STMT_COST
;
658 if (vect_print_dump_info (REPORT_COST
))
659 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
664 /* Costs of the stores. */
665 inside_cost
+= ncopies
* TARG_VEC_STORE_COST
;
667 if (vect_print_dump_info (REPORT_COST
))
668 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
669 "outside_cost = %d .", inside_cost
, outside_cost
);
671 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
672 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
673 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
677 /* Function vect_model_load_cost
679 Models cost for loads. In the case of strided accesses, the last access
680 has the overhead of the strided access attributed to it. Since unaligned
681 accesses are supported for loads, we also account for the costs of the
682 access scheme chosen. */
685 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
, slp_tree slp_node
)
689 int alignment_support_cheme
;
691 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
692 int inside_cost
= 0, outside_cost
= 0;
694 /* The SLP costs were already calculated during SLP tree build. */
695 if (PURE_SLP_STMT (stmt_info
))
698 /* Strided accesses? */
699 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
700 if (first_stmt
&& !slp_node
)
702 group_size
= vect_cost_strided_group_size (stmt_info
);
703 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
705 /* Not a strided access. */
712 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
714 /* Is this an access in a group of loads providing strided access?
715 If so, add in the cost of the permutes. */
718 /* Uses an even and odd extract operations for each needed permute. */
719 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
720 * TARG_VEC_STMT_COST
;
722 if (vect_print_dump_info (REPORT_COST
))
723 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
728 /* The loads themselves. */
729 switch (alignment_support_cheme
)
733 inside_cost
+= ncopies
* TARG_VEC_LOAD_COST
;
735 if (vect_print_dump_info (REPORT_COST
))
736 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
740 case dr_unaligned_supported
:
742 /* Here, we assign an additional cost for the unaligned load. */
743 inside_cost
+= ncopies
* TARG_VEC_UNALIGNED_LOAD_COST
;
745 if (vect_print_dump_info (REPORT_COST
))
746 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
751 case dr_explicit_realign
:
753 inside_cost
+= ncopies
* (2*TARG_VEC_LOAD_COST
+ TARG_VEC_STMT_COST
);
755 /* FIXME: If the misalignment remains fixed across the iterations of
756 the containing loop, the following cost should be added to the
758 if (targetm
.vectorize
.builtin_mask_for_load
)
759 inside_cost
+= TARG_VEC_STMT_COST
;
763 case dr_explicit_realign_optimized
:
765 if (vect_print_dump_info (REPORT_COST
))
766 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
769 /* Unaligned software pipeline has a load of an address, an initial
770 load, and possibly a mask operation to "prime" the loop. However,
771 if this is an access in a group of loads, which provide strided
772 access, then the above cost should only be considered for one
773 access in the group. Inside the loop, there is a load op
774 and a realignment op. */
776 if ((!DR_GROUP_FIRST_DR (stmt_info
)) || group_size
> 1 || slp_node
)
778 outside_cost
= 2*TARG_VEC_STMT_COST
;
779 if (targetm
.vectorize
.builtin_mask_for_load
)
780 outside_cost
+= TARG_VEC_STMT_COST
;
783 inside_cost
+= ncopies
* (TARG_VEC_LOAD_COST
+ TARG_VEC_STMT_COST
);
792 if (vect_print_dump_info (REPORT_COST
))
793 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
794 "outside_cost = %d .", inside_cost
, outside_cost
);
796 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
797 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
798 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
802 /* Function vect_get_new_vect_var.
804 Returns a name for a new variable. The current naming scheme appends the
805 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
806 the name of vectorizer generated variables, and appends that to NAME if
810 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
817 case vect_simple_var
:
820 case vect_scalar_var
:
823 case vect_pointer_var
:
832 char* tmp
= concat (prefix
, name
, NULL
);
833 new_vect_var
= create_tmp_var (type
, tmp
);
837 new_vect_var
= create_tmp_var (type
, prefix
);
839 /* Mark vector typed variable as a gimple register variable. */
840 if (TREE_CODE (type
) == VECTOR_TYPE
)
841 DECL_GIMPLE_REG_P (new_vect_var
) = true;
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is be added to the initial address.
   LOOP: Specify relative to which loop-nest should the address be computed.
         For example, when the dataref is in an inner-loop nested in an
         outer-loop that is now being vectorized, LOOP can be either the
         outer-loop, or the inner-loop.  The first memory location accessed
         by the following dataref ('in' points to short):
         (example body lost in extraction)
         if LOOP=i_loop: &in (relative to i_loop)
         if LOOP=j_loop: &in+i*2B (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.

   NOTE(review): incomplete extraction -- the return type, the trailing
   signature parameters (an `offset' and a `loop' at least, judging by the
   uses below), declarations of base_name/dest/vec_stmt, braces, an
   `if (offset)' guard, one call argument and the final return were
   dropped.  Surviving tokens kept verbatim.  */

vect_create_addr_base_for_vector_ref (gimple stmt,
                                      gimple_seq *new_stmt_list,

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Loop that syntactically contains STMT; may differ from the LOOP
     parameter when vectorizing an outer loop.  */
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree data_ref_base_var;
  tree addr_base, addr_expr;
  gimple_seq seq = NULL;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));

  /* When the requested loop differs from the containing one, use the DR
     fields cached in the stmt-info (relative to the outer loop) instead
     of the data-ref's own.  */
  if (loop != containing_loop)

      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

      gcc_assert (nested_in_vect_loop_p (loop, stmt));

      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));

  /* Create data_ref_base */
  base_name = build_fold_indirect_ref (data_ref_base);
  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
  add_referenced_var (data_ref_base_var);
  /* NOTE(review): the final argument of this call -- presumably
     `data_ref_base_var);' -- was dropped by the extraction.  */
  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
  gimple_seq_add_seq (new_stmt_list, seq);

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  base_offset = fold_convert (sizetype, base_offset);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  /* NOTE(review): the block below was presumably guarded by `if (offset)'
     in the original (OFFSET is optional per the header comment); the
     guard and braces were dropped.  */
      tree tmp = create_tmp_var (sizetype, "offset");

      add_referenced_var (tmp);
      /* Scale the caller-supplied element offset by the element size.  */
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
      gimple_seq_add_seq (new_stmt_list, seq);

  /* base + base_offset */
  addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
                           data_ref_base, base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (vect_print_dump_info (REPORT_DETAILS))

      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
966 /* Function vect_create_data_ref_ptr.
968 Create a new pointer to vector type (vp), that points to the first location
969 accessed in the loop by STMT, along with the def-use update chain to
970 appropriately advance the pointer through the loop iterations. Also set
971 aliasing information for the pointer. This vector pointer is used by the
972 callers to this function to create a memory reference expression for vector
976 1. STMT: a stmt that references memory. Expected to be of the form
977 GIMPLE_ASSIGN <name, data-ref> or
978 GIMPLE_ASSIGN <data-ref, name>.
979 2. AT_LOOP: the loop where the vector memref is to be created.
980 3. OFFSET (optional): an offset to be added to the initial address accessed
981 by the data-ref in STMT.
982 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
983 pointing to the initial address.
984 5. TYPE: if not NULL indicates the required type of the data-ref.
987 1. Declare a new ptr to vector_type, and have it point to the base of the
988 data reference (initial addressed accessed by the data reference).
989 For example, for vector of type V8HI, the following code is generated:
992 vp = (v8hi *)initial_address;
994 if OFFSET is not supplied:
995 initial_address = &a[init];
996 if OFFSET is supplied:
997 initial_address = &a[init + OFFSET];
999 Return the initial_address in INITIAL_ADDRESS.
1001 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
1002 update the pointer in each iteration of the loop.
1004 Return the increment stmt that updates the pointer in PTR_INCR.
1006 3. Set INV_P to true if the access pattern of the data reference in the
1007 vectorized loop is invariant. Set it to false otherwise.
1009 4. Return the pointer. */
1012 vect_create_data_ref_ptr (gimple stmt
, struct loop
*at_loop
,
1013 tree offset
, tree
*initial_address
, gimple
*ptr_incr
,
1014 bool only_init
, bool *inv_p
, tree type
)
1017 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1018 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1019 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1020 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1021 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
1022 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1028 gimple_seq new_stmt_list
= NULL
;
1032 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1034 gimple_stmt_iterator incr_gsi
;
1036 tree indx_before_incr
, indx_after_incr
;
1040 /* Check the step (evolution) of the load in LOOP, and record
1041 whether it's invariant. */
1042 if (nested_in_vect_loop
)
1043 step
= STMT_VINFO_DR_STEP (stmt_info
);
1045 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
1047 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
1052 /* Create an expression for the first address accessed by this load
1054 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
1056 if (vect_print_dump_info (REPORT_DETAILS
))
1058 tree data_ref_base
= base_name
;
1059 fprintf (vect_dump
, "create vector-pointer variable to type: ");
1060 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
1061 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
1062 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
1063 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
1064 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
1065 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
1066 fprintf (vect_dump
, " vectorizing a record based array ref: ");
1067 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
1068 fprintf (vect_dump
, " vectorizing a pointer ref: ");
1069 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
1072 /** (1) Create the new vector-pointer variable: **/
1074 vect_ptr_type
= build_pointer_type (type
);
1076 vect_ptr_type
= build_pointer_type (vectype
);
1078 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
1079 get_name (base_name
));
1080 add_referenced_var (vect_ptr
);
1082 /** (2) Add aliasing information to the new vector-pointer:
1083 (The points-to info (DR_PTR_INFO) may be defined later.) **/
1085 tag
= DR_SYMBOL_TAG (dr
);
1088 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
1089 tag must be created with tag added to its may alias list. */
1091 new_type_alias (vect_ptr
, tag
, DR_REF (dr
));
1093 set_symbol_mem_tag (vect_ptr
, tag
);
1095 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
1096 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
1097 def-use update cycles for the pointer: One relative to the outer-loop
1098 (LOOP), which is what steps (3) and (4) below do. The other is relative
1099 to the inner-loop (which is the inner-most loop containing the dataref),
1100 and this is done be step (5) below.
1102 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
1103 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1104 redundant. Steps (3),(4) create the following:
1107 LOOP: vp1 = phi(vp0,vp2)
1113 If there is an inner-loop nested in loop, then step (5) will also be
1114 applied, and an additional update in the inner-loop will be created:
1117 LOOP: vp1 = phi(vp0,vp2)
1119 inner: vp3 = phi(vp1,vp4)
1120 vp4 = vp3 + inner_step
1126 /** (3) Calculate the initial address the vector-pointer, and set
1127 the vector-pointer to point to it before the loop: **/
1129 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1131 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
1133 pe
= loop_preheader_edge (loop
);
1136 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
1137 gcc_assert (!new_bb
);
1140 *initial_address
= new_temp
;
1142 /* Create: p = (vectype *) initial_base */
1143 vec_stmt
= gimple_build_assign (vect_ptr
,
1144 fold_convert (vect_ptr_type
, new_temp
));
1145 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
1146 gimple_assign_set_lhs (vec_stmt
, vect_ptr_init
);
1147 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
1148 gcc_assert (!new_bb
);
1151 /** (4) Handle the updating of the vector-pointer inside the loop.
1152 This is needed when ONLY_INIT is false, and also when AT_LOOP
1153 is the inner-loop nested in LOOP (during outer-loop vectorization).
1156 if (only_init
&& at_loop
== loop
) /* No update in loop is required. */
1158 /* Copy the points-to information if it exists. */
1159 if (DR_PTR_INFO (dr
))
1160 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
1161 vptr
= vect_ptr_init
;
1165 /* The step of the vector pointer is the Vector Size. */
1166 tree step
= TYPE_SIZE_UNIT (vectype
);
1167 /* One exception to the above is when the scalar step of the load in
1168 LOOP is zero. In this case the step here is also zero. */
1170 step
= size_zero_node
;
1172 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
1174 create_iv (vect_ptr_init
,
1175 fold_convert (vect_ptr_type
, step
),
1176 NULL_TREE
, loop
, &incr_gsi
, insert_after
,
1177 &indx_before_incr
, &indx_after_incr
);
1178 incr
= gsi_stmt (incr_gsi
);
1179 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
1181 /* Copy the points-to information if it exists. */
1182 if (DR_PTR_INFO (dr
))
1184 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
1185 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
1187 merge_alias_info (vect_ptr_init
, indx_before_incr
);
1188 merge_alias_info (vect_ptr_init
, indx_after_incr
);
1192 vptr
= indx_before_incr
;
1195 if (!nested_in_vect_loop
|| only_init
)
1199 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1200 nested in LOOP, if exists: **/
1202 gcc_assert (nested_in_vect_loop
);
1205 standard_iv_increment_position (containing_loop
, &incr_gsi
,
1207 create_iv (vptr
, fold_convert (vect_ptr_type
, DR_STEP (dr
)), NULL_TREE
,
1208 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
1210 incr
= gsi_stmt (incr_gsi
);
1211 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
1213 /* Copy the points-to information if it exists. */
1214 if (DR_PTR_INFO (dr
))
1216 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
1217 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
1219 merge_alias_info (vect_ptr_init
, indx_before_incr
);
1220 merge_alias_info (vect_ptr_init
, indx_after_incr
);
1224 return indx_before_incr
;
1231 /* Function bump_vector_ptr
1233 Increment a pointer (to a vector type) by vector-size. If requested,
1234 i.e. if PTR-INCR is given, then also connect the new increment stmt
1235 to the existing def-use update-chain of the pointer, by modifying
1236 the PTR_INCR as illustrated below:
1238 The pointer def-use update-chain before this function:
1239 DATAREF_PTR = phi (p_0, p_2)
1241 PTR_INCR: p_2 = DATAREF_PTR + step
1243 The pointer def-use update-chain after this function:
1244 DATAREF_PTR = phi (p_0, p_2)
1246 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1248 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1251 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1253 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1254 the loop. The increment amount across iterations is expected
1256 BSI - location where the new update stmt is to be placed.
1257 STMT - the original scalar memory-access stmt that is being vectorized.
1258 BUMP - optional. The offset by which to bump the pointer. If not given,
1259 the offset is assumed to be vector_size.
1261 Output: Return NEW_DATAREF_PTR as illustrated above.
1266 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
1267 gimple stmt
, tree bump
)
1269 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1270 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1271 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1272 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
1273 tree update
= TYPE_SIZE_UNIT (vectype
);
1276 use_operand_p use_p
;
1277 tree new_dataref_ptr
;
1282 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, ptr_var
,
1283 dataref_ptr
, update
);
1284 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
1285 gimple_assign_set_lhs (incr_stmt
, new_dataref_ptr
);
1286 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
1288 /* Copy the points-to information if it exists. */
1289 if (DR_PTR_INFO (dr
))
1290 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
1291 merge_alias_info (new_dataref_ptr
, dataref_ptr
);
1294 return new_dataref_ptr
;
1296 /* Update the vector-pointer's cross-iteration increment. */
1297 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
1299 tree use
= USE_FROM_PTR (use_p
);
1301 if (use
== dataref_ptr
)
1302 SET_USE (use_p
, new_dataref_ptr
);
1304 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
1307 return new_dataref_ptr
;
1311 /* Function vect_create_destination_var.
1313 Create a new temporary of type VECTYPE. */
1316 vect_create_destination_var (tree scalar_dest
, tree vectype
)
1319 const char *new_name
;
1321 enum vect_var_kind kind
;
1323 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
1324 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
1326 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
1328 new_name
= get_name (scalar_dest
);
1331 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
1332 add_referenced_var (vec_dest
);
1338 /* Function vect_init_vector.
1340 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1341 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1342 is not NULL. Otherwise, place the initialization at the loop preheader.
1343 Return the DEF of INIT_STMT.
1344 It will be used in the vectorization of STMT. */
1347 vect_init_vector (gimple stmt
, tree vector_var
, tree vector_type
,
1348 gimple_stmt_iterator
*gsi
)
1350 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1358 new_var
= vect_get_new_vect_var (vector_type
, vect_simple_var
, "cst_");
1359 add_referenced_var (new_var
);
1360 init_stmt
= gimple_build_assign (new_var
, vector_var
);
1361 new_temp
= make_ssa_name (new_var
, init_stmt
);
1362 gimple_assign_set_lhs (init_stmt
, new_temp
);
1365 vect_finish_stmt_generation (stmt
, init_stmt
, gsi
);
1368 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1369 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1371 if (nested_in_vect_loop_p (loop
, stmt
))
1373 pe
= loop_preheader_edge (loop
);
1374 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
1375 gcc_assert (!new_bb
);
1378 if (vect_print_dump_info (REPORT_DETAILS
))
1380 fprintf (vect_dump
, "created new init_stmt: ");
1381 print_gimple_stmt (vect_dump
, init_stmt
, 0, TDF_SLIM
);
1384 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1389 /* For constant and loop invariant defs of SLP_NODE this function returns
1390 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1391 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1392 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1395 vect_get_constant_vectors (slp_tree slp_node
, VEC(tree
,heap
) **vec_oprnds
,
1396 unsigned int op_num
, unsigned int number_of_vectors
)
1398 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
1399 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1400 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1401 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1402 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1405 int j
, number_of_places_left_in_vector
;
1408 int group_size
= VEC_length (gimple
, stmts
);
1409 unsigned int vec_num
, i
;
1410 int number_of_copies
= 1;
1411 bool is_store
= false;
1412 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
1415 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
1418 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1419 created vectors. It is greater than 1 if unrolling is performed.
1421 For example, we have two scalar operands, s1 and s2 (e.g., group of
1422 strided accesses of size two), while NUNITS is four (i.e., four scalars
1423 of this type can be packed in a vector). The output vector will contain
1424 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1427 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1428 containing the operands.
1430 For example, NUNITS is four as before, and the group size is 8
1431 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1432 {s5, s6, s7, s8}. */
1434 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
1436 number_of_places_left_in_vector
= nunits
;
1438 for (j
= 0; j
< number_of_copies
; j
++)
1440 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
1443 op
= gimple_assign_rhs1 (stmt
);
1445 op
= gimple_op (stmt
, op_num
+ 1);
1446 if (!CONSTANT_CLASS_P (op
))
1449 /* Create 'vect_ = {op0,op1,...,opn}'. */
1450 t
= tree_cons (NULL_TREE
, op
, t
);
1452 number_of_places_left_in_vector
--;
1454 if (number_of_places_left_in_vector
== 0)
1456 number_of_places_left_in_vector
= nunits
;
1458 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1459 gcc_assert (vector_type
);
1461 vec_cst
= build_vector (vector_type
, t
);
1463 vec_cst
= build_constructor_from_list (vector_type
, t
);
1465 VEC_quick_push (tree
, voprnds
,
1466 vect_init_vector (stmt
, vec_cst
, vector_type
,
1473 /* Since the vectors are created in the reverse order, we should invert
1475 vec_num
= VEC_length (tree
, voprnds
);
1476 for (j
= vec_num
- 1; j
>= 0; j
--)
1478 vop
= VEC_index (tree
, voprnds
, j
);
1479 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1482 VEC_free (tree
, heap
, voprnds
);
1484 /* In case that VF is greater than the unrolling factor needed for the SLP
1485 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1486 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1487 to replicate the vectors. */
1488 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
1490 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
1491 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1496 /* Get vectorized definitions from SLP_NODE that contains corresponding
1497 vectorized def-stmts. */
1500 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
1503 gimple vec_def_stmt
;
1506 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
1509 VEC_iterate (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
);
1512 gcc_assert (vec_def_stmt
);
1513 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
1514 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
1519 /* Get vectorized definitions for SLP_NODE.
1520 If the scalar definitions are loop invariants or constants, collect them and
1521 call vect_get_constant_vectors() to create vector stmts.
1522 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1523 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1524 vect_get_slp_vect_defs() to retrieve them.
1525 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1526 the right node. This is used when the second operand must remain scalar. */
1529 vect_get_slp_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds0
,
1530 VEC (tree
,heap
) **vec_oprnds1
)
1533 enum tree_code code
;
1534 int number_of_vects
;
1535 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
1537 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
1538 /* The number of vector defs is determined by the number of vector statements
1539 in the node from which we get those statements. */
1540 if (SLP_TREE_LEFT (slp_node
))
1541 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node
));
1544 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1545 /* Number of vector stmts was calculated according to LHS in
1546 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1547 necessary. See vect_get_smallest_scalar_type() for details. */
1548 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
1550 if (rhs_size_unit
!= lhs_size_unit
)
1552 number_of_vects
*= rhs_size_unit
;
1553 number_of_vects
/= lhs_size_unit
;
1557 /* Allocate memory for vectorized defs. */
1558 *vec_oprnds0
= VEC_alloc (tree
, heap
, number_of_vects
);
1560 /* SLP_NODE corresponds either to a group of stores or to a group of
1561 unary/binary operations. We don't call this function for loads. */
1562 if (SLP_TREE_LEFT (slp_node
))
1563 /* The defs are already vectorized. */
1564 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
1566 /* Build vectors from scalar defs. */
1567 vect_get_constant_vectors (slp_node
, vec_oprnds0
, 0, number_of_vects
);
1569 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
1570 /* Since we don't call this function with loads, this is a group of
1574 code
= gimple_assign_rhs_code (first_stmt
);
1575 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
|| !vec_oprnds1
)
1578 /* The number of vector defs is determined by the number of vector statements
1579 in the node from which we get those statements. */
1580 if (SLP_TREE_RIGHT (slp_node
))
1581 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node
));
1583 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1585 *vec_oprnds1
= VEC_alloc (tree
, heap
, number_of_vects
);
1587 if (SLP_TREE_RIGHT (slp_node
))
1588 /* The defs are already vectorized. */
1589 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
1591 /* Build vectors from scalar defs. */
1592 vect_get_constant_vectors (slp_node
, vec_oprnds1
, 1, number_of_vects
);
1596 /* Function get_initial_def_for_induction
1599 STMT - a stmt that performs an induction operation in the loop.
1600 IV_PHI - the initial value of the induction variable
1603 Return a vector variable, initialized with the first VF values of
1604 the induction variable. E.g., for an iv with IV_PHI='X' and
1605 evolution S, for a vector of 4 units, we want to return:
1606 [X, X + S, X + 2*S, X + 3*S]. */
1609 get_initial_def_for_induction (gimple iv_phi
)
1611 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (iv_phi
);
1612 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1613 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1614 tree scalar_type
= TREE_TYPE (gimple_phi_result (iv_phi
));
1617 edge pe
= loop_preheader_edge (loop
);
1618 struct loop
*iv_loop
;
1620 tree vec
, vec_init
, vec_step
, t
;
1624 gimple init_stmt
, induction_phi
, new_stmt
;
1625 tree induc_def
, vec_def
, vec_dest
;
1626 tree init_expr
, step_expr
;
1627 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1632 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
1633 bool nested_in_vect_loop
= false;
1634 gimple_seq stmts
= NULL
;
1635 imm_use_iterator imm_iter
;
1636 use_operand_p use_p
;
1640 gimple_stmt_iterator si
;
1641 basic_block bb
= gimple_bb (iv_phi
);
1643 vectype
= get_vectype_for_scalar_type (scalar_type
);
1644 gcc_assert (vectype
);
1645 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1646 ncopies
= vf
/ nunits
;
1648 gcc_assert (phi_info
);
1649 gcc_assert (ncopies
>= 1);
1651 /* Find the first insertion point in the BB. */
1652 si
= gsi_after_labels (bb
);
1654 if (INTEGRAL_TYPE_P (scalar_type
) || POINTER_TYPE_P (scalar_type
))
1655 step_expr
= build_int_cst (scalar_type
, 0);
1657 step_expr
= build_real (scalar_type
, dconst0
);
1659 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1660 if (nested_in_vect_loop_p (loop
, iv_phi
))
1662 nested_in_vect_loop
= true;
1663 iv_loop
= loop
->inner
;
1667 gcc_assert (iv_loop
== (gimple_bb (iv_phi
))->loop_father
);
1669 latch_e
= loop_latch_edge (iv_loop
);
1670 loop_arg
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, latch_e
);
1672 access_fn
= analyze_scalar_evolution (iv_loop
, PHI_RESULT (iv_phi
));
1673 gcc_assert (access_fn
);
1674 ok
= vect_is_simple_iv_evolution (iv_loop
->num
, access_fn
,
1675 &init_expr
, &step_expr
);
1677 pe
= loop_preheader_edge (iv_loop
);
1679 /* Create the vector that holds the initial_value of the induction. */
1680 if (nested_in_vect_loop
)
1682 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1683 been created during vectorization of previous stmts; We obtain it from
1684 the STMT_VINFO_VEC_STMT of the defining stmt. */
1685 tree iv_def
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, loop_preheader_edge (iv_loop
));
1686 vec_init
= vect_get_vec_def_for_operand (iv_def
, iv_phi
, NULL
);
1690 /* iv_loop is the loop to be vectorized. Create:
1691 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1692 new_var
= vect_get_new_vect_var (scalar_type
, vect_scalar_var
, "var_");
1693 add_referenced_var (new_var
);
1695 new_name
= force_gimple_operand (init_expr
, &stmts
, false, new_var
);
1698 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
1699 gcc_assert (!new_bb
);
1703 t
= tree_cons (NULL_TREE
, init_expr
, t
);
1704 for (i
= 1; i
< nunits
; i
++)
1706 /* Create: new_name_i = new_name + step_expr */
1707 enum tree_code code
= POINTER_TYPE_P (scalar_type
)
1708 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
1709 init_stmt
= gimple_build_assign_with_ops (code
, new_var
,
1710 new_name
, step_expr
);
1711 new_name
= make_ssa_name (new_var
, init_stmt
);
1712 gimple_assign_set_lhs (init_stmt
, new_name
);
1714 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
1715 gcc_assert (!new_bb
);
1717 if (vect_print_dump_info (REPORT_DETAILS
))
1719 fprintf (vect_dump
, "created new init_stmt: ");
1720 print_gimple_stmt (vect_dump
, init_stmt
, 0, TDF_SLIM
);
1722 t
= tree_cons (NULL_TREE
, new_name
, t
);
1724 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1725 vec
= build_constructor_from_list (vectype
, nreverse (t
));
1726 vec_init
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1730 /* Create the vector that holds the step of the induction. */
1731 if (nested_in_vect_loop
)
1732 /* iv_loop is nested in the loop to be vectorized. Generate:
1733 vec_step = [S, S, S, S] */
1734 new_name
= step_expr
;
1737 /* iv_loop is the loop to be vectorized. Generate:
1738 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1739 expr
= build_int_cst (scalar_type
, vf
);
1740 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
1744 for (i
= 0; i
< nunits
; i
++)
1745 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
1746 gcc_assert (CONSTANT_CLASS_P (new_name
));
1747 vec
= build_vector (vectype
, t
);
1748 vec_step
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1751 /* Create the following def-use cycle:
1756 vec_iv = PHI <vec_init, vec_loop>
1760 vec_loop = vec_iv + vec_step; */
1762 /* Create the induction-phi that defines the induction-operand. */
1763 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
1764 add_referenced_var (vec_dest
);
1765 induction_phi
= create_phi_node (vec_dest
, iv_loop
->header
);
1766 set_vinfo_for_stmt (induction_phi
,
1767 new_stmt_vec_info (induction_phi
, loop_vinfo
));
1768 induc_def
= PHI_RESULT (induction_phi
);
1770 /* Create the iv update inside the loop */
1771 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
1772 induc_def
, vec_step
);
1773 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
1774 gimple_assign_set_lhs (new_stmt
, vec_def
);
1775 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
1776 set_vinfo_for_stmt (new_stmt
, new_stmt_vec_info (new_stmt
, loop_vinfo
));
1778 /* Set the arguments of the phi node: */
1779 add_phi_arg (induction_phi
, vec_init
, pe
);
1780 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (iv_loop
));
1783 /* In case that vectorization factor (VF) is bigger than the number
1784 of elements that we can fit in a vectype (nunits), we have to generate
1785 more than one vector stmt - i.e - we need to "unroll" the
1786 vector stmt by a factor VF/nunits. For more details see documentation
1787 in vectorizable_operation. */
1791 stmt_vec_info prev_stmt_vinfo
;
1792 /* FORNOW. This restriction should be relaxed. */
1793 gcc_assert (!nested_in_vect_loop
);
1795 /* Create the vector that holds the step of the induction. */
1796 expr
= build_int_cst (scalar_type
, nunits
);
1797 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
1799 for (i
= 0; i
< nunits
; i
++)
1800 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
1801 gcc_assert (CONSTANT_CLASS_P (new_name
));
1802 vec
= build_vector (vectype
, t
);
1803 vec_step
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1805 vec_def
= induc_def
;
1806 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
1807 for (i
= 1; i
< ncopies
; i
++)
1809 /* vec_i = vec_prev + vec_step */
1810 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
1812 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
1813 gimple_assign_set_lhs (new_stmt
, vec_def
);
1815 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
1816 set_vinfo_for_stmt (new_stmt
,
1817 new_stmt_vec_info (new_stmt
, loop_vinfo
));
1818 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
1819 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
1823 if (nested_in_vect_loop
)
1825 /* Find the loop-closed exit-phi of the induction, and record
1826 the final vector of induction results: */
1828 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
1830 if (!flow_bb_inside_loop_p (iv_loop
, gimple_bb (USE_STMT (use_p
))))
1832 exit_phi
= USE_STMT (use_p
);
1838 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
1839 /* FORNOW. Currently not supporting the case that an inner-loop induction
1840 is not used in the outer-loop (i.e. only outside the outer-loop). */
1841 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
1842 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
1844 STMT_VINFO_VEC_STMT (stmt_vinfo
) = new_stmt
;
1845 if (vect_print_dump_info (REPORT_DETAILS
))
1847 fprintf (vect_dump
, "vector of inductions after inner-loop:");
1848 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
1854 if (vect_print_dump_info (REPORT_DETAILS
))
1856 fprintf (vect_dump
, "transform induction: created def-use cycle: ");
1857 print_gimple_stmt (vect_dump
, induction_phi
, 0, TDF_SLIM
);
1858 fprintf (vect_dump
, "\n");
1859 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (vec_def
), 0, TDF_SLIM
);
1862 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
1867 /* Function vect_get_vec_def_for_operand.
1869 OP is an operand in STMT. This function returns a (vector) def that will be
1870 used in the vectorized stmt for STMT.
1872 In the case that OP is an SSA_NAME which is defined in the loop, then
1873 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1875 In case OP is an invariant or constant, a new stmt that creates a vector def
1876 needs to be introduced. */
1879 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1884 stmt_vec_info def_stmt_info
= NULL
;
1885 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1886 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1887 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1888 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1894 enum vect_def_type dt
;
1898 if (vect_print_dump_info (REPORT_DETAILS
))
1900 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1901 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
1904 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
1905 gcc_assert (is_simple_use
);
1906 if (vect_print_dump_info (REPORT_DETAILS
))
1910 fprintf (vect_dump
, "def = ");
1911 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1915 fprintf (vect_dump
, " def_stmt = ");
1916 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
1922 /* Case 1: operand is a constant. */
1923 case vect_constant_def
:
1928 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1929 if (vect_print_dump_info (REPORT_DETAILS
))
1930 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1932 for (i
= nunits
- 1; i
>= 0; --i
)
1934 t
= tree_cons (NULL_TREE
, op
, t
);
1936 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1937 gcc_assert (vector_type
);
1938 vec_cst
= build_vector (vector_type
, t
);
1940 return vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
1943 /* Case 2: operand is defined outside the loop - loop invariant. */
1944 case vect_invariant_def
:
1949 /* Create 'vec_inv = {inv,inv,..,inv}' */
1950 if (vect_print_dump_info (REPORT_DETAILS
))
1951 fprintf (vect_dump
, "Create vector_inv.");
1953 for (i
= nunits
- 1; i
>= 0; --i
)
1955 t
= tree_cons (NULL_TREE
, def
, t
);
1958 /* FIXME: use build_constructor directly. */
1959 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1960 gcc_assert (vector_type
);
1961 vec_inv
= build_constructor_from_list (vector_type
, t
);
1962 return vect_init_vector (stmt
, vec_inv
, vector_type
, NULL
);
1965 /* Case 3: operand is defined inside the loop. */
1969 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1971 /* Get the def from the vectorized stmt. */
1972 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1973 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1974 gcc_assert (vec_stmt
);
1975 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1976 vec_oprnd
= PHI_RESULT (vec_stmt
);
1977 else if (is_gimple_call (vec_stmt
))
1978 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1980 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1984 /* Case 4: operand is defined by a loop header phi - reduction */
1985 case vect_reduction_def
:
1989 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1990 loop
= (gimple_bb (def_stmt
))->loop_father
;
1992 /* Get the def before the loop */
1993 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1994 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1997 /* Case 5: operand is defined by loop-header phi - induction. */
1998 case vect_induction_def
:
2000 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
2002 /* Get the def from the vectorized stmt. */
2003 def_stmt_info
= vinfo_for_stmt (def_stmt
);
2004 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
2005 gcc_assert (vec_stmt
&& gimple_code (vec_stmt
) == GIMPLE_PHI
);
2006 vec_oprnd
= PHI_RESULT (vec_stmt
);
2016 /* Function vect_get_vec_def_for_stmt_copy
2018 Return a vector-def for an operand. This function is used when the
2019 vectorized stmt to be created (by the caller to this function) is a "copy"
2020 created in case the vectorized result cannot fit in one vector, and several
2021 copies of the vector-stmt are required. In this case the vector-def is
2022 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
2023 of the stmt that defines VEC_OPRND.
2024 DT is the type of the vector def VEC_OPRND.
2027 In case the vectorization factor (VF) is bigger than the number
2028 of elements that can fit in a vectype (nunits), we have to generate
2029 more than one vector stmt to vectorize the scalar stmt. This situation
2030 arises when there are multiple data-types operated upon in the loop; the
2031 smallest data-type determines the VF, and as a result, when vectorizing
2032 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
2033 vector stmt (each computing a vector of 'nunits' results, and together
2034 computing 'VF' results in each iteration). This function is called when
2035 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
2036 which VF=16 and nunits=4, so the number of copies required is 4):
2038 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
2040 S1: x = load VS1.0: vx.0 = memref0 VS1.1
2041 VS1.1: vx.1 = memref1 VS1.2
2042 VS1.2: vx.2 = memref2 VS1.3
2043 VS1.3: vx.3 = memref3
2045 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
2046 VSnew.1: vz1 = vx.1 + ... VSnew.2
2047 VSnew.2: vz2 = vx.2 + ... VSnew.3
2048 VSnew.3: vz3 = vx.3 + ...
2050 The vectorization of S1 is explained in vectorizable_load.
2051 The vectorization of S2:
2052 To create the first vector-stmt out of the 4 copies - VSnew.0 -
2053 the function 'vect_get_vec_def_for_operand' is called to
2054 get the relevant vector-def for each operand of S2. For operand x it
2055 returns the vector-def 'vx.0'.
2057 To create the remaining copies of the vector-stmt (VSnew.j), this
2058 function is called to get the relevant vector-def for each operand. It is
2059 obtained from the respective VS1.j stmt, which is recorded in the
2060 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
2062 For example, to obtain the vector-def 'vx.1' in order to create the
2063 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
2064 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
2065 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
2066 and return its def ('vx.1').
2067 Overall, to create the above sequence this function will be called 3 times:
2068 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
2069 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
2070 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
2073 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
2075 gimple vec_stmt_for_operand
;
2076 stmt_vec_info def_stmt_info
;
2078 /* Do nothing; can reuse same def. */
2079 if (dt
== vect_invariant_def
|| dt
== vect_constant_def
)
2082 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
2083 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
2084 gcc_assert (def_stmt_info
);
2085 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
2086 gcc_assert (vec_stmt_for_operand
);
2087 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
2088 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
2089 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
2091 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
2096 /* Get vectorized definitions for the operands to create a copy of an original
2097 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
2100 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
2101 VEC(tree
,heap
) **vec_oprnds0
,
2102 VEC(tree
,heap
) **vec_oprnds1
)
2104 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
2106 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
2107 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
2109 if (vec_oprnds1
&& *vec_oprnds1
)
2111 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
2112 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
2113 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
2118 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
2121 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
2122 VEC(tree
,heap
) **vec_oprnds0
, VEC(tree
,heap
) **vec_oprnds1
,
2126 vect_get_slp_defs (slp_node
, vec_oprnds0
, vec_oprnds1
);
2131 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2132 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2133 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
2137 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2138 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
2139 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
2145 /* Function vect_finish_stmt_generation.
2147 Insert a new stmt. */
2150 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
2151 gimple_stmt_iterator
*gsi
)
2153 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2154 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2156 gcc_assert (stmt
== gsi_stmt (*gsi
));
2157 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
2159 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
2161 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
));
2163 if (vect_print_dump_info (REPORT_DETAILS
))
2165 fprintf (vect_dump
, "add new stmt: ");
2166 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
2169 /* Make sure gsi points to the stmt that is being vectorized. */
2170 gcc_assert (stmt
== gsi_stmt (*gsi
));
2172 gimple_set_location (vec_stmt
, gimple_location (stmt
));
2176 /* Function get_initial_def_for_reduction
2179 STMT - a stmt that performs a reduction operation in the loop.
2180 INIT_VAL - the initial value of the reduction variable
2183 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2184 of the reduction (used for adjusting the epilog - see below).
2185 Return a vector variable, initialized according to the operation that STMT
2186 performs. This vector will be used as the initial value of the
2187 vector of partial results.
2189 Option1 (adjust in epilog): Initialize the vector as follows:
2192 min/max: [init_val,init_val,..,init_val,init_val]
2193 bit and/or: [init_val,init_val,..,init_val,init_val]
2194 and when necessary (e.g. add/mult case) let the caller know
2195 that it needs to adjust the result by init_val.
2197 Option2: Initialize the vector as follows:
2198 add: [0,0,...,0,init_val]
2199 mult: [1,1,...,1,init_val]
2200 min/max: [init_val,init_val,...,init_val]
2201 bit and/or: [init_val,init_val,...,init_val]
2202 and no adjustments are needed.
2204 For example, for the following code:
2210 STMT is 's = s + a[i]', and the reduction variable is 's'.
2211 For a vector of 4 units, we want to return either [0,0,0,init_val],
2212 or [0,0,0,0] and let the caller know that it needs to adjust
2213 the result at the end by 'init_val'.
2215 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2216 initialization vector is simpler (same element in all entries).
2217 A cost model should help decide between these two schemes. */
2220 get_initial_def_for_reduction (gimple stmt
, tree init_val
, tree
*adjustment_def
)
2222 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2223 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2224 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2225 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2226 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2227 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2228 tree type
= TREE_TYPE (init_val
);
2235 bool nested_in_vect_loop
= false;
2237 gcc_assert (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
));
2238 if (nested_in_vect_loop_p (loop
, stmt
))
2239 nested_in_vect_loop
= true;
2241 gcc_assert (loop
== (gimple_bb (stmt
))->loop_father
);
2243 vecdef
= vect_get_vec_def_for_operand (init_val
, stmt
, NULL
);
2247 case WIDEN_SUM_EXPR
:
2250 if (nested_in_vect_loop
)
2251 *adjustment_def
= vecdef
;
2253 *adjustment_def
= init_val
;
2254 /* Create a vector of zeros for init_def. */
2255 if (SCALAR_FLOAT_TYPE_P (type
))
2256 def_for_init
= build_real (type
, dconst0
);
2258 def_for_init
= build_int_cst (type
, 0);
2259 for (i
= nunits
- 1; i
>= 0; --i
)
2260 t
= tree_cons (NULL_TREE
, def_for_init
, t
);
2261 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def_for_init
));
2262 gcc_assert (vector_type
);
2263 init_def
= build_vector (vector_type
, t
);
2268 *adjustment_def
= NULL_TREE
;
2280 /* Function vect_create_epilog_for_reduction
2282 Create code at the loop-epilog to finalize the result of a reduction
2285 VECT_DEF is a vector of partial results.
2286 REDUC_CODE is the tree-code for the epilog reduction.
2287 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
2288 number of elements that we can fit in a vectype (nunits). In this case
2289 we have to generate more than one vector stmt - i.e - we need to "unroll"
2290 the vector stmt by a factor VF/nunits. For more details see documentation
2291 in vectorizable_operation.
2292 STMT is the scalar reduction stmt that is being vectorized.
2293 REDUCTION_PHI is the phi-node that carries the reduction computation.
2296 1. Creates the reduction def-use cycle: sets the arguments for
2298 The loop-entry argument is the vectorized initial-value of the reduction.
2299 The loop-latch argument is VECT_DEF - the vector of partial sums.
2300 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2301 by applying the operation specified by REDUC_CODE if available, or by
2302 other means (whole-vector shifts or a scalar loop).
2303 The function also creates a new phi node at the loop exit to preserve
2304 loop-closed form, as illustrated below.
2306 The flow at the entry to this function:
2309 vec_def = phi <null, null> # REDUCTION_PHI
2310 VECT_DEF = vector_stmt # vectorized form of STMT
2311 s_loop = scalar_stmt # (scalar) STMT
2313 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2317 The above is transformed by this function into:
2320 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2321 VECT_DEF = vector_stmt # vectorized form of STMT
2322 s_loop = scalar_stmt # (scalar) STMT
2324 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2325 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2326 v_out2 = reduce <v_out1>
2327 s_out3 = extract_field <v_out2, 0>
2328 s_out4 = adjust_result <s_out3>
2334 vect_create_epilog_for_reduction (tree vect_def
, gimple stmt
,
2336 enum tree_code reduc_code
,
2337 gimple reduction_phi
)
2339 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2340 stmt_vec_info prev_phi_info
;
2342 enum machine_mode mode
;
2343 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2345 basic_block exit_bb
;
2348 gimple new_phi
= NULL
, phi
;
2349 gimple_stmt_iterator exit_gsi
;
2351 tree new_temp
= NULL_TREE
;
2353 gimple epilog_stmt
= NULL
;
2354 tree new_scalar_dest
, new_dest
;
2356 tree bitsize
, bitpos
, bytesize
;
2357 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2358 tree adjustment_def
;
2359 tree vec_initial_def
, def
;
2361 imm_use_iterator imm_iter
;
2362 use_operand_p use_p
;
2363 bool extract_scalar_result
= false;
2364 tree reduction_op
, expr
;
2367 bool nested_in_vect_loop
= false;
2368 VEC(gimple
,heap
) *phis
= NULL
;
2369 enum vect_def_type dt
= vect_unknown_def_type
;
2372 if (nested_in_vect_loop_p (loop
, stmt
))
2375 nested_in_vect_loop
= true;
2378 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
2380 case GIMPLE_SINGLE_RHS
:
2381 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
)) == ternary_op
);
2382 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 2);
2384 case GIMPLE_UNARY_RHS
:
2385 reduction_op
= gimple_assign_rhs1 (stmt
);
2387 case GIMPLE_BINARY_RHS
:
2388 reduction_op
= gimple_assign_rhs2 (stmt
);
2394 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
2395 gcc_assert (vectype
);
2396 mode
= TYPE_MODE (vectype
);
2398 /*** 1. Create the reduction def-use cycle ***/
2400 /* For the case of reduction, vect_get_vec_def_for_operand returns
2401 the scalar def before the loop, that defines the initial value
2402 of the reduction variable. */
2403 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
2406 phi
= reduction_phi
;
2408 for (j
= 0; j
< ncopies
; j
++)
2410 /* 1.1 set the loop-entry arg of the reduction-phi: */
2411 add_phi_arg (phi
, vec_initial_def
, loop_preheader_edge (loop
));
2413 /* 1.2 set the loop-latch arg for the reduction-phi: */
2415 def
= vect_get_vec_def_for_stmt_copy (dt
, def
);
2416 add_phi_arg (phi
, def
, loop_latch_edge (loop
));
2418 if (vect_print_dump_info (REPORT_DETAILS
))
2420 fprintf (vect_dump
, "transform reduction: created def-use cycle: ");
2421 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
2422 fprintf (vect_dump
, "\n");
2423 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (def
), 0, TDF_SLIM
);
2426 phi
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
));
2429 /*** 2. Create epilog code
2430 The reduction epilog code operates across the elements of the vector
2431 of partial results computed by the vectorized loop.
2432 The reduction epilog code consists of:
2433 step 1: compute the scalar result in a vector (v_out2)
2434 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2435 step 3: adjust the scalar result (s_out3) if needed.
2437 Step 1 can be accomplished using one the following three schemes:
2438 (scheme 1) using reduc_code, if available.
2439 (scheme 2) using whole-vector shifts, if available.
2440 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2443 The overall epilog code looks like this:
2445 s_out0 = phi <s_loop> # original EXIT_PHI
2446 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2447 v_out2 = reduce <v_out1> # step 1
2448 s_out3 = extract_field <v_out2, 0> # step 2
2449 s_out4 = adjust_result <s_out3> # step 3
2451 (step 3 is optional, and steps 1 and 2 may be combined).
2452 Lastly, the uses of s_out0 are replaced by s_out4.
2456 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2457 v_out1 = phi <v_loop> */
2459 exit_bb
= single_exit (loop
)->dest
;
2461 prev_phi_info
= NULL
;
2462 for (j
= 0; j
< ncopies
; j
++)
2464 phi
= create_phi_node (SSA_NAME_VAR (vect_def
), exit_bb
);
2465 set_vinfo_for_stmt (phi
, new_stmt_vec_info (phi
, loop_vinfo
));
2470 def
= vect_get_vec_def_for_stmt_copy (dt
, def
);
2471 STMT_VINFO_RELATED_STMT (prev_phi_info
) = phi
;
2473 SET_PHI_ARG_DEF (phi
, single_exit (loop
)->dest_idx
, def
);
2474 prev_phi_info
= vinfo_for_stmt (phi
);
2476 exit_gsi
= gsi_after_labels (exit_bb
);
2478 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2479 (i.e. when reduc_code is not available) and in the final adjustment
2480 code (if needed). Also get the original scalar reduction variable as
2481 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2482 represents a reduction pattern), the tree-code and scalar-def are
2483 taken from the original stmt that the pattern-stmt (STMT) replaces.
2484 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2485 are taken from STMT. */
2487 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2490 /* Regular reduction */
2495 /* Reduction pattern */
2496 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
2497 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
2498 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
2500 code
= gimple_assign_rhs_code (orig_stmt
);
2501 scalar_dest
= gimple_assign_lhs (orig_stmt
);
2502 scalar_type
= TREE_TYPE (scalar_dest
);
2503 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
2504 bitsize
= TYPE_SIZE (scalar_type
);
2505 bytesize
= TYPE_SIZE_UNIT (scalar_type
);
2508 /* In case this is a reduction in an inner-loop while vectorizing an outer
2509 loop - we don't need to extract a single scalar result at the end of the
2510 inner-loop. The final vector of partial results will be used in the
2511 vectorized outer-loop, or reduced to a scalar result at the end of the
2513 if (nested_in_vect_loop
)
2514 goto vect_finalize_reduction
;
2517 gcc_assert (ncopies
== 1);
2519 /* 2.3 Create the reduction code, using one of the three schemes described
2522 if (reduc_code
< NUM_TREE_CODES
)
2526 /*** Case 1: Create:
2527 v_out2 = reduc_expr <v_out1> */
2529 if (vect_print_dump_info (REPORT_DETAILS
))
2530 fprintf (vect_dump
, "Reduce using direct vector reduction.");
2532 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2533 tmp
= build1 (reduc_code
, vectype
, PHI_RESULT (new_phi
));
2534 epilog_stmt
= gimple_build_assign (vec_dest
, tmp
);
2535 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
2536 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2537 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2539 extract_scalar_result
= true;
2543 enum tree_code shift_code
= 0;
2544 bool have_whole_vector_shift
= true;
2546 int element_bitsize
= tree_low_cst (bitsize
, 1);
2547 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
2550 if (optab_handler (vec_shr_optab
, mode
)->insn_code
!= CODE_FOR_nothing
)
2551 shift_code
= VEC_RSHIFT_EXPR
;
2553 have_whole_vector_shift
= false;
2555 /* Regardless of whether we have a whole vector shift, if we're
2556 emulating the operation via tree-vect-generic, we don't want
2557 to use it. Only the first round of the reduction is likely
2558 to still be profitable via emulation. */
2559 /* ??? It might be better to emit a reduction tree code here, so that
2560 tree-vect-generic can expand the first round via bit tricks. */
2561 if (!VECTOR_MODE_P (mode
))
2562 have_whole_vector_shift
= false;
2565 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
2566 if (optab_handler (optab
, mode
)->insn_code
== CODE_FOR_nothing
)
2567 have_whole_vector_shift
= false;
2570 if (have_whole_vector_shift
)
2572 /*** Case 2: Create:
2573 for (offset = VS/2; offset >= element_size; offset/=2)
2575 Create: va' = vec_shift <va, offset>
2576 Create: va = vop <va, va'>
2579 if (vect_print_dump_info (REPORT_DETAILS
))
2580 fprintf (vect_dump
, "Reduce using vector shifts");
2582 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2583 new_temp
= PHI_RESULT (new_phi
);
2585 for (bit_offset
= vec_size_in_bits
/2;
2586 bit_offset
>= element_bitsize
;
2589 tree bitpos
= size_int (bit_offset
);
2590 epilog_stmt
= gimple_build_assign_with_ops (shift_code
, vec_dest
,
2592 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
2593 gimple_assign_set_lhs (epilog_stmt
, new_name
);
2594 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2596 epilog_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
2597 new_name
, new_temp
);
2598 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
2599 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2600 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2603 extract_scalar_result
= true;
2609 /*** Case 3: Create:
2610 s = extract_field <v_out2, 0>
2611 for (offset = element_size;
2612 offset < vector_size;
2613 offset += element_size;)
2615 Create: s' = extract_field <v_out2, offset>
2616 Create: s = op <s, s'>
2619 if (vect_print_dump_info (REPORT_DETAILS
))
2620 fprintf (vect_dump
, "Reduce using scalar code. ");
2622 vec_temp
= PHI_RESULT (new_phi
);
2623 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
2624 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
2626 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
2627 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
2628 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2629 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2631 for (bit_offset
= element_bitsize
;
2632 bit_offset
< vec_size_in_bits
;
2633 bit_offset
+= element_bitsize
)
2635 tree bitpos
= bitsize_int (bit_offset
);
2636 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
2639 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
2640 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
2641 gimple_assign_set_lhs (epilog_stmt
, new_name
);
2642 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2644 epilog_stmt
= gimple_build_assign_with_ops (code
,
2646 new_name
, new_temp
);
2647 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
2648 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2649 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2652 extract_scalar_result
= false;
2656 /* 2.4 Extract the final scalar result. Create:
2657 s_out3 = extract_field <v_out2, bitpos> */
2659 if (extract_scalar_result
)
2663 gcc_assert (!nested_in_vect_loop
);
2664 if (vect_print_dump_info (REPORT_DETAILS
))
2665 fprintf (vect_dump
, "extract scalar result");
2667 if (BYTES_BIG_ENDIAN
)
2668 bitpos
= size_binop (MULT_EXPR
,
2669 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
2670 TYPE_SIZE (scalar_type
));
2672 bitpos
= bitsize_zero_node
;
2674 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
2675 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
2676 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
2677 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2678 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2681 vect_finalize_reduction
:
2683 /* 2.5 Adjust the final result by the initial value of the reduction
2684 variable. (When such adjustment is not needed, then
2685 'adjustment_def' is zero). For example, if code is PLUS we create:
2686 new_temp = loop_exit_def + adjustment_def */
2690 if (nested_in_vect_loop
)
2692 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def
)) == VECTOR_TYPE
);
2693 expr
= build2 (code
, vectype
, PHI_RESULT (new_phi
), adjustment_def
);
2694 new_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2698 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def
)) != VECTOR_TYPE
);
2699 expr
= build2 (code
, scalar_type
, new_temp
, adjustment_def
);
2700 new_dest
= vect_create_destination_var (scalar_dest
, scalar_type
);
2702 epilog_stmt
= gimple_build_assign (new_dest
, expr
);
2703 new_temp
= make_ssa_name (new_dest
, epilog_stmt
);
2704 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
2705 SSA_NAME_DEF_STMT (new_temp
) = epilog_stmt
;
2706 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
2710 /* 2.6 Handle the loop-exit phi */
2712 /* Replace uses of s_out0 with uses of s_out3:
2713 Find the loop-closed-use at the loop exit of the original scalar result.
2714 (The reduction result is expected to have two immediate uses - one at the
2715 latch block, and one at the loop exit). */
2716 phis
= VEC_alloc (gimple
, heap
, 10);
2717 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
2719 if (!flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2721 exit_phi
= USE_STMT (use_p
);
2722 VEC_quick_push (gimple
, phis
, exit_phi
);
2725 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2726 gcc_assert (!VEC_empty (gimple
, phis
));
2728 for (i
= 0; VEC_iterate (gimple
, phis
, i
, exit_phi
); i
++)
2730 if (nested_in_vect_loop
)
2732 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
2734 /* FORNOW. Currently not supporting the case that an inner-loop
2735 reduction is not used in the outer-loop (but only outside the
2737 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
2738 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
2740 epilog_stmt
= adjustment_def
? epilog_stmt
: new_phi
;
2741 STMT_VINFO_VEC_STMT (stmt_vinfo
) = epilog_stmt
;
2742 set_vinfo_for_stmt (epilog_stmt
,
2743 new_stmt_vec_info (epilog_stmt
, loop_vinfo
));
2745 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt
)) =
2746 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi
));
2750 /* Replace the uses: */
2751 orig_name
= PHI_RESULT (exit_phi
);
2752 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, orig_name
)
2753 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
2754 SET_USE (use_p
, new_temp
);
2756 VEC_free (gimple
, heap
, phis
);
2760 /* Function vectorizable_reduction.
2762 Check if STMT performs a reduction operation that can be vectorized.
2763 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2764 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2765 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2767 This function also handles reduction idioms (patterns) that have been
2768 recognized in advance during vect_pattern_recog. In this case, STMT may be
2770 X = pattern_expr (arg0, arg1, ..., X)
2771 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
2772 sequence that had been detected and replaced by the pattern-stmt (STMT).
2774 In some cases of reduction patterns, the type of the reduction variable X is
2775 different than the type of the other arguments of STMT.
2776 In such cases, the vectype that is used when transforming STMT into a vector
2777 stmt is different than the vectype that is used to determine the
2778 vectorization factor, because it consists of a different number of elements
2779 than the actual number of elements that are being operated upon in parallel.
2781 For example, consider an accumulation of shorts into an int accumulator.
2782 On some targets it's possible to vectorize this pattern operating on 8
2783 shorts at a time (hence, the vectype for purposes of determining the
2784 vectorization factor should be V8HI); on the other hand, the vectype that
2785 is used to create the vector form is actually V4SI (the type of the result).
2787 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2788 indicates what is the actual level of parallelism (V8HI in the example), so
2789 that the right vectorization factor would be derived. This vectype
2790 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2791 be used to create the vectorized stmt. The right vectype for the vectorized
2792 stmt is obtained from the type of the result X:
2793 get_vectype_for_scalar_type (TREE_TYPE (X))
2795 This means that, contrary to "regular" reductions (or "regular" stmts in
2796 general), the following equation:
2797 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2798 does *NOT* necessarily hold for reduction patterns. */
2801 vectorizable_reduction (gimple stmt
, gimple_stmt_iterator
*gsi
,
2806 tree loop_vec_def0
= NULL_TREE
, loop_vec_def1
= NULL_TREE
;
2807 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2808 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2809 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2810 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2811 enum tree_code code
, orig_code
, epilog_reduc_code
= 0;
2812 enum machine_mode vec_mode
;
2814 optab optab
, reduc_optab
;
2815 tree new_temp
= NULL_TREE
;
2818 enum vect_def_type dt
;
2819 gimple new_phi
= NULL
;
2823 stmt_vec_info orig_stmt_info
;
2824 tree expr
= NULL_TREE
;
2826 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2827 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2829 stmt_vec_info prev_stmt_info
, prev_phi_info
;
2830 gimple first_phi
= NULL
;
2831 bool single_defuse_cycle
= false;
2833 gimple new_stmt
= NULL
;
2837 if (nested_in_vect_loop_p (loop
, stmt
))
2840 gcc_assert (ncopies
>= 1);
2842 /* FORNOW: SLP not supported. */
2843 if (STMT_SLP_TYPE (stmt_info
))
2846 /* 1. Is vectorizable reduction? */
2848 /* Not supportable if the reduction variable is used in the loop. */
2849 if (STMT_VINFO_RELEVANT (stmt_info
) > vect_used_in_outer
)
2852 /* Reductions that are not used even in an enclosing outer-loop,
2853 are expected to be "live" (used out of the loop). */
2854 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_unused_in_loop
2855 && !STMT_VINFO_LIVE_P (stmt_info
))
2858 /* Make sure it was already recognized as a reduction computation. */
2859 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2862 /* 2. Has this been recognized as a reduction pattern?
2864 Check if STMT represents a pattern that has been recognized
2865 in earlier analysis stages. For stmts that represent a pattern,
2866 the STMT_VINFO_RELATED_STMT field records the last stmt in
2867 the original sequence that constitutes the pattern. */
2869 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2872 orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
2873 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) == stmt
);
2874 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
));
2875 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info
));
2878 /* 3. Check the operands of the operation. The first operands are defined
2879 inside the loop body. The last operand is the reduction variable,
2880 which is defined by the loop-header-phi. */
2882 gcc_assert (is_gimple_assign (stmt
));
2885 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
2887 case GIMPLE_SINGLE_RHS
:
2888 op_type
= TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
));
2889 if (op_type
== ternary_op
)
2891 tree rhs
= gimple_assign_rhs1 (stmt
);
2892 ops
[0] = TREE_OPERAND (rhs
, 0);
2893 ops
[1] = TREE_OPERAND (rhs
, 1);
2894 ops
[2] = TREE_OPERAND (rhs
, 2);
2895 code
= TREE_CODE (rhs
);
2901 case GIMPLE_BINARY_RHS
:
2902 code
= gimple_assign_rhs_code (stmt
);
2903 op_type
= TREE_CODE_LENGTH (code
);
2904 gcc_assert (op_type
== binary_op
);
2905 ops
[0] = gimple_assign_rhs1 (stmt
);
2906 ops
[1] = gimple_assign_rhs2 (stmt
);
2909 case GIMPLE_UNARY_RHS
:
2916 scalar_dest
= gimple_assign_lhs (stmt
);
2917 scalar_type
= TREE_TYPE (scalar_dest
);
2918 if (!POINTER_TYPE_P (scalar_type
) && !INTEGRAL_TYPE_P (scalar_type
)
2919 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
2922 /* All uses but the last are expected to be defined in the loop.
2923 The last use is the reduction variable. */
2924 for (i
= 0; i
< op_type
-1; i
++)
2926 is_simple_use
= vect_is_simple_use (ops
[i
], loop_vinfo
, &def_stmt
,
2928 gcc_assert (is_simple_use
);
2929 if (dt
!= vect_loop_def
2930 && dt
!= vect_invariant_def
2931 && dt
!= vect_constant_def
2932 && dt
!= vect_induction_def
)
2936 is_simple_use
= vect_is_simple_use (ops
[i
], loop_vinfo
, &def_stmt
, &def
, &dt
);
2937 gcc_assert (is_simple_use
);
2938 gcc_assert (dt
== vect_reduction_def
);
2939 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
2941 gcc_assert (orig_stmt
== vect_is_simple_reduction (loop_vinfo
, def_stmt
));
2943 gcc_assert (stmt
== vect_is_simple_reduction (loop_vinfo
, def_stmt
));
2945 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
2948 /* 4. Supportable by target? */
2950 /* 4.1. check support for the operation in the loop */
2951 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
2954 if (vect_print_dump_info (REPORT_DETAILS
))
2955 fprintf (vect_dump
, "no optab.");
2958 vec_mode
= TYPE_MODE (vectype
);
2959 if (optab_handler (optab
, vec_mode
)->insn_code
== CODE_FOR_nothing
)
2961 if (vect_print_dump_info (REPORT_DETAILS
))
2962 fprintf (vect_dump
, "op not supported by target.");
2963 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
2964 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2965 < vect_min_worthwhile_factor (code
))
2967 if (vect_print_dump_info (REPORT_DETAILS
))
2968 fprintf (vect_dump
, "proceeding using word mode.");
2971 /* Worthwhile without SIMD support? */
2972 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2973 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2974 < vect_min_worthwhile_factor (code
))
2976 if (vect_print_dump_info (REPORT_DETAILS
))
2977 fprintf (vect_dump
, "not worthwhile without SIMD support.");
2981 /* 4.2. Check support for the epilog operation.
2983 If STMT represents a reduction pattern, then the type of the
2984 reduction variable may be different than the type of the rest
2985 of the arguments. For example, consider the case of accumulation
2986 of shorts into an int accumulator; The original code:
2987 S1: int_a = (int) short_a;
2988 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2991 STMT: int_acc = widen_sum <short_a, int_acc>
2994 1. The tree-code that is used to create the vector operation in the
2995 epilog code (that reduces the partial results) is not the
2996 tree-code of STMT, but is rather the tree-code of the original
2997 stmt from the pattern that STMT is replacing. I.e, in the example
2998 above we want to use 'widen_sum' in the loop, but 'plus' in the
3000 2. The type (mode) we use to check available target support
3001 for the vector operation to be created in the *epilog*, is
3002 determined by the type of the reduction variable (in the example
3003 above we'd check this: plus_optab[vect_int_mode]).
3004 However the type (mode) we use to check available target support
3005 for the vector operation to be created *inside the loop*, is
3006 determined by the type of the other arguments to STMT (in the
3007 example we'd check this: widen_sum_optab[vect_short_mode]).
3009 This is contrary to "regular" reductions, in which the types of all
3010 the arguments are the same as the type of the reduction variable.
3011 For "regular" reductions we can therefore use the same vector type
3012 (and also the same tree-code) when generating the epilog code and
3013 when generating the code inside the loop. */
3017 /* This is a reduction pattern: get the vectype from the type of the
3018 reduction variable, and get the tree-code from orig_stmt. */
3019 orig_code
= gimple_assign_rhs_code (orig_stmt
);
3020 vectype
= get_vectype_for_scalar_type (TREE_TYPE (def
));
3023 if (vect_print_dump_info (REPORT_DETAILS
))
3025 fprintf (vect_dump
, "unsupported data-type ");
3026 print_generic_expr (vect_dump
, TREE_TYPE (def
), TDF_SLIM
);
3031 vec_mode
= TYPE_MODE (vectype
);
3035 /* Regular reduction: use the same vectype and tree-code as used for
3036 the vector code inside the loop can be used for the epilog code. */
3040 if (!reduction_code_for_scalar_code (orig_code
, &epilog_reduc_code
))
3042 reduc_optab
= optab_for_tree_code (epilog_reduc_code
, vectype
, optab_default
);
3045 if (vect_print_dump_info (REPORT_DETAILS
))
3046 fprintf (vect_dump
, "no optab for reduction.");
3047 epilog_reduc_code
= NUM_TREE_CODES
;
3049 if (optab_handler (reduc_optab
, vec_mode
)->insn_code
== CODE_FOR_nothing
)
3051 if (vect_print_dump_info (REPORT_DETAILS
))
3052 fprintf (vect_dump
, "reduc op not supported by target.");
3053 epilog_reduc_code
= NUM_TREE_CODES
;
3056 if (!vec_stmt
) /* transformation not required. */
3058 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3059 if (!vect_model_reduction_cost (stmt_info
, epilog_reduc_code
, ncopies
))
3066 if (vect_print_dump_info (REPORT_DETAILS
))
3067 fprintf (vect_dump
, "transform reduction.");
3069 /* Create the destination vector */
3070 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3072 /* In case the vectorization factor (VF) is bigger than the number
3073 of elements that we can fit in a vectype (nunits), we have to generate
3074 more than one vector stmt - i.e - we need to "unroll" the
3075 vector stmt by a factor VF/nunits. For more details see documentation
3076 in vectorizable_operation. */
3078 /* If the reduction is used in an outer loop we need to generate
3079 VF intermediate results, like so (e.g. for ncopies=2):
3084 (i.e. we generate VF results in 2 registers).
3085 In this case we have a separate def-use cycle for each copy, and therefore
3086 for each copy we get the vector def for the reduction variable from the
3087 respective phi node created for this copy.
3089 Otherwise (the reduction is unused in the loop nest), we can combine
3090 together intermediate results, like so (e.g. for ncopies=2):
3094 (i.e. we generate VF/2 results in a single register).
3095 In this case for each copy we get the vector def for the reduction variable
3096 from the vectorized reduction operation generated in the previous iteration.
3099 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_unused_in_loop
)
3101 single_defuse_cycle
= true;
3105 epilog_copies
= ncopies
;
3107 prev_stmt_info
= NULL
;
3108 prev_phi_info
= NULL
;
3109 for (j
= 0; j
< ncopies
; j
++)
3111 if (j
== 0 || !single_defuse_cycle
)
3113 /* Create the reduction-phi that defines the reduction-operand. */
3114 new_phi
= create_phi_node (vec_dest
, loop
->header
);
3115 set_vinfo_for_stmt (new_phi
, new_stmt_vec_info (new_phi
, loop_vinfo
));
3121 loop_vec_def0
= vect_get_vec_def_for_operand (ops
[0], stmt
, NULL
);
3122 if (op_type
== ternary_op
)
3124 loop_vec_def1
= vect_get_vec_def_for_operand (ops
[1], stmt
, NULL
);
3127 /* Get the vector def for the reduction variable from the phi node */
3128 reduc_def
= PHI_RESULT (new_phi
);
3129 first_phi
= new_phi
;
3133 enum vect_def_type dt
= vect_unknown_def_type
; /* Dummy */
3134 loop_vec_def0
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def0
);
3135 if (op_type
== ternary_op
)
3136 loop_vec_def1
= vect_get_vec_def_for_stmt_copy (dt
, loop_vec_def1
);
3138 if (single_defuse_cycle
)
3139 reduc_def
= gimple_assign_lhs (new_stmt
);
3141 reduc_def
= PHI_RESULT (new_phi
);
3143 STMT_VINFO_RELATED_STMT (prev_phi_info
) = new_phi
;
3146 /* Arguments are ready. create the new vector stmt. */
3147 if (op_type
== binary_op
)
3148 expr
= build2 (code
, vectype
, loop_vec_def0
, reduc_def
);
3150 expr
= build3 (code
, vectype
, loop_vec_def0
, loop_vec_def1
,
3152 new_stmt
= gimple_build_assign (vec_dest
, expr
);
3153 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3154 gimple_assign_set_lhs (new_stmt
, new_temp
);
3155 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3158 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3160 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3161 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3162 prev_phi_info
= vinfo_for_stmt (new_phi
);
3165 /* Finalize the reduction-phi (set its arguments) and create the
3166 epilog reduction code. */
3167 if (!single_defuse_cycle
)
3168 new_temp
= gimple_assign_lhs (*vec_stmt
);
3169 vect_create_epilog_for_reduction (new_temp
, stmt
, epilog_copies
,
3170 epilog_reduc_code
, first_phi
);
3174 /* Checks if CALL can be vectorized in type VECTYPE. Returns
3175 a function declaration if the target has a vectorized version
3176 of the function, or NULL_TREE if the function cannot be vectorized. */
3179 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
3181 tree fndecl
= gimple_call_fndecl (call
);
3182 enum built_in_function code
;
3184 /* We only handle functions that do not read or clobber memory -- i.e.
3185 const or novops ones. */
3186 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
3190 || TREE_CODE (fndecl
) != FUNCTION_DECL
3191 || !DECL_BUILT_IN (fndecl
))
3194 code
= DECL_FUNCTION_CODE (fndecl
);
3195 return targetm
.vectorize
.builtin_vectorized_function (code
, vectype_out
,
3199 /* Function vectorizable_call.
3201 Check if STMT performs a function call that can be vectorized.
3202 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3203 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3204 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3207 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
)
3212 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3213 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3214 tree vectype_out
, vectype_in
;
3217 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3218 tree fndecl
, new_temp
, def
, rhs_type
, lhs_type
;
3220 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3223 VEC(tree
, heap
) *vargs
= NULL
;
3224 enum { NARROW
, NONE
, WIDEN
} modifier
;
3227 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3230 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
3233 /* FORNOW: SLP not supported. */
3234 if (STMT_SLP_TYPE (stmt_info
))
3237 /* Is STMT a vectorizable call? */
3238 if (!is_gimple_call (stmt
))
3241 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3244 /* Process function arguments. */
3245 rhs_type
= NULL_TREE
;
3246 nargs
= gimple_call_num_args (stmt
);
3248 /* Bail out if the function has more than two arguments, we
3249 do not have interesting builtin functions to vectorize with
3250 more than two arguments. No arguments is also not good. */
3251 if (nargs
== 0 || nargs
> 2)
3254 for (i
= 0; i
< nargs
; i
++)
3256 op
= gimple_call_arg (stmt
, i
);
3258 /* We can only handle calls with arguments of the same type. */
3260 && rhs_type
!= TREE_TYPE (op
))
3262 if (vect_print_dump_info (REPORT_DETAILS
))
3263 fprintf (vect_dump
, "argument types differ.");
3266 rhs_type
= TREE_TYPE (op
);
3268 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
[i
]))
3270 if (vect_print_dump_info (REPORT_DETAILS
))
3271 fprintf (vect_dump
, "use not simple.");
3276 vectype_in
= get_vectype_for_scalar_type (rhs_type
);
3279 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3281 lhs_type
= TREE_TYPE (gimple_call_lhs (stmt
));
3282 vectype_out
= get_vectype_for_scalar_type (lhs_type
);
3285 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3288 if (nunits_in
== nunits_out
/ 2)
3290 else if (nunits_out
== nunits_in
)
3292 else if (nunits_out
== nunits_in
/ 2)
3297 /* For now, we only vectorize functions if a target specific builtin
3298 is available. TODO -- in some cases, it might be profitable to
3299 insert the calls for pieces of the vector, in order to be able
3300 to vectorize other operations in the loop. */
3301 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
3302 if (fndecl
== NULL_TREE
)
3304 if (vect_print_dump_info (REPORT_DETAILS
))
3305 fprintf (vect_dump
, "function is not vectorizable.");
3310 gcc_assert (ZERO_SSA_OPERANDS (stmt
, SSA_OP_ALL_VIRTUALS
));
3312 if (modifier
== NARROW
)
3313 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3315 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3317 /* Sanity check: make sure that at least one copy of the vectorized stmt
3318 needs to be generated. */
3319 gcc_assert (ncopies
>= 1);
3321 if (!vec_stmt
) /* transformation not required. */
3323 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3324 if (vect_print_dump_info (REPORT_DETAILS
))
3325 fprintf (vect_dump
, "=== vectorizable_call ===");
3326 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3332 if (vect_print_dump_info (REPORT_DETAILS
))
3333 fprintf (vect_dump
, "transform operation.");
3336 scalar_dest
= gimple_call_lhs (stmt
);
3337 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3339 prev_stmt_info
= NULL
;
3343 for (j
= 0; j
< ncopies
; ++j
)
3345 /* Build argument list for the vectorized call. */
3347 vargs
= VEC_alloc (tree
, heap
, nargs
);
3349 VEC_truncate (tree
, vargs
, 0);
3351 for (i
= 0; i
< nargs
; i
++)
3353 op
= gimple_call_arg (stmt
, i
);
3356 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3359 = vect_get_vec_def_for_stmt_copy (dt
[nargs
], vec_oprnd0
);
3361 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
3364 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3365 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3366 gimple_call_set_lhs (new_stmt
, new_temp
);
3368 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3371 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3373 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3375 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3381 for (j
= 0; j
< ncopies
; ++j
)
3383 /* Build argument list for the vectorized call. */
3385 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
3387 VEC_truncate (tree
, vargs
, 0);
3389 for (i
= 0; i
< nargs
; i
++)
3391 op
= gimple_call_arg (stmt
, i
);
3395 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3397 = vect_get_vec_def_for_stmt_copy (dt
[nargs
], vec_oprnd0
);
3402 = vect_get_vec_def_for_stmt_copy (dt
[nargs
], vec_oprnd1
);
3404 = vect_get_vec_def_for_stmt_copy (dt
[nargs
], vec_oprnd0
);
3407 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
3408 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
3411 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3412 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3413 gimple_call_set_lhs (new_stmt
, new_temp
);
3415 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3418 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3420 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3422 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3425 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3430 /* No current target implements this case. */
3434 VEC_free (tree
, heap
, vargs
);
3436 /* The call in STMT might prevent it from being removed in dce.
3437 We however cannot remove it here, due to the way the ssa name
3438 it defines is mapped to the new definition. So just replace
3439 rhs of the statement with something harmless. */
3441 type
= TREE_TYPE (scalar_dest
);
3442 new_stmt
= gimple_build_assign (gimple_call_lhs (stmt
),
3443 fold_convert (type
, integer_zero_node
));
3444 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3445 set_vinfo_for_stmt (stmt
, NULL
);
3446 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3447 gsi_replace (gsi
, new_stmt
, false);
3448 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3454 /* Function vect_gen_widened_results_half
3456 Create a vector stmt whose code, type, number of arguments, and result
3457 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3458 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3459 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3460 needs to be created (DECL is a function-decl of a target-builtin).
3461 STMT is the original scalar stmt that we are vectorizing. */
3464 vect_gen_widened_results_half (enum tree_code code
,
3466 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3467 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3475 /* Generate half of the widened result: */
3476 if (code
== CALL_EXPR
)
3478 /* Target specific support */
3479 if (op_type
== binary_op
)
3480 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3482 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3483 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3484 gimple_call_set_lhs (new_stmt
, new_temp
);
3488 /* Generic support */
3489 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3490 if (op_type
!= binary_op
)
3492 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
3494 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3495 gimple_assign_set_lhs (new_stmt
, new_temp
);
3497 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3499 if (code
== CALL_EXPR
)
3501 FOR_EACH_SSA_TREE_OPERAND (sym
, new_stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
3503 if (TREE_CODE (sym
) == SSA_NAME
)
3504 sym
= SSA_NAME_VAR (sym
);
3505 mark_sym_for_renaming (sym
);
3513 /* Check if STMT performs a conversion operation, that can be vectorized.
3514 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3515 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3516 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3519 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3520 gimple
*vec_stmt
, slp_tree slp_node
)
3525 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3526 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3527 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3528 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3529 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3533 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3534 gimple new_stmt
= NULL
;
3535 stmt_vec_info prev_stmt_info
;
3538 tree vectype_out
, vectype_in
;
3541 tree rhs_type
, lhs_type
;
3543 enum { NARROW
, NONE
, WIDEN
} modifier
;
3545 VEC(tree
,heap
) *vec_oprnds0
= NULL
;
3548 VEC(tree
,heap
) *dummy
= NULL
;
3551 /* Is STMT a vectorizable conversion? */
3553 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3556 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
3559 if (!is_gimple_assign (stmt
))
3562 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3565 code
= gimple_assign_rhs_code (stmt
);
3566 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3569 /* Check types of lhs and rhs. */
3570 op0
= gimple_assign_rhs1 (stmt
);
3571 rhs_type
= TREE_TYPE (op0
);
3572 vectype_in
= get_vectype_for_scalar_type (rhs_type
);
3575 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3577 scalar_dest
= gimple_assign_lhs (stmt
);
3578 lhs_type
= TREE_TYPE (scalar_dest
);
3579 vectype_out
= get_vectype_for_scalar_type (lhs_type
);
3582 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3585 if (nunits_in
== nunits_out
/ 2)
3587 else if (nunits_out
== nunits_in
)
3589 else if (nunits_out
== nunits_in
/ 2)
3594 if (modifier
== NONE
)
3595 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
) == vectype_out
);
3597 /* Bail out if the types are both integral or non-integral. */
3598 if ((INTEGRAL_TYPE_P (rhs_type
) && INTEGRAL_TYPE_P (lhs_type
))
3599 || (!INTEGRAL_TYPE_P (rhs_type
) && !INTEGRAL_TYPE_P (lhs_type
)))
3602 integral_type
= INTEGRAL_TYPE_P (rhs_type
) ? vectype_in
: vectype_out
;
3604 if (modifier
== NARROW
)
3605 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3607 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3609 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3610 this, so we can safely override NCOPIES with 1 here. */
3614 /* Sanity check: make sure that at least one copy of the vectorized stmt
3615 needs to be generated. */
3616 gcc_assert (ncopies
>= 1);
3618 /* Check the operands of the operation. */
3619 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
3621 if (vect_print_dump_info (REPORT_DETAILS
))
3622 fprintf (vect_dump
, "use not simple.");
3626 /* Supportable by target? */
3627 if ((modifier
== NONE
3628 && !targetm
.vectorize
.builtin_conversion (code
, integral_type
))
3629 || (modifier
== WIDEN
3630 && !supportable_widening_operation (code
, stmt
, vectype_in
,
3633 &dummy_int
, &dummy
))
3634 || (modifier
== NARROW
3635 && !supportable_narrowing_operation (code
, stmt
, vectype_in
,
3636 &code1
, &dummy_int
, &dummy
)))
3638 if (vect_print_dump_info (REPORT_DETAILS
))
3639 fprintf (vect_dump
, "conversion not supported by target.");
3643 if (modifier
!= NONE
)
3645 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
3646 /* FORNOW: SLP not supported. */
3647 if (STMT_SLP_TYPE (stmt_info
))
3651 if (!vec_stmt
) /* transformation not required. */
3653 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3658 if (vect_print_dump_info (REPORT_DETAILS
))
3659 fprintf (vect_dump
, "transform conversion.");
3662 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3664 if (modifier
== NONE
&& !slp_node
)
3665 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3667 prev_stmt_info
= NULL
;
3671 for (j
= 0; j
< ncopies
; j
++)
3677 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
3679 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3682 targetm
.vectorize
.builtin_conversion (code
, integral_type
);
3683 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vop0
); i
++)
3685 /* Arguments are ready. create the new vector stmt. */
3686 new_stmt
= gimple_build_call (builtin_decl
, 1, vop0
);
3687 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3688 gimple_call_set_lhs (new_stmt
, new_temp
);
3689 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3690 FOR_EACH_SSA_TREE_OPERAND (sym
, new_stmt
, iter
,
3691 SSA_OP_ALL_VIRTUALS
)
3693 if (TREE_CODE (sym
) == SSA_NAME
)
3694 sym
= SSA_NAME_VAR (sym
);
3695 mark_sym_for_renaming (sym
);
3698 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3702 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3704 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3705 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3710 /* In case the vectorization factor (VF) is bigger than the number
3711 of elements that we can fit in a vectype (nunits), we have to
3712 generate more than one vector stmt - i.e - we need to "unroll"
3713 the vector stmt by a factor VF/nunits. */
3714 for (j
= 0; j
< ncopies
; j
++)
3717 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3719 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3721 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
3723 /* Generate first half of the widened result: */
3725 = vect_gen_widened_results_half (code1
, decl1
,
3726 vec_oprnd0
, vec_oprnd1
,
3727 unary_op
, vec_dest
, gsi
, stmt
);
3729 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3731 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3732 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3734 /* Generate second half of the widened result: */
3736 = vect_gen_widened_results_half (code2
, decl2
,
3737 vec_oprnd0
, vec_oprnd1
,
3738 unary_op
, vec_dest
, gsi
, stmt
);
3739 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3740 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3745 /* In case the vectorization factor (VF) is bigger than the number
3746 of elements that we can fit in a vectype (nunits), we have to
3747 generate more than one vector stmt - i.e - we need to "unroll"
3748 the vector stmt by a factor VF/nunits. */
3749 for (j
= 0; j
< ncopies
; j
++)
3754 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3755 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3759 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd1
);
3760 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3763 /* Arguments are ready. Create the new vector stmt. */
3764 expr
= build2 (code1
, vectype_out
, vec_oprnd0
, vec_oprnd1
);
3765 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
, vec_oprnd0
,
3767 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3768 gimple_assign_set_lhs (new_stmt
, new_temp
);
3769 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3772 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3774 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3776 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3779 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3783 VEC_free (tree
, heap
, vec_oprnds0
);
3789 /* Function vectorizable_assignment.
3791 Check if STMT performs an assignment (copy) that can be vectorized.
3792 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3793 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3794 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3797 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3798 gimple
*vec_stmt
, slp_tree slp_node
)
3803 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3804 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3805 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3809 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3810 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3813 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3816 /* Multiple types in SLP are handled by creating the appropriate number of
3817 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3822 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3824 gcc_assert (ncopies
>= 1);
3826 return false; /* FORNOW */
3828 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3831 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
3834 /* Is vectorizable assignment? */
3835 if (!is_gimple_assign (stmt
))
3838 scalar_dest
= gimple_assign_lhs (stmt
);
3839 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
3842 if (gimple_assign_single_p (stmt
)
3843 || gimple_assign_rhs_code (stmt
) == PAREN_EXPR
)
3844 op
= gimple_assign_rhs1 (stmt
);
3848 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
3850 if (vect_print_dump_info (REPORT_DETAILS
))
3851 fprintf (vect_dump
, "use not simple.");
3855 if (!vec_stmt
) /* transformation not required. */
3857 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
3858 if (vect_print_dump_info (REPORT_DETAILS
))
3859 fprintf (vect_dump
, "=== vectorizable_assignment ===");
3860 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3865 if (vect_print_dump_info (REPORT_DETAILS
))
3866 fprintf (vect_dump
, "transform assignment.");
3869 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3872 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
3874 /* Arguments are ready. create the new vector stmt. */
3875 for (i
= 0; VEC_iterate (tree
, vec_oprnds
, i
, vop
); i
++)
3877 *vec_stmt
= gimple_build_assign (vec_dest
, vop
);
3878 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
3879 gimple_assign_set_lhs (*vec_stmt
, new_temp
);
3880 vect_finish_stmt_generation (stmt
, *vec_stmt
, gsi
);
3881 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
;
3884 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), *vec_stmt
);
3887 VEC_free (tree
, heap
, vec_oprnds
);
3892 /* Function vect_min_worthwhile_factor.
3894 For a loop where we could vectorize the operation indicated by CODE,
3895 return the minimum vectorization factor that makes it worthwhile
3896 to use generic vectors. */
3898 vect_min_worthwhile_factor (enum tree_code code
)
3919 /* Function vectorizable_induction
3921 Check if PHI performs an induction computation that can be vectorized.
3922 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3923 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3924 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3927 vectorizable_induction (gimple phi
, gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
3930 stmt_vec_info stmt_info
= vinfo_for_stmt (phi
);
3931 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3932 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3933 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3934 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3935 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3938 gcc_assert (ncopies
>= 1);
3939 /* FORNOW. This restriction should be relaxed. */
3940 if (nested_in_vect_loop_p (loop
, phi
) && ncopies
> 1)
3942 if (vect_print_dump_info (REPORT_DETAILS
))
3943 fprintf (vect_dump
, "multiple types in nested loop.");
3947 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3950 /* FORNOW: SLP not supported. */
3951 if (STMT_SLP_TYPE (stmt_info
))
3954 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
);
3956 if (gimple_code (phi
) != GIMPLE_PHI
)
3959 if (!vec_stmt
) /* transformation not required. */
3961 STMT_VINFO_TYPE (stmt_info
) = induc_vec_info_type
;
3962 if (vect_print_dump_info (REPORT_DETAILS
))
3963 fprintf (vect_dump
, "=== vectorizable_induction ===");
3964 vect_model_induction_cost (stmt_info
, ncopies
);
3970 if (vect_print_dump_info (REPORT_DETAILS
))
3971 fprintf (vect_dump
, "transform induction phi.");
3973 vec_def
= get_initial_def_for_induction (phi
);
3974 *vec_stmt
= SSA_NAME_DEF_STMT (vec_def
);
3979 /* Function vectorizable_operation.
3981 Check if STMT performs a binary or unary operation that can be vectorized.
3982 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3983 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3984 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3987 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3988 gimple
*vec_stmt
, slp_tree slp_node
)
3992 tree op0
, op1
= NULL
;
3993 tree vec_oprnd1
= NULL_TREE
;
3994 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3995 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3996 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3997 enum tree_code code
;
3998 enum machine_mode vec_mode
;
4003 enum machine_mode optab_op2_mode
;
4006 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4007 gimple new_stmt
= NULL
;
4008 stmt_vec_info prev_stmt_info
;
4009 int nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4014 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
4017 bool shift_p
= false;
4018 bool scalar_shift_arg
= false;
4020 /* Multiple types in SLP are handled by creating the appropriate number of
4021 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4026 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4028 gcc_assert (ncopies
>= 1);
4030 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4033 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4036 /* Is STMT a vectorizable binary/unary operation? */
4037 if (!is_gimple_assign (stmt
))
4040 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4043 scalar_dest
= gimple_assign_lhs (stmt
);
4044 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4047 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4048 if (nunits_out
!= nunits_in
)
4051 code
= gimple_assign_rhs_code (stmt
);
4053 /* For pointer addition, we should use the normal plus for
4054 the vector addition. */
4055 if (code
== POINTER_PLUS_EXPR
)
4058 /* Support only unary or binary operations. */
4059 op_type
= TREE_CODE_LENGTH (code
);
4060 if (op_type
!= unary_op
&& op_type
!= binary_op
)
4062 if (vect_print_dump_info (REPORT_DETAILS
))
4063 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
4067 op0
= gimple_assign_rhs1 (stmt
);
4068 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4070 if (vect_print_dump_info (REPORT_DETAILS
))
4071 fprintf (vect_dump
, "use not simple.");
4075 if (op_type
== binary_op
)
4077 op1
= gimple_assign_rhs2 (stmt
);
4078 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt
[1]))
4080 if (vect_print_dump_info (REPORT_DETAILS
))
4081 fprintf (vect_dump
, "use not simple.");
4086 /* If this is a shift/rotate, determine whether the shift amount is a vector,
4087 or scalar. If the shift/rotate amount is a vector, use the vector/vector
4089 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4090 || code
== RROTATE_EXPR
)
4094 /* vector shifted by vector */
4095 if (dt
[1] == vect_loop_def
)
4097 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4098 if (vect_print_dump_info (REPORT_DETAILS
))
4099 fprintf (vect_dump
, "vector/vector shift/rotate found.");
4102 /* See if the machine has a vector shifted by scalar insn and if not
4103 then see if it has a vector shifted by vector insn */
4104 else if (dt
[1] == vect_constant_def
|| dt
[1] == vect_invariant_def
)
4106 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4108 && (optab_handler (optab
, TYPE_MODE (vectype
))->insn_code
4109 != CODE_FOR_nothing
))
4111 scalar_shift_arg
= true;
4112 if (vect_print_dump_info (REPORT_DETAILS
))
4113 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
4117 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4118 if (vect_print_dump_info (REPORT_DETAILS
)
4120 && (optab_handler (optab
, TYPE_MODE (vectype
))->insn_code
4121 != CODE_FOR_nothing
))
4122 fprintf (vect_dump
, "vector/vector shift/rotate found.");
4128 if (vect_print_dump_info (REPORT_DETAILS
))
4129 fprintf (vect_dump
, "operand mode requires invariant argument.");
4134 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4136 /* Supportable by target? */
4139 if (vect_print_dump_info (REPORT_DETAILS
))
4140 fprintf (vect_dump
, "no optab.");
4143 vec_mode
= TYPE_MODE (vectype
);
4144 icode
= (int) optab_handler (optab
, vec_mode
)->insn_code
;
4145 if (icode
== CODE_FOR_nothing
)
4147 if (vect_print_dump_info (REPORT_DETAILS
))
4148 fprintf (vect_dump
, "op not supported by target.");
4149 /* Check only during analysis. */
4150 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4151 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4152 < vect_min_worthwhile_factor (code
)
4155 if (vect_print_dump_info (REPORT_DETAILS
))
4156 fprintf (vect_dump
, "proceeding using word mode.");
4159 /* Worthwhile without SIMD support? Check only during analysis. */
4160 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4161 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4162 < vect_min_worthwhile_factor (code
)
4165 if (vect_print_dump_info (REPORT_DETAILS
))
4166 fprintf (vect_dump
, "not worthwhile without SIMD support.");
4170 if (!vec_stmt
) /* transformation not required. */
4172 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4173 if (vect_print_dump_info (REPORT_DETAILS
))
4174 fprintf (vect_dump
, "=== vectorizable_operation ===");
4175 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
4181 if (vect_print_dump_info (REPORT_DETAILS
))
4182 fprintf (vect_dump
, "transform binary/unary operation.");
4185 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4187 /* Allocate VECs for vector operands. In case of SLP, vector operands are
4188 created in the previous stages of the recursion, so no allocation is
4189 needed, except for the case of shift with scalar shift argument. In that
4190 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
4191 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
4192 In case of loop-based vectorization we allocate VECs of size 1. We
4193 allocate VEC_OPRNDS1 only in case of binary operation. */
4196 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
4197 if (op_type
== binary_op
)
4198 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
4200 else if (scalar_shift_arg
)
4201 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
4203 /* In case the vectorization factor (VF) is bigger than the number
4204 of elements that we can fit in a vectype (nunits), we have to generate
4205 more than one vector stmt - i.e - we need to "unroll" the
4206 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4207 from one copy of the vector stmt to the next, in the field
4208 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4209 stages to find the correct vector defs to be used when vectorizing
4210 stmts that use the defs of the current stmt. The example below illustrates
4211 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4212 4 vectorized stmts):
4214 before vectorization:
4215 RELATED_STMT VEC_STMT
4219 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4221 RELATED_STMT VEC_STMT
4222 VS1_0: vx0 = memref0 VS1_1 -
4223 VS1_1: vx1 = memref1 VS1_2 -
4224 VS1_2: vx2 = memref2 VS1_3 -
4225 VS1_3: vx3 = memref3 - -
4226 S1: x = load - VS1_0
4229 step2: vectorize stmt S2 (done here):
4230 To vectorize stmt S2 we first need to find the relevant vector
4231 def for the first operand 'x'. This is, as usual, obtained from
4232 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4233 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4234 relevant vector def 'vx0'. Having found 'vx0' we can generate
4235 the vector stmt VS2_0, and as usual, record it in the
4236 STMT_VINFO_VEC_STMT of stmt S2.
4237 When creating the second copy (VS2_1), we obtain the relevant vector
4238 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4239 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4240 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4241 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4242 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4243 chain of stmts and pointers:
4244 RELATED_STMT VEC_STMT
4245 VS1_0: vx0 = memref0 VS1_1 -
4246 VS1_1: vx1 = memref1 VS1_2 -
4247 VS1_2: vx2 = memref2 VS1_3 -
4248 VS1_3: vx3 = memref3 - -
4249 S1: x = load - VS1_0
4250 VS2_0: vz0 = vx0 + v1 VS2_1 -
4251 VS2_1: vz1 = vx1 + v1 VS2_2 -
4252 VS2_2: vz2 = vx2 + v1 VS2_3 -
4253 VS2_3: vz3 = vx3 + v1 - -
4254 S2: z = x + 1 - VS2_0 */
4256 prev_stmt_info
= NULL
;
4257 for (j
= 0; j
< ncopies
; j
++)
4262 if (op_type
== binary_op
&& scalar_shift_arg
)
4264 /* Vector shl and shr insn patterns can be defined with scalar
4265 operand 2 (shift operand). In this case, use constant or loop
4266 invariant op1 directly, without extending it to vector mode
4268 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4269 if (!VECTOR_MODE_P (optab_op2_mode
))
4271 if (vect_print_dump_info (REPORT_DETAILS
))
4272 fprintf (vect_dump
, "operand 1 using scalar mode.");
4274 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
4277 /* Store vec_oprnd1 for every vector stmt to be created
4278 for SLP_NODE. We check during the analysis that all the
4279 shift arguments are the same.
4280 TODO: Allow different constants for different vector
4281 stmts generated for an SLP instance. */
4282 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4283 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
4288 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4289 (a special case for certain kind of vector shifts); otherwise,
4290 operand 1 should be of a vector type (the usual case). */
4291 if (op_type
== binary_op
&& !vec_oprnd1
)
4292 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4295 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4299 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4301 /* Arguments are ready. Create the new vector stmt. */
4302 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vop0
); i
++)
4304 vop1
= ((op_type
== binary_op
)
4305 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL
);
4306 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4307 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4308 gimple_assign_set_lhs (new_stmt
, new_temp
);
4309 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4311 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
4318 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4320 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4321 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4324 VEC_free (tree
, heap
, vec_oprnds0
);
4326 VEC_free (tree
, heap
, vec_oprnds1
);
4332 /* Get vectorized definitions for loop-based vectorization. For the first
4333 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4334 scalar operand), and for the rest we get a copy with
4335 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4336 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4337 The vectors are collected into VEC_OPRNDS. */
4340 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
4341 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
4345 /* Get first vector operand. */
4346 /* All the vector operands except the very first one (that is scalar oprnd)
4348 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4349 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
4351 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
4353 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
4355 /* Get second vector operand. */
4356 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
4357 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
4361 /* For conversion in multiple steps, continue to get operands
4364 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
4368 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4369 For multi-step conversions store the resulting vectors and call the function
4373 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
4374 int multi_step_cvt
, gimple stmt
,
4375 VEC (tree
, heap
) *vec_dsts
,
4376 gimple_stmt_iterator
*gsi
,
4377 slp_tree slp_node
, enum tree_code code
,
4378 stmt_vec_info
*prev_stmt_info
)
4381 tree vop0
, vop1
, new_tmp
, vec_dest
;
4383 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4385 vec_dest
= VEC_pop (tree
, vec_dsts
);
4387 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
4389 /* Create demotion operation. */
4390 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
4391 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
4392 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4393 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4394 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4395 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4398 /* Store the resulting vector for next recursive call. */
4399 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
4402 /* This is the last step of the conversion sequence. Store the
4403 vectors in SLP_NODE or in vector info of the scalar statement
4404 (or in STMT_VINFO_RELATED_STMT chain). */
4406 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
4409 if (!*prev_stmt_info
)
4410 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4412 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
4414 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4419 /* For multi-step demotion operations we first generate demotion operations
4420 from the source type to the intermediate types, and then combine the
4421 results (stored in VEC_OPRNDS) in demotion operation to the destination
4425 /* At each level of recursion we have have of the operands we had at the
4427 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
4428 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4429 stmt
, vec_dsts
, gsi
, slp_node
,
4430 code
, prev_stmt_info
);
4435 /* Function vectorizable_type_demotion
4437 Check if STMT performs a binary or unary operation that involves
4438 type demotion, and if it can be vectorized.
4439 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4440 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4441 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4444 vectorizable_type_demotion (gimple stmt
, gimple_stmt_iterator
*gsi
,
4445 gimple
*vec_stmt
, slp_tree slp_node
)
4450 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4451 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4452 enum tree_code code
, code1
= ERROR_MARK
;
4455 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4456 stmt_vec_info prev_stmt_info
;
4463 int multi_step_cvt
= 0;
4464 VEC (tree
, heap
) *vec_oprnds0
= NULL
;
4465 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
, *tmp_vec_dsts
= NULL
;
4466 tree last_oprnd
, intermediate_type
;
4468 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4471 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4474 /* Is STMT a vectorizable type-demotion operation? */
4475 if (!is_gimple_assign (stmt
))
4478 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4481 code
= gimple_assign_rhs_code (stmt
);
4482 if (!CONVERT_EXPR_CODE_P (code
))
4485 op0
= gimple_assign_rhs1 (stmt
);
4486 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
4489 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4491 scalar_dest
= gimple_assign_lhs (stmt
);
4492 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4495 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4496 if (nunits_in
>= nunits_out
)
4499 /* Multiple types in SLP are handled by creating the appropriate number of
4500 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4505 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
4507 gcc_assert (ncopies
>= 1);
4509 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4510 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
4511 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
4512 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
4513 && CONVERT_EXPR_CODE_P (code
))))
4516 /* Check the operands of the operation. */
4517 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4519 if (vect_print_dump_info (REPORT_DETAILS
))
4520 fprintf (vect_dump
, "use not simple.");
4524 /* Supportable by target? */
4525 if (!supportable_narrowing_operation (code
, stmt
, vectype_in
, &code1
,
4526 &multi_step_cvt
, &interm_types
))
4529 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
4531 if (!vec_stmt
) /* transformation not required. */
4533 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4534 if (vect_print_dump_info (REPORT_DETAILS
))
4535 fprintf (vect_dump
, "=== vectorizable_demotion ===");
4536 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
4541 if (vect_print_dump_info (REPORT_DETAILS
))
4542 fprintf (vect_dump
, "transform type demotion operation. ncopies = %d.",
4545 /* In case of multi-step demotion, we first generate demotion operations to
4546 the intermediate types, and then from that types to the final one.
4547 We create vector destinations for the intermediate type (TYPES) received
4548 from supportable_narrowing_operation, and store them in the correct order
4549 for future use in vect_create_vectorized_demotion_stmts(). */
4551 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
4553 vec_dsts
= VEC_alloc (tree
, heap
, 1);
4555 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
4556 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4560 for (i
= VEC_length (tree
, interm_types
) - 1;
4561 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
4563 vec_dest
= vect_create_destination_var (scalar_dest
,
4565 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4569 /* In case the vectorization factor (VF) is bigger than the number
4570 of elements that we can fit in a vectype (nunits), we have to generate
4571 more than one vector stmt - i.e - we need to "unroll" the
4572 vector stmt by a factor VF/nunits. */
4574 prev_stmt_info
= NULL
;
4575 for (j
= 0; j
< ncopies
; j
++)
4579 vect_get_slp_defs (slp_node
, &vec_oprnds0
, NULL
);
4582 VEC_free (tree
, heap
, vec_oprnds0
);
4583 vec_oprnds0
= VEC_alloc (tree
, heap
,
4584 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) * 2 : 2));
4585 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4586 vect_pow2 (multi_step_cvt
) - 1);
4589 /* Arguments are ready. Create the new vector stmts. */
4590 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
4591 vect_create_vectorized_demotion_stmts (&vec_oprnds0
,
4592 multi_step_cvt
, stmt
, tmp_vec_dsts
,
4593 gsi
, slp_node
, code1
,
4597 VEC_free (tree
, heap
, vec_oprnds0
);
4598 VEC_free (tree
, heap
, vec_dsts
);
4599 VEC_free (tree
, heap
, tmp_vec_dsts
);
4600 VEC_free (tree
, heap
, interm_types
);
4602 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4607 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4608 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4609 the resulting vectors and call the function recursively. */
4612 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
4613 VEC (tree
, heap
) **vec_oprnds1
,
4614 int multi_step_cvt
, gimple stmt
,
4615 VEC (tree
, heap
) *vec_dsts
,
4616 gimple_stmt_iterator
*gsi
,
4617 slp_tree slp_node
, enum tree_code code1
,
4618 enum tree_code code2
, tree decl1
,
4619 tree decl2
, int op_type
,
4620 stmt_vec_info
*prev_stmt_info
)
4623 tree vop0
, vop1
, new_tmp1
, new_tmp2
, vec_dest
;
4624 gimple new_stmt1
, new_stmt2
;
4625 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4626 VEC (tree
, heap
) *vec_tmp
;
4628 vec_dest
= VEC_pop (tree
, vec_dsts
);
4629 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
4631 for (i
= 0; VEC_iterate (tree
, *vec_oprnds0
, i
, vop0
); i
++)
4633 if (op_type
== binary_op
)
4634 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
4638 /* Generate the two halves of promotion operation. */
4639 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4640 op_type
, vec_dest
, gsi
, stmt
);
4641 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4642 op_type
, vec_dest
, gsi
, stmt
);
4643 if (is_gimple_call (new_stmt1
))
4645 new_tmp1
= gimple_call_lhs (new_stmt1
);
4646 new_tmp2
= gimple_call_lhs (new_stmt2
);
4650 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4651 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4656 /* Store the results for the recursive call. */
4657 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
4658 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
4662 /* Last step of promotion sequience - store the results. */
4665 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt1
);
4666 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt2
);
4670 if (!*prev_stmt_info
)
4671 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt1
;
4673 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt1
;
4675 *prev_stmt_info
= vinfo_for_stmt (new_stmt1
);
4676 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt2
;
4677 *prev_stmt_info
= vinfo_for_stmt (new_stmt2
);
4684 /* For multi-step promotion operation we first generate we call the
4685 function recurcively for every stage. We start from the input type,
4686 create promotion operations to the intermediate types, and then
4687 create promotions to the output type. */
4688 *vec_oprnds0
= VEC_copy (tree
, heap
, vec_tmp
);
4689 VEC_free (tree
, heap
, vec_tmp
);
4690 vect_create_vectorized_promotion_stmts (vec_oprnds0
, vec_oprnds1
,
4691 multi_step_cvt
- 1, stmt
,
4692 vec_dsts
, gsi
, slp_node
, code1
,
4693 code2
, decl2
, decl2
, op_type
,
4699 /* Function vectorizable_type_promotion
4701 Check if STMT performs a binary or unary operation that involves
4702 type promotion, and if it can be vectorized.
4703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4704 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4708 vectorizable_type_promotion (gimple stmt
, gimple_stmt_iterator
*gsi
,
4709 gimple
*vec_stmt
, slp_tree slp_node
)
4713 tree op0
, op1
= NULL
;
4714 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
4715 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4716 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4717 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4718 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4722 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4723 stmt_vec_info prev_stmt_info
;
4730 tree intermediate_type
= NULL_TREE
;
4731 int multi_step_cvt
= 0;
4732 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
4733 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
, *tmp_vec_dsts
= NULL
;
4735 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4738 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4741 /* Is STMT a vectorizable type-promotion operation? */
4742 if (!is_gimple_assign (stmt
))
4745 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4748 code
= gimple_assign_rhs_code (stmt
);
4749 if (!CONVERT_EXPR_CODE_P (code
)
4750 && code
!= WIDEN_MULT_EXPR
)
4753 op0
= gimple_assign_rhs1 (stmt
);
4754 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
4757 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4759 scalar_dest
= gimple_assign_lhs (stmt
);
4760 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4763 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4764 if (nunits_in
<= nunits_out
)
4767 /* Multiple types in SLP are handled by creating the appropriate number of
4768 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4773 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4775 gcc_assert (ncopies
>= 1);
4777 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4778 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
4779 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
4780 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
4781 && CONVERT_EXPR_CODE_P (code
))))
4784 /* Check the operands of the operation. */
4785 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4787 if (vect_print_dump_info (REPORT_DETAILS
))
4788 fprintf (vect_dump
, "use not simple.");
4792 op_type
= TREE_CODE_LENGTH (code
);
4793 if (op_type
== binary_op
)
4795 op1
= gimple_assign_rhs2 (stmt
);
4796 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt
[1]))
4798 if (vect_print_dump_info (REPORT_DETAILS
))
4799 fprintf (vect_dump
, "use not simple.");
4804 /* Supportable by target? */
4805 if (!supportable_widening_operation (code
, stmt
, vectype_in
,
4806 &decl1
, &decl2
, &code1
, &code2
,
4807 &multi_step_cvt
, &interm_types
))
4810 /* Binary widening operation can only be supported directly by the
4812 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4814 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
4816 if (!vec_stmt
) /* transformation not required. */
4818 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4819 if (vect_print_dump_info (REPORT_DETAILS
))
4820 fprintf (vect_dump
, "=== vectorizable_promotion ===");
4821 vect_model_simple_cost (stmt_info
, 2*ncopies
, dt
, NULL
);
4827 if (vect_print_dump_info (REPORT_DETAILS
))
4828 fprintf (vect_dump
, "transform type promotion operation. ncopies = %d.",
4832 /* In case of multi-step promotion, we first generate promotion operations
4833 to the intermediate types, and then from that types to the final one.
4834 We store vector destination in VEC_DSTS in the correct order for
4835 recursive creation of promotion operations in
4836 vect_create_vectorized_promotion_stmts(). Vector destinations are created
4837 according to TYPES recieved from supportable_widening_operation(). */
4839 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
4841 vec_dsts
= VEC_alloc (tree
, heap
, 1);
4843 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
4844 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4848 for (i
= VEC_length (tree
, interm_types
) - 1;
4849 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
4851 vec_dest
= vect_create_destination_var (scalar_dest
,
4853 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4859 vec_oprnds0
= VEC_alloc (tree
, heap
,
4860 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4861 if (op_type
== binary_op
)
4862 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
4865 /* In case the vectorization factor (VF) is bigger than the number
4866 of elements that we can fit in a vectype (nunits), we have to generate
4867 more than one vector stmt - i.e - we need to "unroll" the
4868 vector stmt by a factor VF/nunits. */
4870 prev_stmt_info
= NULL
;
4871 for (j
= 0; j
< ncopies
; j
++)
4877 vect_get_slp_defs (slp_node
, &vec_oprnds0
, &vec_oprnds1
);
4880 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
4881 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
4882 if (op_type
== binary_op
)
4884 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
4885 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
4891 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4892 VEC_replace (tree
, vec_oprnds0
, 0, vec_oprnd0
);
4893 if (op_type
== binary_op
)
4895 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd1
);
4896 VEC_replace (tree
, vec_oprnds1
, 0, vec_oprnd1
);
4900 /* Arguments are ready. Create the new vector stmts. */
4901 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
4902 vect_create_vectorized_promotion_stmts (&vec_oprnds0
, &vec_oprnds1
,
4903 multi_step_cvt
, stmt
,
4905 gsi
, slp_node
, code1
, code2
,
4906 decl1
, decl2
, op_type
,
4910 VEC_free (tree
, heap
, vec_dsts
);
4911 VEC_free (tree
, heap
, tmp_vec_dsts
);
4912 VEC_free (tree
, heap
, interm_types
);
4913 VEC_free (tree
, heap
, vec_oprnds0
);
4914 VEC_free (tree
, heap
, vec_oprnds1
);
4916 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4921 /* Function vect_strided_store_supported.
4923 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4924 and FALSE otherwise. */
4927 vect_strided_store_supported (tree vectype
)
4929 optab interleave_high_optab
, interleave_low_optab
;
4932 mode
= (int) TYPE_MODE (vectype
);
4934 /* Check that the operation is supported. */
4935 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
4936 vectype
, optab_default
);
4937 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
4938 vectype
, optab_default
);
4939 if (!interleave_high_optab
|| !interleave_low_optab
)
4941 if (vect_print_dump_info (REPORT_DETAILS
))
4942 fprintf (vect_dump
, "no optab for interleave.");
4946 if (optab_handler (interleave_high_optab
, mode
)->insn_code
4948 || optab_handler (interleave_low_optab
, mode
)->insn_code
4949 == CODE_FOR_nothing
)
4951 if (vect_print_dump_info (REPORT_DETAILS
))
4952 fprintf (vect_dump
, "interleave op not supported by target.");
4960 /* Function vect_permute_store_chain.
4962 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4963 a power of 2, generate interleave_high/low stmts to reorder the data
4964 correctly for the stores. Return the final references for stores in
4967 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4968 The input is 4 vectors each containing 8 elements. We assign a number to each
4969 element, the input sequence is:
4971 1st vec: 0 1 2 3 4 5 6 7
4972 2nd vec: 8 9 10 11 12 13 14 15
4973 3rd vec: 16 17 18 19 20 21 22 23
4974 4th vec: 24 25 26 27 28 29 30 31
4976 The output sequence should be:
4978 1st vec: 0 8 16 24 1 9 17 25
4979 2nd vec: 2 10 18 26 3 11 19 27
4980 3rd vec: 4 12 20 28 5 13 21 30
4981 4th vec: 6 14 22 30 7 15 23 31
4983 i.e., we interleave the contents of the four vectors in their order.
4985 We use interleave_high/low instructions to create such output. The input of
4986 each interleave_high/low operation is two vectors:
4989 the even elements of the result vector are obtained left-to-right from the
4990 high/low elements of the first vector. The odd elements of the result are
4991 obtained left-to-right from the high/low elements of the second vector.
4992 The output of interleave_high will be: 0 4 1 5
4993 and of interleave_low: 2 6 3 7
4996 The permutation is done in log LENGTH stages. In each stage interleave_high
4997 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4998 where the first argument is taken from the first half of DR_CHAIN and the
4999 second argument from it's second half.
5002 I1: interleave_high (1st vec, 3rd vec)
5003 I2: interleave_low (1st vec, 3rd vec)
5004 I3: interleave_high (2nd vec, 4th vec)
5005 I4: interleave_low (2nd vec, 4th vec)
5007 The output for the first stage is:
5009 I1: 0 16 1 17 2 18 3 19
5010 I2: 4 20 5 21 6 22 7 23
5011 I3: 8 24 9 25 10 26 11 27
5012 I4: 12 28 13 29 14 30 15 31
5014 The output of the second stage, i.e. the final result is:
5016 I1: 0 8 16 24 1 9 17 25
5017 I2: 2 10 18 26 3 11 19 27
5018 I3: 4 12 20 28 5 13 21 30
5019 I4: 6 14 22 30 7 15 23 31. */
5022 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
5023 unsigned int length
,
5025 gimple_stmt_iterator
*gsi
,
5026 VEC(tree
,heap
) **result_chain
)
5028 tree perm_dest
, vect1
, vect2
, high
, low
;
5030 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
5034 enum tree_code high_code
, low_code
;
5036 scalar_dest
= gimple_assign_lhs (stmt
);
5038 /* Check that the operation is supported. */
5039 if (!vect_strided_store_supported (vectype
))
5042 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
5044 for (i
= 0; i
< exact_log2 (length
); i
++)
5046 for (j
= 0; j
< length
/2; j
++)
5048 vect1
= VEC_index (tree
, dr_chain
, j
);
5049 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
5051 /* Create interleaving stmt:
5052 in the case of big endian:
5053 high = interleave_high (vect1, vect2)
5054 and in the case of little endian:
5055 high = interleave_low (vect1, vect2). */
5056 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
5057 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5058 add_referenced_var (perm_dest
);
5059 if (BYTES_BIG_ENDIAN
)
5061 high_code
= VEC_INTERLEAVE_HIGH_EXPR
;
5062 low_code
= VEC_INTERLEAVE_LOW_EXPR
;
5066 low_code
= VEC_INTERLEAVE_HIGH_EXPR
;
5067 high_code
= VEC_INTERLEAVE_LOW_EXPR
;
5069 perm_stmt
= gimple_build_assign_with_ops (high_code
, perm_dest
,
5071 high
= make_ssa_name (perm_dest
, perm_stmt
);
5072 gimple_assign_set_lhs (perm_stmt
, high
);
5073 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5074 VEC_replace (tree
, *result_chain
, 2*j
, high
);
5076 /* Create interleaving stmt:
5077 in the case of big endian:
5078 low = interleave_low (vect1, vect2)
5079 and in the case of little endian:
5080 low = interleave_high (vect1, vect2). */
5081 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
5082 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5083 add_referenced_var (perm_dest
);
5084 perm_stmt
= gimple_build_assign_with_ops (low_code
, perm_dest
,
5086 low
= make_ssa_name (perm_dest
, perm_stmt
);
5087 gimple_assign_set_lhs (perm_stmt
, low
);
5088 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5089 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
5091 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
5097 /* Function vectorizable_store.
5099 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5101 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5102 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5103 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5106 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5112 tree vec_oprnd
= NULL_TREE
;
5113 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5114 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5115 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5116 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5117 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5118 enum machine_mode vec_mode
;
5120 enum dr_alignment_support alignment_support_scheme
;
5123 enum vect_def_type dt
;
5124 stmt_vec_info prev_stmt_info
= NULL
;
5125 tree dataref_ptr
= NULL_TREE
;
5126 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5129 gimple next_stmt
, first_stmt
= NULL
;
5130 bool strided_store
= false;
5131 unsigned int group_size
, i
;
5132 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
5134 VEC(tree
,heap
) *vec_oprnds
= NULL
;
5135 bool slp
= (slp_node
!= NULL
);
5136 stmt_vec_info first_stmt_vinfo
;
5137 unsigned int vec_num
;
5139 /* Multiple types in SLP are handled by creating the appropriate number of
5140 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5145 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5147 gcc_assert (ncopies
>= 1);
5149 /* FORNOW. This restriction should be relaxed. */
5150 if (nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5152 if (vect_print_dump_info (REPORT_DETAILS
))
5153 fprintf (vect_dump
, "multiple types in nested loop.");
5157 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
5160 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
5163 /* Is vectorizable store? */
5165 if (!is_gimple_assign (stmt
))
5168 scalar_dest
= gimple_assign_lhs (stmt
);
5169 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5170 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5171 && !STMT_VINFO_STRIDED_ACCESS (stmt_info
))
5174 gcc_assert (gimple_assign_single_p (stmt
));
5175 op
= gimple_assign_rhs1 (stmt
);
5176 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
5178 if (vect_print_dump_info (REPORT_DETAILS
))
5179 fprintf (vect_dump
, "use not simple.");
5183 /* If accesses through a pointer to vectype do not alias the original
5184 memory reference we have a problem. */
5185 if (get_alias_set (vectype
) != get_alias_set (TREE_TYPE (scalar_dest
))
5186 && !alias_set_subset_of (get_alias_set (vectype
),
5187 get_alias_set (TREE_TYPE (scalar_dest
))))
5189 if (vect_print_dump_info (REPORT_DETAILS
))
5190 fprintf (vect_dump
, "vector type does not alias scalar type");
5194 if (!useless_type_conversion_p (TREE_TYPE (op
), TREE_TYPE (scalar_dest
)))
5196 if (vect_print_dump_info (REPORT_DETAILS
))
5197 fprintf (vect_dump
, "operands of different types");
5201 vec_mode
= TYPE_MODE (vectype
);
5202 /* FORNOW. In some cases can vectorize even if data-type not supported
5203 (e.g. - array initialization with 0). */
5204 if (optab_handler (mov_optab
, (int)vec_mode
)->insn_code
== CODE_FOR_nothing
)
5207 if (!STMT_VINFO_DATA_REF (stmt_info
))
5210 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
5212 strided_store
= true;
5213 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
5214 if (!vect_strided_store_supported (vectype
)
5215 && !PURE_SLP_STMT (stmt_info
) && !slp
)
5218 if (first_stmt
== stmt
)
5220 /* STMT is the leader of the group. Check the operands of all the
5221 stmts of the group. */
5222 next_stmt
= DR_GROUP_NEXT_DR (stmt_info
);
5225 gcc_assert (gimple_assign_single_p (next_stmt
));
5226 op
= gimple_assign_rhs1 (next_stmt
);
5227 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
5229 if (vect_print_dump_info (REPORT_DETAILS
))
5230 fprintf (vect_dump
, "use not simple.");
5233 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5238 if (!vec_stmt
) /* transformation not required. */
5240 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5241 vect_model_store_cost (stmt_info
, ncopies
, dt
, NULL
);
5249 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5250 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5252 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5255 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5257 /* We vectorize all the stmts of the interleaving group when we
5258 reach the last stmt in the group. */
5259 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5260 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5268 strided_store
= false;
5270 /* VEC_NUM is the number of vect stmts to be created for this group. */
5272 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5274 vec_num
= group_size
;
5280 group_size
= vec_num
= 1;
5281 first_stmt_vinfo
= stmt_info
;
5284 if (vect_print_dump_info (REPORT_DETAILS
))
5285 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
5287 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
5288 oprnds
= VEC_alloc (tree
, heap
, group_size
);
5290 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
5291 gcc_assert (alignment_support_scheme
);
5292 gcc_assert (alignment_support_scheme
== dr_aligned
); /* FORNOW */
5294 /* In case the vectorization factor (VF) is bigger than the number
5295 of elements that we can fit in a vectype (nunits), we have to generate
5296 more than one vector stmt - i.e - we need to "unroll" the
5297 vector stmt by a factor VF/nunits. For more details see documentation in
5298 vect_get_vec_def_for_copy_stmt. */
5300 /* In case of interleaving (non-unit strided access):
5307 We create vectorized stores starting from base address (the access of the
5308 first stmt in the chain (S2 in the above example), when the last store stmt
5309 of the chain (S4) is reached:
5312 VS2: &base + vec_size*1 = vx0
5313 VS3: &base + vec_size*2 = vx1
5314 VS4: &base + vec_size*3 = vx3
5316 Then permutation statements are generated:
5318 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
5319 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
5322 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5323 (the order of the data-refs in the output of vect_permute_store_chain
5324 corresponds to the order of scalar stmts in the interleaving chain - see
5325 the documentation of vect_permute_store_chain()).
5327 In case of both multiple types and interleaving, above vector stores and
5328 permutation stmts are created for every copy. The result vector stmts are
5329 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5330 STMT_VINFO_RELATED_STMT for the next copies.
5333 prev_stmt_info
= NULL
;
5334 for (j
= 0; j
< ncopies
; j
++)
5343 /* Get vectorized arguments for SLP_NODE. */
5344 vect_get_slp_defs (slp_node
, &vec_oprnds
, NULL
);
5346 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
5350 /* For interleaved stores we collect vectorized defs for all the
5351 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5352 used as an input to vect_permute_store_chain(), and OPRNDS as
5353 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5355 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
5356 OPRNDS are of size 1. */
5357 next_stmt
= first_stmt
;
5358 for (i
= 0; i
< group_size
; i
++)
5360 /* Since gaps are not supported for interleaved stores,
5361 GROUP_SIZE is the exact number of stmts in the chain.
5362 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5363 there is no interleaving, GROUP_SIZE is 1, and only one
5364 iteration of the loop will be executed. */
5365 gcc_assert (next_stmt
);
5366 gcc_assert (gimple_assign_single_p (next_stmt
));
5367 op
= gimple_assign_rhs1 (next_stmt
);
5369 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5371 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
5372 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
5373 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5377 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, NULL
, NULL_TREE
,
5378 &dummy
, &ptr_incr
, false,
5379 &inv_p
, TREE_TYPE (vec_oprnd
));
5380 gcc_assert (!inv_p
);
5384 /* For interleaved stores we created vectorized defs for all the
5385 defs stored in OPRNDS in the previous iteration (previous copy).
5386 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5387 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5389 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
5390 OPRNDS are of size 1. */
5391 for (i
= 0; i
< group_size
; i
++)
5393 op
= VEC_index (tree
, oprnds
, i
);
5394 vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
5395 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5396 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
5397 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
5400 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
5405 result_chain
= VEC_alloc (tree
, heap
, group_size
);
5407 if (!vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5412 next_stmt
= first_stmt
;
5413 for (i
= 0; i
< vec_num
; i
++)
5416 /* Bump the vector pointer. */
5417 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5421 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
5422 else if (strided_store
)
5423 /* For strided stores vectorized defs are interleaved in
5424 vect_permute_store_chain(). */
5425 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
5427 data_ref
= build_fold_indirect_ref (dataref_ptr
);
5428 /* Arguments are ready. Create the new vector stmt. */
5429 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5430 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5431 mark_symbols_for_renaming (new_stmt
);
5437 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5439 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5441 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5442 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5448 VEC_free (tree
, heap
, dr_chain
);
5449 VEC_free (tree
, heap
, oprnds
);
5451 VEC_free (tree
, heap
, result_chain
);
5457 /* Function vect_setup_realignment
5459 This function is called when vectorizing an unaligned load using
5460 the dr_explicit_realign[_optimized] scheme.
5461 This function generates the following code at the loop prolog:
5464 x msq_init = *(floor(p)); # prolog load
5465 realignment_token = call target_builtin;
5467 x msq = phi (msq_init, ---)
5469 The stmts marked with x are generated only for the case of
5470 dr_explicit_realign_optimized.
5472 The code above sets up a new (vector) pointer, pointing to the first
5473 location accessed by STMT, and a "floor-aligned" load using that pointer.
5474 It also generates code to compute the "realignment-token" (if the relevant
5475 target hook was defined), and creates a phi-node at the loop-header bb
5476 whose arguments are the result of the prolog-load (created by this
5477 function) and the result of a load that takes place in the loop (to be
5478 created by the caller to this function).
5480 For the case of dr_explicit_realign_optimized:
5481 The caller to this function uses the phi-result (msq) to create the
5482 realignment code inside the loop, and sets up the missing phi argument,
5485 msq = phi (msq_init, lsq)
5486 lsq = *(floor(p')); # load in loop
5487 result = realign_load (msq, lsq, realignment_token);
5489 For the case of dr_explicit_realign:
5491 msq = *(floor(p)); # load in loop
5493 lsq = *(floor(p')); # load in loop
5494 result = realign_load (msq, lsq, realignment_token);
5497 STMT - (scalar) load stmt to be vectorized. This load accesses
5498 a memory location that may be unaligned.
5499 BSI - place where new code is to be inserted.
5500 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
5504 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5505 target hook, if defined.
5506 Return value - the result of the loop-header phi node. */
5509 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
5510 tree
*realignment_token
,
5511 enum dr_alignment_support alignment_support_scheme
,
5513 struct loop
**at_loop
)
5515 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5516 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5517 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5518 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5520 tree scalar_dest
= gimple_assign_lhs (stmt
);
5527 tree msq_init
= NULL_TREE
;
5530 tree msq
= NULL_TREE
;
5531 gimple_seq stmts
= NULL
;
5533 bool compute_in_loop
= false;
5534 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5535 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5536 struct loop
*loop_for_initial_load
;
5538 gcc_assert (alignment_support_scheme
== dr_explicit_realign
5539 || alignment_support_scheme
== dr_explicit_realign_optimized
);
5541 /* We need to generate three things:
5542 1. the misalignment computation
5543 2. the extra vector load (for the optimized realignment scheme).
5544 3. the phi node for the two vectors from which the realignment is
5545 done (for the optimized realignment scheme).
5548 /* 1. Determine where to generate the misalignment computation.
5550 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5551 calculation will be generated by this function, outside the loop (in the
5552 preheader). Otherwise, INIT_ADDR had already been computed for us by the
5553 caller, inside the loop.
5555 Background: If the misalignment remains fixed throughout the iterations of
5556 the loop, then both realignment schemes are applicable, and also the
5557 misalignment computation can be done outside LOOP. This is because we are
5558 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5559 are a multiple of VS (the Vector Size), and therefore the misalignment in
5560 different vectorized LOOP iterations is always the same.
5561 The problem arises only if the memory access is in an inner-loop nested
5562 inside LOOP, which is now being vectorized using outer-loop vectorization.
5563 This is the only case when the misalignment of the memory access may not
5564 remain fixed throughout the iterations of the inner-loop (as explained in
5565 detail in vect_supportable_dr_alignment). In this case, not only is the
5566 optimized realignment scheme not applicable, but also the misalignment
5567 computation (and generation of the realignment token that is passed to
5568 REALIGN_LOAD) have to be done inside the loop.
5570 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5571 or not, which in turn determines if the misalignment is computed inside
5572 the inner-loop, or outside LOOP. */
5574 if (init_addr
!= NULL_TREE
)
5576 compute_in_loop
= true;
5577 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
5581 /* 2. Determine where to generate the extra vector load.
5583 For the optimized realignment scheme, instead of generating two vector
5584 loads in each iteration, we generate a single extra vector load in the
5585 preheader of the loop, and in each iteration reuse the result of the
5586 vector load from the previous iteration. In case the memory access is in
5587 an inner-loop nested inside LOOP, which is now being vectorized using
5588 outer-loop vectorization, we need to determine whether this initial vector
5589 load should be generated at the preheader of the inner-loop, or can be
5590 generated at the preheader of LOOP. If the memory access has no evolution
5591 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5592 to be generated inside LOOP (in the preheader of the inner-loop). */
5594 if (nested_in_vect_loop
)
5596 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
5597 bool invariant_in_outerloop
=
5598 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
5599 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
5602 loop_for_initial_load
= loop
;
5604 *at_loop
= loop_for_initial_load
;
5606 /* 3. For the case of the optimized realignment, create the first vector
5607 load at the loop preheader. */
5609 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5611 /* Create msq_init = *(floor(p1)) in the loop preheader */
5613 gcc_assert (!compute_in_loop
);
5614 pe
= loop_preheader_edge (loop_for_initial_load
);
5615 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5616 ptr
= vect_create_data_ref_ptr (stmt
, loop_for_initial_load
, NULL_TREE
,
5617 &init_addr
, &inc
, true, &inv_p
, NULL_TREE
);
5618 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
5619 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5620 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5621 gimple_assign_set_lhs (new_stmt
, new_temp
);
5622 mark_symbols_for_renaming (new_stmt
);
5623 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
5624 gcc_assert (!new_bb
);
5625 msq_init
= gimple_assign_lhs (new_stmt
);
5628 /* 4. Create realignment token using a target builtin, if available.
5629 It is done either inside the containing loop, or before LOOP (as
5630 determined above). */
5632 if (targetm
.vectorize
.builtin_mask_for_load
)
5636 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
5637 if (compute_in_loop
)
5638 gcc_assert (init_addr
); /* already computed by the caller. */
5641 /* Generate the INIT_ADDR computation outside LOOP. */
5642 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
5644 pe
= loop_preheader_edge (loop
);
5645 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
5646 gcc_assert (!new_bb
);
5649 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
5650 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
5652 vect_create_destination_var (scalar_dest
,
5653 gimple_call_return_type (new_stmt
));
5654 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5655 gimple_call_set_lhs (new_stmt
, new_temp
);
5657 if (compute_in_loop
)
5658 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
5661 /* Generate the misalignment computation outside LOOP. */
5662 pe
= loop_preheader_edge (loop
);
5663 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
5664 gcc_assert (!new_bb
);
5667 *realignment_token
= gimple_call_lhs (new_stmt
);
5669 /* The result of the CALL_EXPR to this builtin is determined from
5670 the value of the parameter and no global variables are touched
5671 which makes the builtin a "const" function. Requiring the
5672 builtin to have the "const" attribute makes it unnecessary
5673 to call mark_call_clobbered. */
5674 gcc_assert (TREE_READONLY (builtin_decl
));
5677 if (alignment_support_scheme
== dr_explicit_realign
)
5680 gcc_assert (!compute_in_loop
);
5681 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
5684 /* 5. Create msq = phi <msq_init, lsq> in loop */
5686 pe
= loop_preheader_edge (containing_loop
);
5687 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5688 msq
= make_ssa_name (vec_dest
, NULL
);
5689 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
5690 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
5691 add_phi_arg (phi_stmt
, msq_init
, pe
);
5697 /* Function vect_strided_load_supported.
5699 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5700 and FALSE otherwise. */
5703 vect_strided_load_supported (tree vectype
)
5705 optab perm_even_optab
, perm_odd_optab
;
5708 mode
= (int) TYPE_MODE (vectype
);
5710 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
,
5712 if (!perm_even_optab
)
5714 if (vect_print_dump_info (REPORT_DETAILS
))
5715 fprintf (vect_dump
, "no optab for perm_even.");
5719 if (optab_handler (perm_even_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
5721 if (vect_print_dump_info (REPORT_DETAILS
))
5722 fprintf (vect_dump
, "perm_even op not supported by target.");
5726 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
,
5728 if (!perm_odd_optab
)
5730 if (vect_print_dump_info (REPORT_DETAILS
))
5731 fprintf (vect_dump
, "no optab for perm_odd.");
5735 if (optab_handler (perm_odd_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
5737 if (vect_print_dump_info (REPORT_DETAILS
))
5738 fprintf (vect_dump
, "perm_odd op not supported by target.");
5745 /* Function vect_permute_load_chain.
5747 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5748 a power of 2, generate extract_even/odd stmts to reorder the input data
5749 correctly. Return the final references for loads in RESULT_CHAIN.
5751 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5752 The input is 4 vectors each containing 8 elements. We assign a number to each
5753 element, the input sequence is:
5755 1st vec: 0 1 2 3 4 5 6 7
5756 2nd vec: 8 9 10 11 12 13 14 15
5757 3rd vec: 16 17 18 19 20 21 22 23
5758 4th vec: 24 25 26 27 28 29 30 31
5760 The output sequence should be:
5762 1st vec: 0 4 8 12 16 20 24 28
5763 2nd vec: 1 5 9 13 17 21 25 29
5764 3rd vec: 2 6 10 14 18 22 26 30
5765 4th vec: 3 7 11 15 19 23 27 31
5767 i.e., the first output vector should contain the first elements of each
5768 interleaving group, etc.
5770 We use extract_even/odd instructions to create such output. The input of each
5771 extract_even/odd operation is two vectors
5775 and the output is the vector of extracted even/odd elements. The output of
5776 extract_even will be: 0 2 4 6
5777 and of extract_odd: 1 3 5 7
5780 The permutation is done in log LENGTH stages. In each stage extract_even and
5781 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5782 order. In our example,
5784 E1: extract_even (1st vec, 2nd vec)
5785 E2: extract_odd (1st vec, 2nd vec)
5786 E3: extract_even (3rd vec, 4th vec)
5787 E4: extract_odd (3rd vec, 4th vec)
5789 The output for the first stage will be:
5791 E1: 0 2 4 6 8 10 12 14
5792 E2: 1 3 5 7 9 11 13 15
5793 E3: 16 18 20 22 24 26 28 30
5794 E4: 17 19 21 23 25 27 29 31
5796 In order to proceed and create the correct sequence for the next stage (or
5797 for the correct output, if the second stage is the last one, as in our
5798 example), we first put the output of extract_even operation and then the
5799 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5800 The input for the second stage is:
5802 1st vec (E1): 0 2 4 6 8 10 12 14
5803 2nd vec (E3): 16 18 20 22 24 26 28 30
5804 3rd vec (E2): 1 3 5 7 9 11 13 15
5805 4th vec (E4): 17 19 21 23 25 27 29 31
5807 The output of the second stage:
5809 E1: 0 4 8 12 16 20 24 28
5810 E2: 2 6 10 14 18 22 26 30
5811 E3: 1 5 9 13 17 21 25 29
5812 E4: 3 7 11 15 19 23 27 31
5814 And RESULT_CHAIN after reordering:
5816 1st vec (E1): 0 4 8 12 16 20 24 28
5817 2nd vec (E3): 1 5 9 13 17 21 25 29
5818 3rd vec (E2): 2 6 10 14 18 22 26 30
5819 4th vec (E4): 3 7 11 15 19 23 27 31. */
5822 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
5823 unsigned int length
,
5825 gimple_stmt_iterator
*gsi
,
5826 VEC(tree
,heap
) **result_chain
)
5828 tree perm_dest
, data_ref
, first_vect
, second_vect
;
5830 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
5834 /* Check that the operation is supported. */
5835 if (!vect_strided_load_supported (vectype
))
5838 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
5839 for (i
= 0; i
< exact_log2 (length
); i
++)
5841 for (j
= 0; j
< length
; j
+=2)
5843 first_vect
= VEC_index (tree
, dr_chain
, j
);
5844 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
5846 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5847 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
5848 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5849 add_referenced_var (perm_dest
);
5851 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR
,
5852 perm_dest
, first_vect
,
5855 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
5856 gimple_assign_set_lhs (perm_stmt
, data_ref
);
5857 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5858 mark_symbols_for_renaming (perm_stmt
);
5860 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
5862 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5863 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
5864 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5865 add_referenced_var (perm_dest
);
5867 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR
,
5868 perm_dest
, first_vect
,
5870 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
5871 gimple_assign_set_lhs (perm_stmt
, data_ref
);
5872 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5873 mark_symbols_for_renaming (perm_stmt
);
5875 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
5877 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
5883 /* Function vect_transform_strided_load.
5885 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5886 to perform their permutation and ascribe the result vectorized statements to
5887 the scalar statements.
5891 vect_transform_strided_load (gimple stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
5892 gimple_stmt_iterator
*gsi
)
5894 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5895 gimple first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
5896 gimple next_stmt
, new_stmt
;
5897 VEC(tree
,heap
) *result_chain
= NULL
;
5898 unsigned int i
, gap_count
;
5901 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5902 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5903 vectors, that are ready for vector computation. */
5904 result_chain
= VEC_alloc (tree
, heap
, size
);
5906 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
))
5909 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5910 Since we scan the chain starting from it's first node, their order
5911 corresponds the order of data-refs in RESULT_CHAIN. */
5912 next_stmt
= first_stmt
;
5914 for (i
= 0; VEC_iterate (tree
, result_chain
, i
, tmp_data_ref
); i
++)
5919 /* Skip the gaps. Loads created for the gaps will be removed by dead
5920 code elimination pass later. No need to check for the first stmt in
5921 the group, since it always exists.
5922 DR_GROUP_GAP is the number of steps in elements from the previous
5923 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5924 correspond to the gaps.
5926 if (next_stmt
!= first_stmt
5927 && gap_count
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
5935 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
5936 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5937 copies, and we put the new vector statement in the first available
5939 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
5940 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
5944 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
5946 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
5949 prev_stmt
= rel_stmt
;
5950 rel_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
5952 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) = new_stmt
;
5954 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5956 /* If NEXT_STMT accesses the same DR as the previous statement,
5957 put the same TMP_DATA_REF as its vectorized statement; otherwise
5958 get the next data-ref from RESULT_CHAIN. */
5959 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
5964 VEC_free (tree
, heap
, result_chain
);
5969 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
5970 building a vector of type MASK_TYPE from it) and two input vectors placed in
5971 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
5972 shifting by STRIDE elements of DR_CHAIN for every copy.
5973 (STRIDE is the number of vectorized stmts for NODE divided by the number of
5975 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
5976 the created stmts must be inserted. */
5979 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
5980 int *mask_array
, int mask_nunits
,
5981 tree mask_element_type
, tree mask_type
,
5982 int first_vec_indx
, int second_vec_indx
,
5983 gimple_stmt_iterator
*gsi
, slp_tree node
,
5984 tree builtin_decl
, tree vectype
,
5985 VEC(tree
,heap
) *dr_chain
,
5986 int ncopies
, int vect_stmts_counter
)
5988 tree t
= NULL_TREE
, mask_vec
, mask
, perm_dest
;
5989 gimple perm_stmt
= NULL
;
5990 stmt_vec_info next_stmt_info
;
5991 int i
, group_size
, stride
, dr_chain_size
;
5992 tree first_vec
, second_vec
, data_ref
;
5995 VEC (tree
, heap
) *params
= NULL
;
5997 /* Create a vector mask. */
5998 for (i
= mask_nunits
- 1; i
>= 0; --i
)
5999 t
= tree_cons (NULL_TREE
, build_int_cst (mask_element_type
, mask_array
[i
]),
6002 mask_vec
= build_vector (mask_type
, t
);
6003 mask
= vect_init_vector (stmt
, mask_vec
, mask_type
, NULL
);
6005 group_size
= VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
));
6006 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
6007 dr_chain_size
= VEC_length (tree
, dr_chain
);
6009 /* Initialize the vect stmts of NODE to properly insert the generated
6011 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
6012 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
6013 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
6015 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
6016 for (i
= 0; i
< ncopies
; i
++)
6018 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
6019 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
6021 /* Build argument list for the vectorized call. */
6022 VEC_free (tree
, heap
, params
);
6023 params
= VEC_alloc (tree
, heap
, 3);
6024 VEC_quick_push (tree
, params
, first_vec
);
6025 VEC_quick_push (tree
, params
, second_vec
);
6026 VEC_quick_push (tree
, params
, mask
);
6028 /* Generate the permute statement. */
6029 perm_stmt
= gimple_build_call_vec (builtin_decl
, params
);
6030 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
6031 gimple_call_set_lhs (perm_stmt
, data_ref
);
6032 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6033 FOR_EACH_SSA_TREE_OPERAND (sym
, perm_stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
6035 if (TREE_CODE (sym
) == SSA_NAME
)
6036 sym
= SSA_NAME_VAR (sym
);
6037 mark_sym_for_renaming (sym
);
6040 /* Store the vector statement in NODE. */
6041 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
6042 stride
* i
+ vect_stmts_counter
, perm_stmt
);
6044 first_vec_indx
+= stride
;
6045 second_vec_indx
+= stride
;
6048 /* Mark the scalar stmt as vectorized. */
6049 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
6050 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
6054 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
6055 return in CURRENT_MASK_ELEMENT its equivalent in target specific
6056 representation. Check that the mask is valid and return FALSE if not.
6057 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
6058 the next vector, i.e., the current first vector is not needed. */
6061 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
6062 int mask_nunits
, bool only_one_vec
, int index
,
6063 int *mask
, int *current_mask_element
,
6064 bool *need_next_vector
)
6067 static int number_of_mask_fixes
= 1;
6068 static bool mask_fixed
= false;
6069 static bool needs_first_vector
= false;
6071 /* Convert to target specific representation. */
6072 *current_mask_element
= first_mask_element
+ m
;
6073 /* Adjust the value in case it's a mask for second and third vectors. */
6074 *current_mask_element
-= mask_nunits
* (number_of_mask_fixes
- 1);
6076 if (*current_mask_element
< mask_nunits
)
6077 needs_first_vector
= true;
6079 /* We have only one input vector to permute but the mask accesses values in
6080 the next vector as well. */
6081 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
6083 if (vect_print_dump_info (REPORT_DETAILS
))
6085 fprintf (vect_dump
, "permutation requires at least two vectors ");
6086 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
6092 /* The mask requires the next vector. */
6093 if (*current_mask_element
>= mask_nunits
* 2)
6095 if (needs_first_vector
|| mask_fixed
)
6097 /* We either need the first vector too or have already moved to the
6098 next vector. In both cases, this permutation needs three
6100 if (vect_print_dump_info (REPORT_DETAILS
))
6102 fprintf (vect_dump
, "permutation requires at "
6103 "least three vectors ");
6104 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
6110 /* We move to the next vector, dropping the first one and working with
6111 the second and the third - we need to adjust the values of the mask
6113 *current_mask_element
-= mask_nunits
* number_of_mask_fixes
;
6115 for (i
= 0; i
< index
; i
++)
6116 mask
[i
] -= mask_nunits
* number_of_mask_fixes
;
6118 (number_of_mask_fixes
)++;
6122 *need_next_vector
= mask_fixed
;
6124 /* This was the last element of this mask. Start a new one. */
6125 if (index
== mask_nunits
- 1)
6127 number_of_mask_fixes
= 1;
6129 needs_first_vector
= false;
6136 /* Generate vector permute statements from a list of loads in DR_CHAIN.
6137 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
6138 permute statements for SLP_NODE_INSTANCE. */
6140 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
6141 gimple_stmt_iterator
*gsi
, int vf
,
6142 slp_instance slp_node_instance
, bool analyze_only
)
6144 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6145 tree mask_element_type
= NULL_TREE
, mask_type
;
6146 int i
, j
, k
, m
, scale
, mask_nunits
, nunits
, vec_index
= 0, scalar_index
;
6148 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), builtin_decl
;
6149 gimple next_scalar_stmt
;
6150 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
6151 int first_mask_element
;
6152 int index
, unroll_factor
, *mask
, current_mask_element
, ncopies
;
6153 bool only_one_vec
= false, need_next_vector
= false;
6154 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
6156 if (!targetm
.vectorize
.builtin_vec_perm
)
6158 if (vect_print_dump_info (REPORT_DETAILS
))
6160 fprintf (vect_dump
, "no builtin for vect permute for ");
6161 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
6167 builtin_decl
= targetm
.vectorize
.builtin_vec_perm (vectype
,
6168 &mask_element_type
);
6169 if (!builtin_decl
|| !mask_element_type
)
6171 if (vect_print_dump_info (REPORT_DETAILS
))
6173 fprintf (vect_dump
, "no builtin for vect permute for ");
6174 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
6180 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
6181 mask_nunits
= TYPE_VECTOR_SUBPARTS (mask_type
);
6182 mask
= (int *) xmalloc (sizeof (int) * mask_nunits
);
6183 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6184 scale
= mask_nunits
/ nunits
;
6185 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
6187 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
6188 unrolling factor. */
6189 orig_vec_stmts_num
= group_size
*
6190 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
6191 if (orig_vec_stmts_num
== 1)
6192 only_one_vec
= true;
6194 /* Number of copies is determined by the final vectorization factor
6195 relatively to SLP_NODE_INSTANCE unrolling factor. */
6196 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
6198 /* Generate permutation masks for every NODE. Number of masks for each NODE
6199 is equal to GROUP_SIZE.
6200 E.g., we have a group of three nodes with three loads from the same
6201 location in each node, and the vector size is 4. I.e., we have a
6202 a0b0c0a1b1c1... sequence and we need to create the following vectors:
6203 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
6204 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
6207 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
6208 scpecific type, e.g., in bytes for Altivec.
6209 The last mask is illegal since we assume two operands for permute
6210 operation, and the mask element values can't be outside that range. Hence,
6211 the last mask must be converted into {2,5,5,5}.
6212 For the first two permutations we need the first and the second input
6213 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
6214 we need the second and the third vectors: {b1,c1,a2,b2} and
6218 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
),
6224 vect_stmts_counter
= 0;
6226 first_vec_index
= vec_index
++;
6228 second_vec_index
= first_vec_index
;
6230 second_vec_index
= vec_index
++;
6232 for (j
= 0; j
< unroll_factor
; j
++)
6234 for (k
= 0; k
< group_size
; k
++)
6236 first_mask_element
= (i
+ j
* group_size
) * scale
;
6237 for (m
= 0; m
< scale
; m
++)
6239 if (!vect_get_mask_element (stmt
, first_mask_element
, m
,
6240 mask_nunits
, only_one_vec
, index
, mask
,
6241 ¤t_mask_element
, &need_next_vector
))
6244 mask
[index
++] = current_mask_element
;
6247 if (index
== mask_nunits
)
6252 if (need_next_vector
)
6254 first_vec_index
= second_vec_index
;
6255 second_vec_index
= vec_index
;
6258 next_scalar_stmt
= VEC_index (gimple
,
6259 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
6261 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
6262 mask
, mask_nunits
, mask_element_type
, mask_type
,
6263 first_vec_index
, second_vec_index
, gsi
, node
,
6264 builtin_decl
, vectype
, dr_chain
, ncopies
,
6265 vect_stmts_counter
++);
6276 /* vectorizable_load.
6278 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6280 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6281 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6282 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6285 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
6286 slp_tree slp_node
, slp_instance slp_node_instance
)
6289 tree vec_dest
= NULL
;
6290 tree data_ref
= NULL
;
6291 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6292 stmt_vec_info prev_stmt_info
;
6293 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6294 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6295 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6296 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6297 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
6298 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6301 gimple new_stmt
= NULL
;
6303 enum dr_alignment_support alignment_support_scheme
;
6304 tree dataref_ptr
= NULL_TREE
;
6306 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6308 int i
, j
, group_size
;
6309 tree msq
= NULL_TREE
, lsq
;
6310 tree offset
= NULL_TREE
;
6311 tree realignment_token
= NULL_TREE
;
6313 VEC(tree
,heap
) *dr_chain
= NULL
;
6314 bool strided_load
= false;
6318 bool compute_in_loop
= false;
6319 struct loop
*at_loop
;
6321 bool slp
= (slp_node
!= NULL
);
6322 bool slp_perm
= false;
6323 enum tree_code code
;
6325 /* Multiple types in SLP are handled by creating the appropriate number of
6326 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6331 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6333 gcc_assert (ncopies
>= 1);
6335 /* FORNOW. This restriction should be relaxed. */
6336 if (nested_in_vect_loop
&& ncopies
> 1)
6338 if (vect_print_dump_info (REPORT_DETAILS
))
6339 fprintf (vect_dump
, "multiple types in nested loop.");
6343 if (slp
&& SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
6346 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
6349 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
6352 /* Is vectorizable load? */
6353 if (!is_gimple_assign (stmt
))
6356 scalar_dest
= gimple_assign_lhs (stmt
);
6357 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6360 code
= gimple_assign_rhs_code (stmt
);
6361 if (code
!= ARRAY_REF
6362 && code
!= INDIRECT_REF
6363 && !STMT_VINFO_STRIDED_ACCESS (stmt_info
))
6366 if (!STMT_VINFO_DATA_REF (stmt_info
))
6369 scalar_type
= TREE_TYPE (DR_REF (dr
));
6370 mode
= (int) TYPE_MODE (vectype
);
6372 /* FORNOW. In some cases can vectorize even if data-type not supported
6373 (e.g. - data copies). */
6374 if (optab_handler (mov_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
6376 if (vect_print_dump_info (REPORT_DETAILS
))
6377 fprintf (vect_dump
, "Aligned load, but unsupported type.");
6381 /* If accesses through a pointer to vectype do not alias the original
6382 memory reference we have a problem. */
6383 if (get_alias_set (vectype
) != get_alias_set (scalar_type
)
6384 && !alias_set_subset_of (get_alias_set (vectype
),
6385 get_alias_set (scalar_type
)))
6387 if (vect_print_dump_info (REPORT_DETAILS
))
6388 fprintf (vect_dump
, "vector type does not alias scalar type");
6392 /* Check if the load is a part of an interleaving chain. */
6393 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
6395 strided_load
= true;
6397 gcc_assert (! nested_in_vect_loop
);
6399 /* Check if interleaving is supported. */
6400 if (!vect_strided_load_supported (vectype
)
6401 && !PURE_SLP_STMT (stmt_info
) && !slp
)
6405 if (!vec_stmt
) /* transformation not required. */
6407 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6408 vect_model_load_cost (stmt_info
, ncopies
, NULL
);
6412 if (vect_print_dump_info (REPORT_DETAILS
))
6413 fprintf (vect_dump
, "transform load.");
6419 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
6420 /* Check if the chain of loads is already vectorized. */
6421 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
6423 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6426 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6427 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6429 /* VEC_NUM is the number of vect stmts to be created for this group. */
6432 strided_load
= false;
6433 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6436 vec_num
= group_size
;
6438 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
6444 group_size
= vec_num
= 1;
6447 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
6448 gcc_assert (alignment_support_scheme
);
6450 /* In case the vectorization factor (VF) is bigger than the number
6451 of elements that we can fit in a vectype (nunits), we have to generate
6452 more than one vector stmt - i.e - we need to "unroll" the
6453 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6454 from one copy of the vector stmt to the next, in the field
6455 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6456 stages to find the correct vector defs to be used when vectorizing
6457 stmts that use the defs of the current stmt. The example below illustrates
6458 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
6459 4 vectorized stmts):
6461 before vectorization:
6462 RELATED_STMT VEC_STMT
6466 step 1: vectorize stmt S1:
6467 We first create the vector stmt VS1_0, and, as usual, record a
6468 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6469 Next, we create the vector stmt VS1_1, and record a pointer to
6470 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6471 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6473 RELATED_STMT VEC_STMT
6474 VS1_0: vx0 = memref0 VS1_1 -
6475 VS1_1: vx1 = memref1 VS1_2 -
6476 VS1_2: vx2 = memref2 VS1_3 -
6477 VS1_3: vx3 = memref3 - -
6478 S1: x = load - VS1_0
6481 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6482 information we recorded in RELATED_STMT field is used to vectorize
6485 /* In case of interleaving (non-unit strided access):
6492 Vectorized loads are created in the order of memory accesses
6493 starting from the access of the first stmt of the chain:
6496 VS2: vx1 = &base + vec_size*1
6497 VS3: vx3 = &base + vec_size*2
6498 VS4: vx4 = &base + vec_size*3
6500 Then permutation statements are generated:
6502 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
6503 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
6506 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6507 (the order of the data-refs in the output of vect_permute_load_chain
6508 corresponds to the order of scalar stmts in the interleaving chain - see
6509 the documentation of vect_permute_load_chain()).
6510 The generation of permutation stmts and recording them in
6511 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
6513 In case of both multiple types and interleaving, the vector loads and
6514 permutation stmts above are created for every copy. The result vector stmts
6515 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6516 STMT_VINFO_RELATED_STMT for the next copies. */
6518 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6519 on a target that supports unaligned accesses (dr_unaligned_supported)
6520 we generate the following code:
6524 p = p + indx * vectype_size;
6529 Otherwise, the data reference is potentially unaligned on a target that
6530 does not support unaligned accesses (dr_explicit_realign_optimized) -
6531 then generate the following code, in which the data in each iteration is
6532 obtained by two vector loads, one from the previous iteration, and one
6533 from the current iteration:
6535 msq_init = *(floor(p1))
6536 p2 = initial_addr + VS - 1;
6537 realignment_token = call target_builtin;
6540 p2 = p2 + indx * vectype_size
6542 vec_dest = realign_load (msq, lsq, realignment_token)
6547 /* If the misalignment remains the same throughout the execution of the
6548 loop, we can create the init_addr and permutation mask at the loop
6549 preheader. Otherwise, it needs to be created inside the loop.
6550 This can only occur when vectorizing memory accesses in the inner-loop
6551 nested within an outer-loop that is being vectorized. */
6553 if (nested_in_vect_loop_p (loop
, stmt
)
6554 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6555 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6557 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6558 compute_in_loop
= true;
6561 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6562 || alignment_support_scheme
== dr_explicit_realign
)
6563 && !compute_in_loop
)
6565 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6566 alignment_support_scheme
, NULL_TREE
,
6568 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6570 phi
= SSA_NAME_DEF_STMT (msq
);
6571 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6577 prev_stmt_info
= NULL
;
6578 for (j
= 0; j
< ncopies
; j
++)
6580 /* 1. Create the vector pointer update chain. */
6582 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
,
6584 &dummy
, &ptr_incr
, false,
6588 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
6590 for (i
= 0; i
< vec_num
; i
++)
6593 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6596 /* 2. Create the vector-load in the loop. */
6597 switch (alignment_support_scheme
)
6600 gcc_assert (aligned_access_p (first_dr
));
6601 data_ref
= build_fold_indirect_ref (dataref_ptr
);
6603 case dr_unaligned_supported
:
6605 int mis
= DR_MISALIGNMENT (first_dr
);
6606 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
6608 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
6610 build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
6613 case dr_explicit_realign
:
6616 tree vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6618 if (compute_in_loop
)
6619 msq
= vect_setup_realignment (first_stmt
, gsi
,
6621 dr_explicit_realign
,
6624 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
6625 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6626 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6627 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6628 gimple_assign_set_lhs (new_stmt
, new_temp
);
6629 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6630 copy_virtual_operands (new_stmt
, stmt
);
6631 mark_symbols_for_renaming (new_stmt
);
6634 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6635 TYPE_SIZE_UNIT (scalar_type
));
6636 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6637 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
6640 case dr_explicit_realign_optimized
:
6641 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
6646 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6647 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6648 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6649 gimple_assign_set_lhs (new_stmt
, new_temp
);
6650 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6651 mark_symbols_for_renaming (new_stmt
);
6653 /* 3. Handle explicit realignment if necessary/supported. Create in
6654 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
6655 if (alignment_support_scheme
== dr_explicit_realign_optimized
6656 || alignment_support_scheme
== dr_explicit_realign
)
6660 lsq
= gimple_assign_lhs (new_stmt
);
6661 if (!realignment_token
)
6662 realignment_token
= dataref_ptr
;
6663 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6664 tmp
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
,
6666 new_stmt
= gimple_build_assign (vec_dest
, tmp
);
6667 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6668 gimple_assign_set_lhs (new_stmt
, new_temp
);
6669 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6671 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6674 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6675 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
));
6680 /* 4. Handle invariant-load. */
6683 gcc_assert (!strided_load
);
6684 gcc_assert (nested_in_vect_loop_p (loop
, stmt
));
6689 tree vec_inv
, bitpos
, bitsize
= TYPE_SIZE (scalar_type
);
6691 /* CHECKME: bitpos depends on endianess? */
6692 bitpos
= bitsize_zero_node
;
6693 vec_inv
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
,
6696 vect_create_destination_var (scalar_dest
, NULL_TREE
);
6697 new_stmt
= gimple_build_assign (vec_dest
, vec_inv
);
6698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6699 gimple_assign_set_lhs (new_stmt
, new_temp
);
6700 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6702 for (k
= nunits
- 1; k
>= 0; --k
)
6703 t
= tree_cons (NULL_TREE
, new_temp
, t
);
6704 /* FIXME: use build_constructor directly. */
6705 vec_inv
= build_constructor_from_list (vectype
, t
);
6706 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6707 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6710 gcc_unreachable (); /* FORNOW. */
6713 /* Collect vector loads and later create their permutation in
6714 vect_transform_strided_load (). */
6715 if (strided_load
|| slp_perm
)
6716 VEC_quick_push (tree
, dr_chain
, new_temp
);
6718 /* Store vector loads in the corresponding SLP_NODE. */
6719 if (slp
&& !slp_perm
)
6720 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
6723 if (slp
&& !slp_perm
)
6728 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
,
6729 LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
6730 slp_node_instance
, false))
6732 VEC_free (tree
, heap
, dr_chain
);
6740 if (!vect_transform_strided_load (stmt
, dr_chain
, group_size
, gsi
))
6743 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6744 VEC_free (tree
, heap
, dr_chain
);
6745 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
6750 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6752 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6753 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6759 VEC_free (tree
, heap
, dr_chain
);
6765 /* Function vectorizable_live_operation.
6767 STMT computes a value that is used outside the loop. Check if
6768 it can be supported. */
6771 vectorizable_live_operation (gimple stmt
,
6772 gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
6773 gimple
*vec_stmt ATTRIBUTE_UNUSED
)
6775 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6776 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6777 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6783 enum vect_def_type dt
;
6784 enum tree_code code
;
6785 enum gimple_rhs_class rhs_class
;
6787 gcc_assert (STMT_VINFO_LIVE_P (stmt_info
));
6789 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
6792 if (!is_gimple_assign (stmt
))
6795 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6798 /* FORNOW. CHECKME. */
6799 if (nested_in_vect_loop_p (loop
, stmt
))
6802 code
= gimple_assign_rhs_code (stmt
);
6803 op_type
= TREE_CODE_LENGTH (code
);
6804 rhs_class
= get_gimple_rhs_class (code
);
6805 gcc_assert (rhs_class
!= GIMPLE_UNARY_RHS
|| op_type
== unary_op
);
6806 gcc_assert (rhs_class
!= GIMPLE_BINARY_RHS
|| op_type
== binary_op
);
6808 /* FORNOW: support only if all uses are invariant. This means
6809 that the scalar operations can remain in place, unvectorized.
6810 The original last scalar value that they compute will be used. */
6812 for (i
= 0; i
< op_type
; i
++)
6814 if (rhs_class
== GIMPLE_SINGLE_RHS
)
6815 op
= TREE_OPERAND (gimple_op (stmt
, 1), i
);
6817 op
= gimple_op (stmt
, i
+ 1);
6818 if (op
&& !vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
6820 if (vect_print_dump_info (REPORT_DETAILS
))
6821 fprintf (vect_dump
, "use not simple.");
6825 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
)
6829 /* No transformation is required for the cases we currently support. */
6834 /* Function vect_is_simple_cond.
6837 LOOP - the loop that is being vectorized.
6838 COND - Condition that is checked for simple use.
6840 Returns whether a COND can be vectorized. Checks whether
6841 condition operands are supportable using vec_is_simple_use. */
6844 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
6848 enum vect_def_type dt
;
6850 if (!COMPARISON_CLASS_P (cond
))
6853 lhs
= TREE_OPERAND (cond
, 0);
6854 rhs
= TREE_OPERAND (cond
, 1);
6856 if (TREE_CODE (lhs
) == SSA_NAME
)
6858 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6859 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
6862 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6863 && TREE_CODE (lhs
) != FIXED_CST
)
6866 if (TREE_CODE (rhs
) == SSA_NAME
)
6868 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6869 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
6872 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6873 && TREE_CODE (rhs
) != FIXED_CST
)
6879 /* vectorizable_condition.
6881 Check if STMT is conditional modify expression that can be vectorized.
6882 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6883 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6886 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6889 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6892 tree scalar_dest
= NULL_TREE
;
6893 tree vec_dest
= NULL_TREE
;
6894 tree op
= NULL_TREE
;
6895 tree cond_expr
, then_clause
, else_clause
;
6896 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6897 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6898 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
6899 tree vec_compare
, vec_cond_expr
;
6901 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6902 enum machine_mode vec_mode
;
6904 enum vect_def_type dt
;
6905 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6906 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6907 enum tree_code code
;
6909 gcc_assert (ncopies
>= 1);
6911 return false; /* FORNOW */
6913 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
6916 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
6919 /* FORNOW: SLP not supported. */
6920 if (STMT_SLP_TYPE (stmt_info
))
6923 /* FORNOW: not yet supported. */
6924 if (STMT_VINFO_LIVE_P (stmt_info
))
6926 if (vect_print_dump_info (REPORT_DETAILS
))
6927 fprintf (vect_dump
, "value used after loop.");
6931 /* Is vectorizable conditional operation? */
6932 if (!is_gimple_assign (stmt
))
6935 code
= gimple_assign_rhs_code (stmt
);
6937 if (code
!= COND_EXPR
)
6940 gcc_assert (gimple_assign_single_p (stmt
));
6941 op
= gimple_assign_rhs1 (stmt
);
6942 cond_expr
= TREE_OPERAND (op
, 0);
6943 then_clause
= TREE_OPERAND (op
, 1);
6944 else_clause
= TREE_OPERAND (op
, 2);
6946 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
6949 /* We do not handle two different vector types for the condition
6951 if (TREE_TYPE (TREE_OPERAND (cond_expr
, 0)) != TREE_TYPE (vectype
))
6954 if (TREE_CODE (then_clause
) == SSA_NAME
)
6956 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6957 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
6958 &then_def_stmt
, &def
, &dt
))
6961 else if (TREE_CODE (then_clause
) != INTEGER_CST
6962 && TREE_CODE (then_clause
) != REAL_CST
6963 && TREE_CODE (then_clause
) != FIXED_CST
)
6966 if (TREE_CODE (else_clause
) == SSA_NAME
)
6968 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6969 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
6970 &else_def_stmt
, &def
, &dt
))
6973 else if (TREE_CODE (else_clause
) != INTEGER_CST
6974 && TREE_CODE (else_clause
) != REAL_CST
6975 && TREE_CODE (else_clause
) != FIXED_CST
)
6979 vec_mode
= TYPE_MODE (vectype
);
6983 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6984 return expand_vec_cond_expr_p (op
, vec_mode
);
6990 scalar_dest
= gimple_assign_lhs (stmt
);
6991 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6993 /* Handle cond expr. */
6995 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
6997 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
6998 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
6999 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
7001 /* Arguments are ready. Create the new vector stmt. */
7002 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
7003 vec_cond_lhs
, vec_cond_rhs
);
7004 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7005 vec_compare
, vec_then_clause
, vec_else_clause
);
7007 *vec_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7008 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
7009 gimple_assign_set_lhs (*vec_stmt
, new_temp
);
7010 vect_finish_stmt_generation (stmt
, *vec_stmt
, gsi
);
7016 /* Function vect_transform_stmt.
7018 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7021 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7022 bool *strided_store
, slp_tree slp_node
,
7023 slp_instance slp_node_instance
)
7025 bool is_store
= false;
7026 gimple vec_stmt
= NULL
;
7027 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7028 gimple orig_stmt_in_pattern
;
7031 switch (STMT_VINFO_TYPE (stmt_info
))
7033 case type_demotion_vec_info_type
:
7034 done
= vectorizable_type_demotion (stmt
, gsi
, &vec_stmt
, slp_node
);
7038 case type_promotion_vec_info_type
:
7039 done
= vectorizable_type_promotion (stmt
, gsi
, &vec_stmt
, slp_node
);
7043 case type_conversion_vec_info_type
:
7044 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7048 case induc_vec_info_type
:
7049 gcc_assert (!slp_node
);
7050 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7054 case op_vec_info_type
:
7055 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7059 case assignment_vec_info_type
:
7060 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7064 case load_vec_info_type
:
7065 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7070 case store_vec_info_type
:
7071 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7073 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
) && !slp_node
)
7075 /* In case of interleaving, the whole chain is vectorized when the
7076 last store in the chain is reached. Store stmts before the last
7077 one are skipped, and there vec_stmt_info shouldn't be freed
7079 *strided_store
= true;
7080 if (STMT_VINFO_VEC_STMT (stmt_info
))
7087 case condition_vec_info_type
:
7088 gcc_assert (!slp_node
);
7089 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
);
7093 case call_vec_info_type
:
7094 gcc_assert (!slp_node
);
7095 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
);
7098 case reduc_vec_info_type
:
7099 gcc_assert (!slp_node
);
7100 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
);
7105 if (!STMT_VINFO_LIVE_P (stmt_info
))
7107 if (vect_print_dump_info (REPORT_DETAILS
))
7108 fprintf (vect_dump
, "stmt not supported.");
7113 if (STMT_VINFO_LIVE_P (stmt_info
)
7114 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7116 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7122 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7123 orig_stmt_in_pattern
= STMT_VINFO_RELATED_STMT (stmt_info
);
7124 if (orig_stmt_in_pattern
)
7126 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt_in_pattern
);
7127 /* STMT was inserted by the vectorizer to replace a computation idiom.
7128 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
7129 computed this idiom. We need to record a pointer to VEC_STMT in
7130 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
7131 documentation of vect_pattern_recog. */
7132 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
7134 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
7135 STMT_VINFO_VEC_STMT (stmt_vinfo
) = vec_stmt
;
7144 /* This function builds ni_name = number of iterations loop executes
7145 on the loop preheader. */
7148 vect_build_loop_niters (loop_vec_info loop_vinfo
)
7151 gimple_seq stmts
= NULL
;
7153 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7154 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
7156 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
7157 add_referenced_var (var
);
7158 ni_name
= force_gimple_operand (ni
, &stmts
, false, var
);
7160 pe
= loop_preheader_edge (loop
);
7163 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
7164 gcc_assert (!new_bb
);
7171 /* This function generates the following statements:
7173 ni_name = number of iterations loop executes
7174 ratio = ni_name / vf
7175 ratio_mult_vf_name = ratio * vf
7177 and places them at the loop preheader edge. */
7180 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
7182 tree
*ratio_mult_vf_name_ptr
,
7183 tree
*ratio_name_ptr
)
7192 tree ratio_mult_vf_name
;
7193 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7194 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
7195 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7198 pe
= loop_preheader_edge (loop
);
7200 /* Generate temporary variable that contains
7201 number of iterations loop executes. */
7203 ni_name
= vect_build_loop_niters (loop_vinfo
);
7204 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
7206 /* Create: ratio = ni >> log2(vf) */
7208 ratio_name
= fold_build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
);
7209 if (!is_gimple_val (ratio_name
))
7211 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
7212 add_referenced_var (var
);
7215 ratio_name
= force_gimple_operand (ratio_name
, &stmts
, true, var
);
7216 pe
= loop_preheader_edge (loop
);
7217 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
7218 gcc_assert (!new_bb
);
7221 /* Create: ratio_mult_vf = ratio << log2 (vf). */
7223 ratio_mult_vf_name
= fold_build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
),
7224 ratio_name
, log_vf
);
7225 if (!is_gimple_val (ratio_mult_vf_name
))
7227 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
7228 add_referenced_var (var
);
7231 ratio_mult_vf_name
= force_gimple_operand (ratio_mult_vf_name
, &stmts
,
7233 pe
= loop_preheader_edge (loop
);
7234 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
7235 gcc_assert (!new_bb
);
7238 *ni_name_ptr
= ni_name
;
7239 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
7240 *ratio_name_ptr
= ratio_name
;
7246 /* Function vect_update_ivs_after_vectorizer.
7248 "Advance" the induction variables of LOOP to the value they should take
7249 after the execution of LOOP. This is currently necessary because the
7250 vectorizer does not handle induction variables that are used after the
7251 loop. Such a situation occurs when the last iterations of LOOP are
7253 1. We introduced new uses after LOOP for IVs that were not originally used
7254 after LOOP: the IVs of LOOP are now used by an epilog loop.
7255 2. LOOP is going to be vectorized; this means that it will iterate N/VF
7256 times, whereas the loop IVs should be bumped N times.
7259 - LOOP - a loop that is going to be vectorized. The last few iterations
7260 of LOOP were peeled.
7261 - NITERS - the number of iterations that LOOP executes (before it is
7262 vectorized). i.e, the number of times the ivs should be bumped.
7263 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
7264 coming out from LOOP on which there are uses of the LOOP ivs
7265 (this is the path from LOOP->exit to epilog_loop->preheader).
7267 The new definitions of the ivs are placed in LOOP->exit.
7268 The phi args associated with the edge UPDATE_E in the bb
7269 UPDATE_E->dest are updated accordingly.
7271 Assumption 1: Like the rest of the vectorizer, this function assumes
7272 a single loop exit that has a single predecessor.
7274 Assumption 2: The phi nodes in the LOOP header and in update_bb are
7275 organized in the same order.
7277 Assumption 3: The access function of the ivs is simple enough (see
7278 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
7280 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
7281 coming out of LOOP on which the ivs of LOOP are used (this is the path
7282 that leads to the epilog loop; other paths skip the epilog loop). This
7283 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
7284 needs to have its phis updated.
7288 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
7291 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7292 basic_block exit_bb
= single_exit (loop
)->dest
;
7294 gimple_stmt_iterator gsi
, gsi1
;
7295 basic_block update_bb
= update_e
->dest
;
7297 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
7299 /* Make sure there exists a single-predecessor exit bb: */
7300 gcc_assert (single_pred_p (exit_bb
));
7302 for (gsi
= gsi_start_phis (loop
->header
), gsi1
= gsi_start_phis (update_bb
);
7303 !gsi_end_p (gsi
) && !gsi_end_p (gsi1
);
7304 gsi_next (&gsi
), gsi_next (&gsi1
))
7306 tree access_fn
= NULL
;
7307 tree evolution_part
;
7310 tree var
, ni
, ni_name
;
7311 gimple_stmt_iterator last_gsi
;
7313 phi
= gsi_stmt (gsi
);
7314 phi1
= gsi_stmt (gsi1
);
7315 if (vect_print_dump_info (REPORT_DETAILS
))
7317 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
7318 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
7321 /* Skip virtual phi's. */
7322 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
7324 if (vect_print_dump_info (REPORT_DETAILS
))
7325 fprintf (vect_dump
, "virtual phi. skip.");
7329 /* Skip reduction phis. */
7330 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
7332 if (vect_print_dump_info (REPORT_DETAILS
))
7333 fprintf (vect_dump
, "reduc phi. skip.");
7337 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
7338 gcc_assert (access_fn
);
7339 STRIP_NOPS (access_fn
);
7341 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
7342 gcc_assert (evolution_part
!= NULL_TREE
);
7344 /* FORNOW: We do not support IVs whose evolution function is a polynomial
7345 of degree >= 2 or exponential. */
7346 gcc_assert (!tree_is_chrec (evolution_part
));
7348 step_expr
= evolution_part
;
7349 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
7352 if (POINTER_TYPE_P (TREE_TYPE (init_expr
)))
7353 ni
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (init_expr
),
7355 fold_convert (sizetype
,
7356 fold_build2 (MULT_EXPR
, TREE_TYPE (niters
),
7357 niters
, step_expr
)));
7359 ni
= fold_build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
7360 fold_build2 (MULT_EXPR
, TREE_TYPE (init_expr
),
7361 fold_convert (TREE_TYPE (init_expr
),
7368 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
7369 add_referenced_var (var
);
7371 last_gsi
= gsi_last_bb (exit_bb
);
7372 ni_name
= force_gimple_operand_gsi (&last_gsi
, ni
, false, var
,
7373 true, GSI_SAME_STMT
);
7375 /* Fix phi expressions in the successor bb. */
7376 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
7380 /* Return the more conservative threshold between the
7381 min_profitable_iters returned by the cost model and the user
7382 specified threshold, if provided. */
7385 conservative_cost_threshold (loop_vec_info loop_vinfo
,
7386 int min_profitable_iters
)
7389 int min_scalar_loop_bound
;
7391 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
7392 * LOOP_VINFO_VECT_FACTOR (loop_vinfo
)) - 1);
7394 /* Use the cost model only if it is more conservative than user specified
7396 th
= (unsigned) min_scalar_loop_bound
;
7397 if (min_profitable_iters
7398 && (!min_scalar_loop_bound
7399 || min_profitable_iters
> min_scalar_loop_bound
))
7400 th
= (unsigned) min_profitable_iters
;
7402 if (th
&& vect_print_dump_info (REPORT_COST
))
7403 fprintf (vect_dump
, "Vectorization may not be profitable.");
7408 /* Function vect_do_peeling_for_loop_bound
7410 Peel the last iterations of the loop represented by LOOP_VINFO.
7411 The peeled iterations form a new epilog loop. Given that the loop now
7412 iterates NITERS times, the new epilog loop iterates
7413 NITERS % VECTORIZATION_FACTOR times.
7415 The original loop will later be made to iterate
7416 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
7419 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
)
7421 tree ni_name
, ratio_mult_vf_name
;
7422 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7423 struct loop
*new_loop
;
7425 basic_block preheader
;
7427 bool check_profitability
= false;
7428 unsigned int th
= 0;
7429 int min_profitable_iters
;
7431 if (vect_print_dump_info (REPORT_DETAILS
))
7432 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
7434 initialize_original_copy_tables ();
7436 /* Generate the following variables on the preheader of original loop:
7438 ni_name = number of iteration the original loop executes
7439 ratio = ni_name / vf
7440 ratio_mult_vf_name = ratio * vf */
7441 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
7442 &ratio_mult_vf_name
, ratio
);
7444 loop_num
= loop
->num
;
7446 /* If cost model check not done during versioning and
7447 peeling for alignment. */
7448 if (!VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
7449 && !VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
))
7450 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
7452 check_profitability
= true;
7454 /* Get profitability threshold for vectorized loop. */
7455 min_profitable_iters
= LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
);
7457 th
= conservative_cost_threshold (loop_vinfo
,
7458 min_profitable_iters
);
7461 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, single_exit (loop
),
7462 ratio_mult_vf_name
, ni_name
, false,
7463 th
, check_profitability
);
7464 gcc_assert (new_loop
);
7465 gcc_assert (loop_num
== loop
->num
);
7466 #ifdef ENABLE_CHECKING
7467 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
7470 /* A guard that controls whether the new_loop is to be executed or skipped
7471 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
7472 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
7473 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
7474 is on the path where the LOOP IVs are used and need to be updated. */
7476 preheader
= loop_preheader_edge (new_loop
)->src
;
7477 if (EDGE_PRED (preheader
, 0)->src
== single_exit (loop
)->dest
)
7478 update_e
= EDGE_PRED (preheader
, 0);
7480 update_e
= EDGE_PRED (preheader
, 1);
7482 /* Update IVs of original loop as if they were advanced
7483 by ratio_mult_vf_name steps. */
7484 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
7486 /* After peeling we have to reset scalar evolution analyzer. */
7489 free_original_copy_tables ();
7493 /* Function vect_gen_niters_for_prolog_loop
7495 Set the number of iterations for the loop represented by LOOP_VINFO
7496 to the minimum between LOOP_NITERS (the original iteration count of the loop)
7497 and the misalignment of DR - the data reference recorded in
7498 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
7499 this loop, the data reference DR will refer to an aligned location.
7501 The following computation is generated:
7503 If the misalignment of DR is known at compile time:
7504 addr_mis = int mis = DR_MISALIGNMENT (dr);
7505 Else, compute address misalignment in bytes:
7506 addr_mis = addr & (vectype_size - 1)
7508 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
7510 (elem_size = element type size; an element is the scalar element whose type
7511 is the inner type of the vectype)
7513 When the step of the data-ref in the loop is not 1 (as in interleaved data
7514 and SLP), the number of iterations of the prolog must be divided by the step
7515 (which is equal to the size of interleaved group).
7517 The above formulas assume that VF == number of elements in the vector. This
7518 may not hold when there are multiple-types in the loop.
7519 In this case, for some data-references in the loop the VF does not represent
7520 the number of elements that fit in the vector. Therefore, instead of VF we
7521 use TYPE_VECTOR_SUBPARTS. */
7524 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
7526 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
7527 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7530 tree iters
, iters_name
;
7533 gimple dr_stmt
= DR_STMT (dr
);
7534 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
7535 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7536 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
7537 tree niters_type
= TREE_TYPE (loop_niters
);
7539 int element_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
7540 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
7542 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
7543 step
= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
7545 pe
= loop_preheader_edge (loop
);
7547 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
7549 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
7550 int elem_misalign
= byte_misalign
/ element_size
;
7552 if (vect_print_dump_info (REPORT_DETAILS
))
7553 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
7555 iters
= build_int_cst (niters_type
,
7556 (((nelements
- elem_misalign
) & (nelements
- 1)) / step
));
7560 gimple_seq new_stmts
= NULL
;
7561 tree start_addr
= vect_create_addr_base_for_vector_ref (dr_stmt
,
7562 &new_stmts
, NULL_TREE
, loop
);
7563 tree ptr_type
= TREE_TYPE (start_addr
);
7564 tree size
= TYPE_SIZE (ptr_type
);
7565 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
7566 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
7567 tree elem_size_log
=
7568 build_int_cst (type
, exact_log2 (vectype_align
/nelements
));
7569 tree nelements_minus_1
= build_int_cst (type
, nelements
- 1);
7570 tree nelements_tree
= build_int_cst (type
, nelements
);
7574 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmts
);
7575 gcc_assert (!new_bb
);
7577 /* Create: byte_misalign = addr & (vectype_size - 1) */
7579 fold_build2 (BIT_AND_EXPR
, type
, fold_convert (type
, start_addr
), vectype_size_minus_1
);
7581 /* Create: elem_misalign = byte_misalign / element_size */
7583 fold_build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
7585 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
7586 iters
= fold_build2 (MINUS_EXPR
, type
, nelements_tree
, elem_misalign
);
7587 iters
= fold_build2 (BIT_AND_EXPR
, type
, iters
, nelements_minus_1
);
7588 iters
= fold_convert (niters_type
, iters
);
7591 /* Create: prolog_loop_niters = min (iters, loop_niters) */
7592 /* If the loop bound is known at compile time we already verified that it is
7593 greater than vf; since the misalignment ('iters') is at most vf, there's
7594 no need to generate the MIN_EXPR in this case. */
7595 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
7596 iters
= fold_build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
7598 if (vect_print_dump_info (REPORT_DETAILS
))
7600 fprintf (vect_dump
, "niters for prolog loop: ");
7601 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
7604 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
7605 add_referenced_var (var
);
7607 iters_name
= force_gimple_operand (iters
, &stmts
, false, var
);
7609 /* Insert stmt on loop preheader edge. */
7612 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
7613 gcc_assert (!new_bb
);
7620 /* Function vect_update_init_of_dr
7622 NITERS iterations were peeled from LOOP. DR represents a data reference
7623 in LOOP. This function updates the information recorded in DR to
7624 account for the fact that the first NITERS iterations had already been
7625 executed. Specifically, it updates the OFFSET field of DR. */
7628 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
7630 tree offset
= DR_OFFSET (dr
);
7632 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
7633 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
7634 DR_OFFSET (dr
) = offset
;
7638 /* Function vect_update_inits_of_drs
7640 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
7641 This function updates the information recorded for the data references in
7642 the loop to account for the fact that the first NITERS iterations had
7643 already been executed. Specifically, it updates the initial_condition of
7644 the access_function of all the data_references in the loop. */
7647 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
7650 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
7651 struct data_reference
*dr
;
7653 if (vect_print_dump_info (REPORT_DETAILS
))
7654 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
7656 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
7657 vect_update_init_of_dr (dr
, niters
);
7661 /* Function vect_do_peeling_for_alignment
7663 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
7664 'niters' is set to the misalignment of one of the data references in the
7665 loop, thereby forcing it to refer to an aligned location at the beginning
7666 of the execution of this loop. The data reference for which we are
7667 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
7670 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
)
7672 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7673 tree niters_of_prolog_loop
, ni_name
;
7675 struct loop
*new_loop
;
7676 bool check_profitability
= false;
7677 unsigned int th
= 0;
7678 int min_profitable_iters
;
7680 if (vect_print_dump_info (REPORT_DETAILS
))
7681 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
7683 initialize_original_copy_tables ();
7685 ni_name
= vect_build_loop_niters (loop_vinfo
);
7686 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
7689 /* If cost model check not done during versioning. */
7690 if (!VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
7691 && !VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
7693 check_profitability
= true;
7695 /* Get profitability threshold for vectorized loop. */
7696 min_profitable_iters
= LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
);
7698 th
= conservative_cost_threshold (loop_vinfo
,
7699 min_profitable_iters
);
7702 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
7704 slpeel_tree_peel_loop_to_edge (loop
, loop_preheader_edge (loop
),
7705 niters_of_prolog_loop
, ni_name
, true,
7706 th
, check_profitability
);
7708 gcc_assert (new_loop
);
7709 #ifdef ENABLE_CHECKING
7710 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
7713 /* Update number of times loop executes. */
7714 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
7715 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
7716 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
7718 /* Update the init conditions of the access functions of all data refs. */
7719 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
7721 /* After peeling we have to reset scalar evolution analyzer. */
7724 free_original_copy_tables ();
7728 /* Function vect_create_cond_for_align_checks.
7730 Create a conditional expression that represents the alignment checks for
7731 all of data references (array element references) whose alignment must be
7735 COND_EXPR - input conditional expression. New conditions will be chained
7736 with logical AND operation.
7737 LOOP_VINFO - two fields of the loop information are used.
7738 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
7739 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
7742 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7744 The returned value is the conditional expression to be used in the if
7745 statement that controls which version of the loop gets executed at runtime.
7747 The algorithm makes two assumptions:
7748 1) The number of bytes "n" in a vector is a power of 2.
7749 2) An address "a" is aligned if a%n is zero and that this
7750 test can be done as a&(n-1) == 0. For example, for 16
7751 byte vectors the test is a&0xf == 0. */
7754 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
7756 gimple_seq
*cond_expr_stmt_list
)
7758 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7759 VEC(gimple
,heap
) *may_misalign_stmts
7760 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
7762 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
7766 tree int_ptrsize_type
;
7768 tree or_tmp_name
= NULL_TREE
;
7769 tree and_tmp
, and_tmp_name
;
7772 tree part_cond_expr
;
7774 /* Check that mask is one less than a power of 2, i.e., mask is
7775 all zeros followed by all ones. */
7776 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
7778 /* CHECKME: what is the best integer or unsigned type to use to hold a
7779 cast from a pointer value? */
7780 psize
= TYPE_SIZE (ptr_type_node
);
7782 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
7784 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
7785 of the first vector of the i'th data reference. */
7787 for (i
= 0; VEC_iterate (gimple
, may_misalign_stmts
, i
, ref_stmt
); i
++)
7789 gimple_seq new_stmt_list
= NULL
;
7791 tree addr_tmp
, addr_tmp_name
;
7792 tree or_tmp
, new_or_tmp_name
;
7793 gimple addr_stmt
, or_stmt
;
7795 /* create: addr_tmp = (int)(address_of_first_vector) */
7797 vect_create_addr_base_for_vector_ref (ref_stmt
, &new_stmt_list
,
7799 if (new_stmt_list
!= NULL
)
7800 gimple_seq_add_seq (cond_expr_stmt_list
, new_stmt_list
);
7802 sprintf (tmp_name
, "%s%d", "addr2int", i
);
7803 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
7804 add_referenced_var (addr_tmp
);
7805 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL
);
7806 addr_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, addr_tmp_name
,
7807 addr_base
, NULL_TREE
);
7808 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
7809 gimple_seq_add_stmt (cond_expr_stmt_list
, addr_stmt
);
7811 /* The addresses are OR together. */
7813 if (or_tmp_name
!= NULL_TREE
)
7815 /* create: or_tmp = or_tmp | addr_tmp */
7816 sprintf (tmp_name
, "%s%d", "orptrs", i
);
7817 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
7818 add_referenced_var (or_tmp
);
7819 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL
);
7820 or_stmt
= gimple_build_assign_with_ops (BIT_IOR_EXPR
,
7822 or_tmp_name
, addr_tmp_name
);
7823 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
7824 gimple_seq_add_stmt (cond_expr_stmt_list
, or_stmt
);
7825 or_tmp_name
= new_or_tmp_name
;
7828 or_tmp_name
= addr_tmp_name
;
7832 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
7834 /* create: and_tmp = or_tmp & mask */
7835 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
7836 add_referenced_var (and_tmp
);
7837 and_tmp_name
= make_ssa_name (and_tmp
, NULL
);
7839 and_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR
, and_tmp_name
,
7840 or_tmp_name
, mask_cst
);
7841 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
7842 gimple_seq_add_stmt (cond_expr_stmt_list
, and_stmt
);
7844 /* Make and_tmp the left operand of the conditional test against zero.
7845 if and_tmp has a nonzero bit then some address is unaligned. */
7846 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
7847 part_cond_expr
= fold_build2 (EQ_EXPR
, boolean_type_node
,
7848 and_tmp_name
, ptrsize_zero
);
7850 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
7851 *cond_expr
, part_cond_expr
);
7853 *cond_expr
= part_cond_expr
;
7856 /* Function vect_vfa_segment_size.
7858 Create an expression that computes the size of segment
7859 that will be accessed for a data reference. The functions takes into
7860 account that realignment loads may access one more vector.
7863 DR: The data reference.
7864 VECT_FACTOR: vectorization factor.
7866 Return an expression whose value is the size of segment which will be
7870 vect_vfa_segment_size (struct data_reference
*dr
, tree vect_factor
)
7872 tree segment_length
= fold_build2 (MULT_EXPR
, integer_type_node
,
7873 DR_STEP (dr
), vect_factor
);
7875 if (vect_supportable_dr_alignment (dr
) == dr_explicit_realign_optimized
)
7877 tree vector_size
= TYPE_SIZE_UNIT
7878 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr
))));
7880 segment_length
= fold_build2 (PLUS_EXPR
, integer_type_node
,
7881 segment_length
, vector_size
);
7883 return fold_convert (sizetype
, segment_length
);
7886 /* Function vect_create_cond_for_alias_checks.
7888 Create a conditional expression that represents the run-time checks for
7889 overlapping of address ranges represented by a list of data references
7890 relations passed as input.
7893 COND_EXPR - input conditional expression. New conditions will be chained
7894 with logical AND operation.
7895 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
7899 COND_EXPR - conditional expression.
7900 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7904 The returned value is the conditional expression to be used in the if
7905 statement that controls which version of the loop gets executed at runtime.
7909 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo
,
7911 gimple_seq
* cond_expr_stmt_list
)
7913 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7914 VEC (ddr_p
, heap
) * may_alias_ddrs
=
7915 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
7917 build_int_cst (integer_type_node
, LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
7921 tree part_cond_expr
;
7923 /* Create expression
7924 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
7925 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
7929 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
7930 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
7932 if (VEC_empty (ddr_p
, may_alias_ddrs
))
7935 for (i
= 0; VEC_iterate (ddr_p
, may_alias_ddrs
, i
, ddr
); i
++)
7937 struct data_reference
*dr_a
, *dr_b
;
7938 gimple dr_group_first_a
, dr_group_first_b
;
7939 tree addr_base_a
, addr_base_b
;
7940 tree segment_length_a
, segment_length_b
;
7941 gimple stmt_a
, stmt_b
;
7944 stmt_a
= DR_STMT (DDR_A (ddr
));
7945 dr_group_first_a
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a
));
7946 if (dr_group_first_a
)
7948 stmt_a
= dr_group_first_a
;
7949 dr_a
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a
));
7953 stmt_b
= DR_STMT (DDR_B (ddr
));
7954 dr_group_first_b
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b
));
7955 if (dr_group_first_b
)
7957 stmt_b
= dr_group_first_b
;
7958 dr_b
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b
));
7962 vect_create_addr_base_for_vector_ref (stmt_a
, cond_expr_stmt_list
,
7965 vect_create_addr_base_for_vector_ref (stmt_b
, cond_expr_stmt_list
,
7968 segment_length_a
= vect_vfa_segment_size (dr_a
, vect_factor
);
7969 segment_length_b
= vect_vfa_segment_size (dr_b
, vect_factor
);
7971 if (vect_print_dump_info (REPORT_DR_DETAILS
))
7974 "create runtime check for data references ");
7975 print_generic_expr (vect_dump
, DR_REF (dr_a
), TDF_SLIM
);
7976 fprintf (vect_dump
, " and ");
7977 print_generic_expr (vect_dump
, DR_REF (dr_b
), TDF_SLIM
);
7982 fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
7983 fold_build2 (LT_EXPR
, boolean_type_node
,
7984 fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (addr_base_a
),
7988 fold_build2 (LT_EXPR
, boolean_type_node
,
7989 fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (addr_base_b
),
7995 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
7996 *cond_expr
, part_cond_expr
);
7998 *cond_expr
= part_cond_expr
;
8000 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
8001 fprintf (vect_dump
, "created %u versioning for alias checks.\n",
8002 VEC_length (ddr_p
, may_alias_ddrs
));
8006 /* Function vect_loop_versioning.
8008 If the loop has data references that may or may not be aligned or/and
8009 has data reference relations whose independence was not proven then
8010 two versions of the loop need to be generated, one which is vectorized
8011 and one which isn't. A test is then generated to control which of the
8012 loops is executed. The test checks for the alignment of all of the
8013 data references that may or may not be aligned. An additional
8014 sequence of runtime tests is generated for each pairs of DDRs whose
8015 independence was not proven. The vectorized version of loop is
8016 executed only if both alias and alignment tests are passed.
8018 The test generated to check which version of loop is executed
8019 is modified to also check for profitability as indicated by the
8020 cost model initially. */
8023 vect_loop_versioning (loop_vec_info loop_vinfo
)
8025 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8027 tree cond_expr
= NULL_TREE
;
8028 gimple_seq cond_expr_stmt_list
= NULL
;
8029 basic_block condition_bb
;
8030 gimple_stmt_iterator gsi
, cond_exp_gsi
;
8031 basic_block merge_bb
;
8032 basic_block new_exit_bb
;
8034 gimple orig_phi
, new_phi
;
8036 unsigned prob
= 4 * REG_BR_PROB_BASE
/ 5;
8037 gimple_seq gimplify_stmt_list
= NULL
;
8038 tree scalar_loop_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
8039 int min_profitable_iters
= 0;
8042 /* Get profitability threshold for vectorized loop. */
8043 min_profitable_iters
= LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
);
8045 th
= conservative_cost_threshold (loop_vinfo
,
8046 min_profitable_iters
);
8049 build2 (GT_EXPR
, boolean_type_node
, scalar_loop_iters
,
8050 build_int_cst (TREE_TYPE (scalar_loop_iters
), th
));
8052 cond_expr
= force_gimple_operand (cond_expr
, &cond_expr_stmt_list
,
8055 if (VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
8056 vect_create_cond_for_align_checks (loop_vinfo
, &cond_expr
,
8057 &cond_expr_stmt_list
);
8059 if (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
8060 vect_create_cond_for_alias_checks (loop_vinfo
, &cond_expr
,
8061 &cond_expr_stmt_list
);
8064 fold_build2 (NE_EXPR
, boolean_type_node
, cond_expr
, integer_zero_node
);
8066 force_gimple_operand (cond_expr
, &gimplify_stmt_list
, true, NULL_TREE
);
8067 gimple_seq_add_seq (&cond_expr_stmt_list
, gimplify_stmt_list
);
8069 initialize_original_copy_tables ();
8070 nloop
= loop_version (loop
, cond_expr
, &condition_bb
,
8071 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
8072 free_original_copy_tables();
8074 /* Loop versioning violates an assumption we try to maintain during
8075 vectorization - that the loop exit block has a single predecessor.
8076 After versioning, the exit block of both loop versions is the same
8077 basic block (i.e. it has two predecessors). Just in order to simplify
8078 following transformations in the vectorizer, we fix this situation
8079 here by adding a new (empty) block on the exit-edge of the loop,
8080 with the proper loop-exit phis to maintain loop-closed-form. */
8082 merge_bb
= single_exit (loop
)->dest
;
8083 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
8084 new_exit_bb
= split_edge (single_exit (loop
));
8085 new_exit_e
= single_exit (loop
);
8086 e
= EDGE_SUCC (new_exit_bb
, 0);
8088 for (gsi
= gsi_start_phis (merge_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
8090 orig_phi
= gsi_stmt (gsi
);
8091 new_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi
)),
8093 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
8094 add_phi_arg (new_phi
, arg
, new_exit_e
);
8095 SET_PHI_ARG_DEF (orig_phi
, e
->dest_idx
, PHI_RESULT (new_phi
));
8098 /* End loop-exit-fixes after versioning. */
8100 update_ssa (TODO_update_ssa
);
8101 if (cond_expr_stmt_list
)
8103 cond_exp_gsi
= gsi_last_bb (condition_bb
);
8104 gsi_insert_seq_before (&cond_exp_gsi
, cond_expr_stmt_list
, GSI_SAME_STMT
);
8108 /* Remove a group of stores (for SLP or interleaving), free their
8112 vect_remove_stores (gimple first_stmt
)
8114 gimple next
= first_stmt
;
8116 gimple_stmt_iterator next_si
;
8120 /* Free the attached stmt_vec_info and remove the stmt. */
8121 next_si
= gsi_for_stmt (next
);
8122 gsi_remove (&next_si
, true);
8123 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
8124 free_stmt_vec_info (next
);
8130 /* Vectorize SLP instance tree in postorder. */
8133 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
8134 unsigned int vectorization_factor
)
8137 bool strided_store
, is_store
;
8138 gimple_stmt_iterator si
;
8139 stmt_vec_info stmt_info
;
8140 unsigned int vec_stmts_size
, nunits
, group_size
;
8143 slp_tree loads_node
;
8148 vect_schedule_slp_instance (SLP_TREE_LEFT (node
), instance
,
8149 vectorization_factor
);
8150 vect_schedule_slp_instance (SLP_TREE_RIGHT (node
), instance
,
8151 vectorization_factor
);
8153 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
8154 stmt_info
= vinfo_for_stmt (stmt
);
8155 /* VECTYPE is the type of the destination. */
8156 vectype
= get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt
)));
8157 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
8158 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
8160 /* For each SLP instance calculate number of vector stmts to be created
8161 for the scalar stmts in each node of the SLP tree. Number of vector
8162 elements in one vector iteration is the number of scalar elements in
8163 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
8165 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
8167 /* In case of load permutation we have to allocate vectorized statements for
8168 all the nodes that participate in that permutation. */
8169 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
8172 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
);
8175 if (!SLP_TREE_VEC_STMTS (loads_node
))
8177 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
8179 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
8184 if (!SLP_TREE_VEC_STMTS (node
))
8186 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
8187 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
8190 if (vect_print_dump_info (REPORT_DETAILS
))
8192 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
8193 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
8196 si
= gsi_for_stmt (stmt
);
8197 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
8200 if (DR_GROUP_FIRST_DR (stmt_info
))
8201 /* If IS_STORE is TRUE, the vectorization of the
8202 interleaving chain was completed - free all the stores in
8204 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info
));
8206 /* FORNOW: SLP originates only from strided stores. */
8212 /* FORNOW: SLP originates only from strided stores. */
8218 vect_schedule_slp (loop_vec_info loop_vinfo
)
8220 VEC (slp_instance
, heap
) *slp_instances
=
8221 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
8222 slp_instance instance
;
8224 bool is_store
= false;
8226 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
8228 /* Schedule the tree of INSTANCE. */
8229 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
8231 LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
8233 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
)
8234 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
8235 fprintf (vect_dump
, "vectorizing stmts using SLP.");
8241 /* Function vect_transform_loop.
8243 The analysis phase has determined that the loop is vectorizable.
8244 Vectorize the loop - created vectorized stmts to replace the scalar
8245 stmts in the loop, and update the loop exit condition. */
8248 vect_transform_loop (loop_vec_info loop_vinfo
)
8250 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8251 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
8252 int nbbs
= loop
->num_nodes
;
8253 gimple_stmt_iterator si
;
8256 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8258 bool slp_scheduled
= false;
8259 unsigned int nunits
;
8261 if (vect_print_dump_info (REPORT_DETAILS
))
8262 fprintf (vect_dump
, "=== vec_transform_loop ===");
8264 if (VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
8265 || VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
8266 vect_loop_versioning (loop_vinfo
);
8268 /* CHECKME: we wouldn't need this if we called update_ssa once
8270 bitmap_zero (vect_memsyms_to_rename
);
8272 /* Peel the loop if there are data refs with unknown alignment.
8273 Only one data ref with unknown store is allowed. */
8275 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
8276 vect_do_peeling_for_alignment (loop_vinfo
);
8278 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
8279 compile time constant), or it is a constant that doesn't divide by the
8280 vectorization factor, then an epilog loop needs to be created.
8281 We therefore duplicate the loop: the original loop will be vectorized,
8282 and will compute the first (n/VF) iterations. The second copy of the loop
8283 will remain scalar and will compute the remaining (n%VF) iterations.
8284 (VF is the vectorization factor). */
8286 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
8287 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
8288 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0))
8289 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
);
8291 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
8292 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
8294 /* 1) Make sure the loop header has exactly two entries
8295 2) Make sure we have a preheader basic block. */
8297 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
8299 split_edge (loop_preheader_edge (loop
));
8301 /* FORNOW: the vectorizer supports only loops which body consist
8302 of one basic block (header + empty latch). When the vectorizer will
8303 support more involved loop forms, the order by which the BBs are
8304 traversed need to be reconsidered. */
8306 for (i
= 0; i
< nbbs
; i
++)
8308 basic_block bb
= bbs
[i
];
8309 stmt_vec_info stmt_info
;
8312 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
8314 phi
= gsi_stmt (si
);
8315 if (vect_print_dump_info (REPORT_DETAILS
))
8317 fprintf (vect_dump
, "------>vectorizing phi: ");
8318 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
8320 stmt_info
= vinfo_for_stmt (phi
);
8324 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8325 && !STMT_VINFO_LIVE_P (stmt_info
))
8328 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
8329 != (unsigned HOST_WIDE_INT
) vectorization_factor
)
8330 && vect_print_dump_info (REPORT_DETAILS
))
8331 fprintf (vect_dump
, "multiple-types.");
8333 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
8335 if (vect_print_dump_info (REPORT_DETAILS
))
8336 fprintf (vect_dump
, "transform phi.");
8337 vect_transform_stmt (phi
, NULL
, NULL
, NULL
, NULL
);
8341 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
);)
8343 gimple stmt
= gsi_stmt (si
);
8346 if (vect_print_dump_info (REPORT_DETAILS
))
8348 fprintf (vect_dump
, "------>vectorizing statement: ");
8349 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
8352 stmt_info
= vinfo_for_stmt (stmt
);
8354 /* vector stmts created in the outer-loop during vectorization of
8355 stmts in an inner-loop may not have a stmt_info, and do not
8356 need to be vectorized. */
8363 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8364 && !STMT_VINFO_LIVE_P (stmt_info
))
8370 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
8372 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
8373 if (!STMT_SLP_TYPE (stmt_info
)
8374 && nunits
!= (unsigned int) vectorization_factor
8375 && vect_print_dump_info (REPORT_DETAILS
))
8376 /* For SLP VF is set according to unrolling factor, and not to
8377 vector size, hence for SLP this print is not valid. */
8378 fprintf (vect_dump
, "multiple-types.");
8380 /* SLP. Schedule all the SLP instances when the first SLP stmt is
8382 if (STMT_SLP_TYPE (stmt_info
))
8386 slp_scheduled
= true;
8388 if (vect_print_dump_info (REPORT_DETAILS
))
8389 fprintf (vect_dump
, "=== scheduling SLP instances ===");
8391 is_store
= vect_schedule_slp (loop_vinfo
);
8393 /* IS_STORE is true if STMT is a store. Stores cannot be of
8394 hybrid SLP type. They are removed in
8395 vect_schedule_slp_instance and their vinfo is destroyed. */
8403 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
8404 if (PURE_SLP_STMT (stmt_info
))
8411 /* -------- vectorize statement ------------ */
8412 if (vect_print_dump_info (REPORT_DETAILS
))
8413 fprintf (vect_dump
, "transform statement.");
8415 strided_store
= false;
8416 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, NULL
, NULL
);
8419 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
8421 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
8422 interleaving chain was completed - free all the stores in
8424 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info
));
8425 gsi_remove (&si
, true);
8430 /* Free the attached stmt_vec_info and remove the stmt. */
8431 free_stmt_vec_info (stmt
);
8432 gsi_remove (&si
, true);
8440 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
8442 mark_set_for_renaming (vect_memsyms_to_rename
);
8444 /* The memory tags and pointers in vectorized statements need to
8445 have their SSA forms updated. FIXME, why can't this be delayed
8446 until all the loops have been transformed? */
8447 update_ssa (TODO_update_ssa
);
8449 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
8450 fprintf (vect_dump
, "LOOP VECTORIZED.");
8451 if (loop
->inner
&& vect_print_dump_info (REPORT_VECTORIZED_LOOPS
))
8452 fprintf (vect_dump
, "OUTER LOOP VECTORIZED.");