/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (gimple, gimple_stmt_iterator *, bool *,
                                 slp_tree, slp_instance);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (gimple, struct loop *, tree, tree *, gimple *, bool, bool *);
static tree vect_create_addr_base_for_vector_ref
  (gimple, gimple_seq *, tree, struct loop *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, gimple, tree *);
static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
static void vect_finish_stmt_generation
  (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void vect_create_epilog_for_reduction
  (tree, gimple, int, enum tree_code, gimple);
static tree get_initial_def_for_reduction (gimple, tree, tree *);

/* Utility function dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
static int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return TARG_SCALAR_LOAD_COST;
    case store_vec_info_type:
      return TARG_SCALAR_STORE_COST;
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return TARG_SCALAR_STMT_COST;
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.  */

int
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
{
  int i;
  int min_profitable_iters;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  int vec_inside_cost = 0;
  int vec_outside_cost = 0;
  int scalar_single_iter_cost = 0;
  int scalar_outside_cost = 0;
  bool runtime_test = false;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  int peel_guard_costs = 0;
  int innerloop_iters = 0, factor;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  /* Cost model disabled.  */
  if (!flag_vect_cost_model)
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model disabled.");
      return 0;
    }

  /* If the number of iterations is unknown, or the
     peeling-for-misalignment amount is unknown, we will have to generate
     a runtime test to test the loop count against the threshold.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (byte_misalign < 0))
    runtime_test = true;

  /* Requires loop versioning tests to handle misalignment.  */
  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning to treat misalignment.\n");
    }

  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning aliasing.\n");
    }

  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
  /* Count statements in scalar loop.  Using this as scalar cost for a single
     iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
            continue;
          scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
        }
    }
  /* Add additional cost for the peeled instructions in prologue and epilogue
     loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  if (byte_misalign < 0)
    {
      peel_iters_prologue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "prologue peel iters set to vf/2.");

      /* If peeling for alignment is unknown, loop bound of main loop becomes
         unknown.  */
      peel_iters_epilogue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "epilogue peel iters set to vf/2 because "
                 "peeling for alignment is unknown .");

      /* If peeled iterations are unknown, count a taken branch and a not taken
         branch per peeled loop. Even if scalar loop iterations are known,
         vector iterations are not known since peeled prologue iterations are
         not known. Hence guards remain the same.  */
      peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
                               + TARG_COND_NOT_TAKEN_BRANCH_COST);
    }
  else
    {
      if (byte_misalign)
        {
          struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
          int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
          tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
          int nelements = TYPE_VECTOR_SUBPARTS (vectype);

          peel_iters_prologue = nelements - (byte_misalign / element_size);
        }
      else
        peel_iters_prologue = 0;

      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
        {
          peel_iters_epilogue = vf/2;
          if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "cost model: "
                     "epilogue peel iters set to vf/2 because "
                     "loop iterations are unknown .");

          /* If peeled iterations are known but number of scalar loop
             iterations are unknown, count a taken branch per peeled loop.  */
          peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
        }
      else
        {
          int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
          peel_iters_prologue = niters < peel_iters_prologue ?
                                niters : peel_iters_prologue;
          peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
        }
    }
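
  /* As a purely illustrative example of the formulas above: on a hypothetical
     target with V8HI vectors (nelements == 8, element_size == 2 bytes) and a
     known misalignment of byte_misalign == 6, the prologue peels
     8 - 6/2 = 5 scalar iterations; if niters == 100 is also known and
     vf == 8, the epilogue then peels (100 - 5) % 8 = 7 iterations.  */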
  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                      + (peel_iters_epilogue * scalar_single_iter_cost)
                      + peel_guard_costs;

  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDED with the versioning condition.  Hence scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
         jmp to vector code

     Hence run-time scalar is incremented by not-taken branch cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       if (cost <= th)
         prologue = scalar_iters
       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during prologue check, it
     has to be done with the epilogue check.

       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit
       vector code:
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
           jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBS's differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */

  if (runtime_test)
    {
      /* Cost model check occurs at versioning.  */
      if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
          || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
        scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
      else
        {
          /* Cost model occurs at prologue generation.  */
          if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
                                   + TARG_COND_NOT_TAKEN_BRANCH_COST;
          /* Cost model check occurs at epilogue generation.  */
          else
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
        }
    }
  /* Add SLP costs.  */
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:
     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
     where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost, VF = vectorization factor,
     PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
     SOC = scalar outside cost for run time cost model check.  */
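
  /* An illustration only (the costs are target dependent): with SIC = 4,
     VIC = 6, VOC = 14, SOC = 0, VF = 4 and no peeling, the computation
     below yields (14*4) / (4*4 - 6) = 5, bumped to 6 by the rounding check,
     so the vector loop would only pay off for iteration counts above that
     (subject to the clamping to VF and the final decrement further down).  */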
  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
    {
      if (vec_outside_cost <= 0)
        min_profitable_iters = 1;
      else
        {
          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
                                  - vec_inside_cost * peel_iters_prologue
                                  - vec_inside_cost * peel_iters_epilogue)
                                 / ((scalar_single_iter_cost * vf)
                                    - vec_inside_cost);

          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= ((vec_inside_cost * min_profitable_iters)
                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
            min_profitable_iters++;
        }
    }
  /* vector version will never be profitable.  */
  else
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: vector iteration cost = %d "
                 "is divisible by scalar iteration cost = %d by a factor "
                 "greater than or equal to the vectorization factor = %d .",
                 vec_inside_cost, scalar_single_iter_cost, vf);
      return -1;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
               scalar_single_iter_cost);
      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
      fprintf (vect_dump, "  prologue iterations: %d\n",
               peel_iters_prologue);
      fprintf (vect_dump, "  epilogue iterations: %d\n",
               peel_iters_epilogue);
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
               min_profitable_iters);
    }

  min_profitable_iters =
        min_profitable_iters < vf ? vf : min_profitable_iters;

  /* Because the condition we create is:
     if (niters <= min_profitable_iters)
       then skip the vectorized loop.  */
  min_profitable_iters--;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "  Profitability threshold = %d\n",
             min_profitable_iters);

  return min_profitable_iters;
}
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static void
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies)
{
  int outer_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  tree reduction_op;
  gimple stmt, orig_stmt;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Cost of reduction op inside loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;

  stmt = STMT_VINFO_STMT (stmt_info);

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = gimple_assign_rhs2 (stmt);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_COST))
        {
          fprintf (vect_dump, "unsupported data-type ");
          print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
        }
      return;
    }

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);

  /* Add in cost for initial definition.  */
  outer_cost += TARG_SCALAR_TO_VEC_COST;

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  if (!nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_code < NUM_TREE_CODES)
        outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
      else
        {
          int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
          int element_bitsize = tree_low_cst (bitsize, 1);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype, optab_default);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
              && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
            /* Final reduction via vector shifts and the reduction operator. Also
               requires scalar extract.  */
            outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
                           + TARG_VEC_TO_SCALAR_COST);
          else
            /* Use extracts and reduction op for final reduction.  For N elements,
               we have N extracts and N-1 reduction ops.  */
            outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
        }
    }
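
  /* As a rough illustration: for a reduction over a 4-element vector
     (nelements == 4), the whole-vector-shift scheme above costs
     exact_log2 (4) * 2 = 4 vector stmts plus one vector-to-scalar extract,
     while the extract-based fallback costs 4 + 3 = 7 vector stmts; the
     actual numbers depend on the target's TARG_VEC_* costs.  */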
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}
/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
  /* prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
        outside_cost += TARG_SCALAR_TO_VEC_COST;
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_invariant_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }
  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  int alignment_support_cheme;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_cheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_cheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2*TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }
    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    {
      char* tmp = concat (prefix, name, NULL);
      new_vect_var = create_tmp_var (type, tmp);
      free (tmp);
    }
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specify relative to which loop-nest should the address be computed.
            For example, when the dataref is in an inner-loop nested in an
            outer-loop that is now being vectorized, LOOP can be either the
            outer-loop, or the inner-loop.  The first memory location accessed
            by the following dataref ('in' points to short):

                for (i=0; i<N; i++)
                   for (j=0; j<M; j++)
                     s += in[i+j]

            is as follows:
            if LOOP=i_loop:     &in             (relative to i_loop)
            if LOOP=j_loop:     &in+i*2B        (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
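
/* For example, for an access a[i] with DR_BASE_ADDRESS == &a and
   DR_OFFSET == DR_INIT == 0 and no extra OFFSET, the statements appended to
   NEW_STMT_LIST essentially compute &a p+ 0 (a POINTER_PLUS_EXPR) and cast
   the result to the vector pointer type; when OFFSET is supplied, OFFSET
   times the element size is folded into that byte offset first.  The names
   "batmp", "base_off" and "offset" used below are just the temporaries
   created to hold these intermediate values.  */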
static tree
vect_create_addr_base_for_vector_ref (gimple stmt,
                                      gimple_seq *new_stmt_list,
                                      tree offset,
                                      struct loop *loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name;
  tree data_ref_base_var;
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest;
  gimple_seq seq = NULL;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));

  if (loop != containing_loop)
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

      gcc_assert (nested_in_vect_loop_p (loop, stmt));

      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
    }

  /* Create data_ref_base */
  base_name = build_fold_indirect_ref (data_ref_base);
  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
  add_referenced_var (data_ref_base_var);
  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
                                        data_ref_base_var);
  gimple_seq_add_seq (new_stmt_list, seq);

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  base_offset = fold_convert (sizetype, base_offset);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (offset)
    {
      tree tmp = create_tmp_var (sizetype, "offset");

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
      gimple_seq_add_seq (new_stmt_list, seq);
    }

  /* base + base_offset */
  addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
                           data_ref_base, base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}
/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
         GIMPLE_ASSIGN <name, data-ref> or
         GIMPLE_ASSIGN <data-ref, name>.
   2. AT_LOOP: the loop where the vector memref is to be created.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial addressed accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */
static tree
vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
                          tree offset, tree *initial_address, gimple *ptr_incr,
                          bool only_init, bool *inv_p)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  gimple vec_stmt;
  gimple_seq new_stmt_list = NULL;
  edge pe;
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple incr;
  tree step;

  /* Check the step (evolution) of the load in LOOP, and record
     whether it's invariant.  */
  if (nested_in_vect_loop)
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));

  if (tree_int_cst_compare (step, size_zero_node) == 0)
    *inv_p = true;
  else
    *inv_p = false;

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  vect_ptr_type = build_pointer_type (vectype);

  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_SYMBOL_TAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
      def-use update cycles for the pointer: One relative to the outer-loop
      (LOOP), which is what steps (3) and (4) below do.  The other is relative
      to the inner-loop (which is the inner-most loop containing the dataref),
      and this is done by step (5) below.

      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
      inner-most loop, and so steps (3),(4) work the same, and step (5) is
      redundant.  Steps (3),(4) create the following:

        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
                ...
                ...
                vp2 = vp1 + step
                goto LOOP

      If there is an inner-loop nested in loop, then step (5) will also be
      applied, and an additional update in the inner-loop will be created:

        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
                ...
        inner:     vp3 = phi(vp1,vp4)
                   vp4 = vp3 + inner_step
                   if () goto inner
                ...
                vp2 = vp1 + step
                if () goto LOOP   */

  /** (3) Calculate the initial address the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset, loop);
  pe = loop_preheader_edge (loop);
  if (new_stmt_list)
    {
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
      gcc_assert (!new_bb);
    }

  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = gimple_build_assign (vect_ptr,
                                  fold_convert (vect_ptr_type, new_temp));
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
  new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);

  /** (4) Handle the updating of the vector-pointer inside the loop.
          This is needed when ONLY_INIT is false, and also when AT_LOOP
          is the inner-loop nested in LOOP (during outer-loop vectorization).  **/

  if (only_init && at_loop == loop) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      vptr = vect_ptr_init;
    }
  else
    {
      /* The step of the vector pointer is the Vector Size.  */
      tree step = TYPE_SIZE_UNIT (vectype);
      /* One exception to the above is when the scalar step of the load in
         LOOP is zero. In this case the step here is also zero.  */
      if (*inv_p)
        step = size_zero_node;

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, step),
                 NULL_TREE, loop, &incr_gsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      vptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return vptr;

  /** (5) Handle the updating of the vector-pointer inside the inner-loop
          nested in LOOP, if exists: **/

  gcc_assert (nested_in_vect_loop);

  standard_iv_increment_position (containing_loop, &incr_gsi,
                                  &insert_after);
  create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
             containing_loop, &incr_gsi, insert_after, &indx_before_incr,
             &indx_after_incr);
  incr = gsi_stmt (incr_gsi);
  set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

  /* Copy the points-to information if it exists. */
  if (DR_PTR_INFO (dr))
    {
      duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
      duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
    }
  merge_alias_info (vect_ptr_init, indx_before_incr);
  merge_alias_info (vect_ptr_init, indx_after_incr);
  if (ptr_incr)
    *ptr_incr = incr;

  return indx_before_incr;
}
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size. If requested,
   i.e. if PTR-INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - optional. The stmt that updates the pointer in each iteration of
              the loop.  The increment amount across iterations is expected
              to be vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.
   BUMP - optional. The offset by which to bump the pointer. If not given,
          the offset is assumed to be vector_size.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */
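
/* For example, for a 16-byte vector type and no explicit BUMP, the stmt
   generated below is a POINTER_PLUS_EXPR of the form
        NEW_DATAREF_PTR = DATAREF_PTR p+ 16
   inserted at BSI (the byte count is illustrative and depends on the
   vector type's size).  */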
static tree
bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
                 gimple stmt, tree bump)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = TYPE_SIZE_UNIT (vectype);
  gimple incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (bump)
    update = bump;

  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
                                            dataref_ptr, update);
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
  vect_finish_stmt_generation (stmt, incr_stmt, gsi);

  /* Copy the points-to information if it exists. */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  return new_dataref_ptr;
}
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

static tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

      if (nested_in_vect_loop_p (loop, stmt))
        loop = loop->inner;
      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
      gcc_assert (!new_bb);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  bool is_store = false;
  unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p;

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    is_store = true;

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */
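
  /* E.g. with NUNITS == 4: GROUP_SIZE == 2 gives
     least_common_multiple (4, 2) / 2 == 2 copies of each scalar operand per
     vector, while GROUP_SIZE == 8 gives lcm (4, 8) / 8 == 1 copy, spread
     over two vectors, matching the examples above.  */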
  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
              gcc_assert (vector_type);
              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              constant_p = true;
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain scalar.  */

static void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.  */
  if (SLP_TREE_LEFT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0);

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
}
/* Function get_initial_def_for_induction

   Input:
   STMT - a stmt that performs an induction operation in the loop.
   IV_PHI - the initial value of the induction variable

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
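
/* For instance, with X = 3, S = 2 and VF = 4, the code below builds
   vec_init = {3, 5, 7, 9} in the preheader, and a step vector
   {8, 8, 8, 8} (i.e. VF * S) that is added to the induction vector on
   every iteration of the vectorized loop (numbers are illustrative).  */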
static tree
get_initial_def_for_induction (gimple iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (gimple_phi_result (iv_phi));
  tree vectype;
  int nunits;
  edge pe = loop_preheader_edge (loop);
  struct loop *iv_loop;
  basic_block new_bb;
  tree vec, vec_init, vec_step, t;
  tree access_fn;
  tree new_var;
  tree new_name;
  gimple init_stmt, induction_phi, new_stmt;
  tree induc_def, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  bool ok;
  int ncopies;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  bool nested_in_vect_loop = false;
  gimple_seq stmts = NULL;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  gimple exit_phi;
  edge latch_e;
  tree loop_arg;
  gimple_stmt_iterator si;
  basic_block bb = gimple_bb (iv_phi);

  vectype = get_vectype_for_scalar_type (scalar_type);
  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  ncopies = vf / nunits;

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Find the first insertion point in the BB.  */
  si = gsi_after_labels (bb);

  if (INTEGRAL_TYPE_P (scalar_type) || POINTER_TYPE_P (scalar_type))
    step_expr = build_int_cst (scalar_type, 0);
  else
    step_expr = build_real (scalar_type, dconst0);

  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
  if (nested_in_vect_loop_p (loop, iv_phi))
    {
      nested_in_vect_loop = true;
      iv_loop = loop->inner;
    }
  else
    iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);

  latch_e = loop_latch_edge (iv_loop);
  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);

  access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
  ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
                                    &init_expr, &step_expr);
  gcc_assert (ok);
  pe = loop_preheader_edge (iv_loop);

  /* Create the vector that holds the initial_value of the induction.  */
  if (nested_in_vect_loop)
    {
      /* iv_loop is nested in the loop to be vectorized. init_expr had already
         been created during vectorization of previous stmts; We obtain it from
         the STMT_VINFO_VEC_STMT of the defining stmt. */
      tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
      vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
    }
  else
    {
      /* iv_loop is the loop to be vectorized. Create:
         vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
      new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
      add_referenced_var (new_var);

      new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
      if (stmts)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      t = NULL_TREE;
      t = tree_cons (NULL_TREE, init_expr, t);
      for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr  */
          enum tree_code code = POINTER_TYPE_P (scalar_type)
                                ? POINTER_PLUS_EXPR : PLUS_EXPR;
          init_stmt = gimple_build_assign_with_ops (code, new_var,
                                                    new_name, step_expr);
          new_name = make_ssa_name (new_var, init_stmt);
          gimple_assign_set_lhs (init_stmt, new_name);

          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "created new init_stmt: ");
              print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
            }
          t = tree_cons (NULL_TREE, new_name, t);
        }
      /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
      vec = build_constructor_from_list (vectype, nreverse (t));
      vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
    }

  /* Create the vector that holds the step of the induction.  */
  if (nested_in_vect_loop)
    /* iv_loop is nested in the loop to be vectorized. Generate:
       vec_step = [S, S, S, S]  */
    new_name = step_expr;
  else
    {
      /* iv_loop is the loop to be vectorized. Generate:
         vec_step = [VF*S, VF*S, VF*S, VF*S]  */
      expr = build_int_cst (scalar_type, vf);
      new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
    }

  t = NULL_TREE;
  for (i = 0; i < nunits; i++)
    t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
  gcc_assert (CONSTANT_CLASS_P (new_name));
  vec = build_vector (vectype, t);
  vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);

  /* Create the following def-use cycle:
     loop prolog:
         vec_init = ...
         vec_step = ...
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  add_referenced_var (vec_dest);
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  set_vinfo_for_stmt (induction_phi,
                      new_stmt_vec_info (induction_phi, loop_vinfo));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop  */
  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                           induc_def, vec_step);
  vec_def = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, vec_def);
  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, pe);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));

  /* In case that vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;
      /* FORNOW. This restriction should be relaxed.  */
      gcc_assert (!nested_in_vect_loop);

      /* Create the vector that holds the step of the induction.  */
      expr = build_int_cst (scalar_type, nunits);
      new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
      t = NULL_TREE;
      for (i = 0; i < nunits; i++)
        t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
      gcc_assert (CONSTANT_CLASS_P (new_name));
      vec = build_vector (vectype, t);
      vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          /* vec_i = vec_prev + vec_step  */
          new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                                   vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);

          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo));
          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (nested_in_vect_loop)
    {
      /* Find the loop-closed exit-phi of the induction, and record
         the final vector of induction results:  */
      exit_phi = NULL;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          /* FORNOW. Currently not supporting the case that an inner-loop induction
             is not used in the outer-loop (i.e. only outside the outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vector of inductions after inner-loop:");
              print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
            }
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform induction: created def-use cycle: ");
      print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  return induc_def;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */
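/* For example (illustration only; the actual temporary names differ): if OP
   is the constant 3 and the vectype is V4SI, a def for a new statement of
   the form  vect_cst_ = { 3, 3, 3, 3 }  is created via vect_init_vector and
   returned, whereas if OP is defined by a statement S inside the loop, the
   def of STMT_VINFO_VEC_STMT (vinfo_for_stmt (S)) is returned directly.  */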
static tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree vector_type;
  tree def;
  bool is_simple_use;
  int i;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, op, t);

        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, def, t);

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:         STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_invariant_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  gcc_assert (stmt == gsi_stmt (*gsi));
  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  /* Make sure gsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == gsi_stmt (*gsi));

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

     s = init_val;
     for (i = 0; i < n; i++)
       s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries).
   A cost model should help decide between these two schemes.  */
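/* Worked example of the 'adjust in epilog' scheme used below: for

     s = 5;
     for (i = 0; i < n; i++)
       s += a[i];

   with a V4SI vectype, the returned initial vector is {0,0,0,0} and
   *ADJUSTMENT_DEF is set to 5, so that the reduction epilog adds 5 back
   after the four partial sums have been reduced to a scalar.  */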
static tree
get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (init_val);
  tree vecdef;
  tree def_for_init;
  tree init_def;
  tree vector_type;
  tree t = NULL_TREE;
  int i;
  bool nested_in_vect_loop = false;

  gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
              || SCALAR_FLOAT_TYPE_P (type));
  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);

  vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      if (nested_in_vect_loop)
        *adjustment_def = vecdef;
      else
        *adjustment_def = init_val;
      /* Create a vector of zeros for init_def.  */
      if (SCALAR_FLOAT_TYPE_P (type))
        def_for_init = build_real (type, dconst0);
      else
        def_for_init = build_int_cst (type, 0);
      for (i = nunits - 1; i >= 0; --i)
        t = tree_cons (NULL_TREE, def_for_init, t);
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
      gcc_assert (vector_type);
      init_def = build_vector (vector_type, t);
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      *adjustment_def = NULL_TREE;
      init_def = vecdef;
      break;

    default:
      gcc_unreachable ();
    }

  return init_def;
}
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see
     documentation in vectorizable_operation.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the
      reduction.  The loop-latch argument is VECT_DEF - the vector of partial
      sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>  */
static void
vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
                                  int ncopies,
                                  enum tree_code reduc_code,
                                  gimple reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple new_phi = NULL, phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE;
  tree new_name;
  gimple epilog_stmt = NULL;
  tree new_scalar_dest, new_dest;
  gimple exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree adjustment_def;
  tree vec_initial_def, def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result = false;
  tree reduction_op, expr;
  gimple orig_stmt;
  gimple use_stmt;
  bool nested_in_vect_loop = false;
  VEC(gimple,heap) *phis = NULL;
  enum vect_def_type dt = vect_unknown_def_type;
  int i, j;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      loop = loop->inner;
      nested_in_vect_loop = true;
    }

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = gimple_assign_rhs2 (stmt);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &adjustment_def);

  phi = reduction_phi;
  def = vect_def;
  for (j = 0; j < ncopies; j++)
    {
      /* 1.1 set the loop-entry arg of the reduction-phi:  */
      add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));

      /* 1.2 set the loop-latch arg for the reduction-phi:  */
      if (j > 0)
        def = vect_get_vec_def_for_stmt_copy (dt, def);
      add_phi_arg (phi, def, loop_latch_edge (loop));

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "transform reduction: created def-use cycle: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
          fprintf (vect_dump, "\n");
          print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
        }

      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
    }

  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  ***/
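  /* For illustration, scheme 2 on a V4SI PLUS reduction (no direct reduc
     operation available, but whole-vector shifts are) emits roughly:

         v' = vec_shift <v, 64>        # shift by half the vector size
         v  = v + v'
         v' = vec_shift <v, 32>        # shift by one element
         v  = v + v'
         s  = extract_field <v, 0>

     halving the number of live partial sums in each step.  */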
  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  def = vect_def;
  prev_phi_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
      set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
      if (j == 0)
        new_phi = phi;
      else
        {
          def = vect_get_vec_def_for_stmt_copy (dt, def);
          STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
        }
      SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
      prev_phi_info = vinfo_for_stmt (phi);
    }
  exit_gsi = gsi_after_labels (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = gimple_assign_rhs_code (orig_stmt);
  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop.  The final vector of partial results will be used in the
     vectorized outer-loop, or reduced to a scalar result at the end of the
     outer-loop.  */
  if (nested_in_vect_loop)
    goto vect_finalize_reduction;

  gcc_assert (ncopies == 1);
  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      tree tmp;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
      epilog_stmt = gimple_build_assign (vec_dest, tmp);
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);
              epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
                                                          new_temp, bitpos);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;
          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code,
                                                          new_scalar_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = false;
        }
    }
  /* 2.4  Extract the final scalar result.  Create:
         s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      gcc_assert (!nested_in_vect_loop);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
    }

vect_finalize_reduction:

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */

  if (adjustment_def)
    {
      if (nested_in_vect_loop)
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }
      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
    }
  /* 2.6  Handle the loop-exit phi  */

  /* Replace uses of s_out0 with uses of s_out3:
     Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  phis = VEC_alloc (gimple, heap, 10);
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          VEC_quick_push (gimple, phis, exit_phi);
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (!VEC_empty (gimple, phis));

  for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
    {
      if (nested_in_vect_loop)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);

          /* FORNOW. Currently not supporting the case that an inner-loop
             reduction is not used in the outer-loop (but only outside the
             outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
          STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo));
          if (adjustment_def)
            STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
          continue;
        }

      /* Replace the uses:  */
      orig_name = PHI_RESULT (exit_phi);
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          SET_USE (use_p, new_temp);
    }
  VEC_free (gimple, heap, phis);
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X
   is different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a
   vector stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in
   parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the
   result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should
   *NOT* be used to create the vectorized stmt.  The right vectype for the
   vectorized stmt is obtained from the type of the result X:
      get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
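/* For instance (continuing the example above): for the pattern stmt
      int_acc = WIDEN_SUM_EXPR <short_a, int_acc>
   on a target with 128-bit vectors, STMT_VINFO_VECTYPE is V8HI (eight shorts
   are consumed per vector iteration, which is what determines the VF), while
   the vectorized stmt created here operates on
   get_vectype_for_scalar_type (int) == V4SI vectors of partial sums.  */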
bool
vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  gimple new_phi = NULL;
  tree scalar_type;
  bool is_simple_use;
  gimple orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i, j;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  gimple first_phi = NULL;
  bool single_defuse_cycle = false;
  tree reduc_def;
  tree ops[3];
  gimple new_stmt = NULL;

  if (nested_in_vect_loop_p (loop, stmt))
    loop = loop->inner;

  gcc_assert (ncopies >= 1);

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
    return false;

  /* Reductions that are not used even in an enclosing outer-loop,
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }
  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body. The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (is_gimple_assign (stmt));

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt,
                                          &def, &dt);
      gcc_assert (is_simple_use);
      if (dt != vect_loop_def
          && dt != vect_invariant_def
          && dt != vect_constant_def
          && dt != vect_induction_def)
        return false;
    }

  is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;
  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced by:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing. I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */

  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = gimple_assign_rhs_code (orig_stmt);
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "unsupported data-type ");
              print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
            }
          return false;
        }

      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: use the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  /* If the reduction is used in an outer loop we need to generate
     VF intermediate results, like so (e.g. for ncopies=2):
        r0 = phi (init, r0)
        r1 = phi (init, r1)
        r0 = x0 + r0;
        r1 = x1 + r1;
     (i.e. we generate VF results in 2 registers).
     In this case we have a separate def-use cycle for each copy, and
     therefore for each copy we get the vector def for the reduction variable
     from the respective phi node created for this copy.

     Otherwise (the reduction is unused in the loop nest), we can combine
     together intermediate results, like so (e.g. for ncopies=2):
        r = phi (init, r)
        r = x0 + r;
        r = x1 + r;
     (i.e. we generate VF/2 results in a single register).
     In this case for each copy we get the vector def for the reduction
     variable from the vectorized reduction operation generated in the
     previous iteration.  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
    {
      single_defuse_cycle = true;
      epilog_copies = 1;
    }
  else
    epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
        {
          /* Create the reduction-phi that defines the reduction-operand.  */
          new_phi = create_phi_node (vec_dest, loop->header);
          set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
        }

      /* Handle uses.  */
      if (j == 0)
        {
          loop_vec_def0 = vect_get_vec_def_for_operand (ops[0], stmt, NULL);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_operand (ops[1], stmt, NULL);

          /* Get the vector def for the reduction variable from the phi
             node.  */
          reduc_def = PHI_RESULT (new_phi);
          first_phi = new_phi;
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          if (single_defuse_cycle)
            reduc_def = gimple_assign_lhs (new_stmt);
          else
            reduc_def = PHI_RESULT (new_phi);

          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = gimple_build_assign (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if (!single_defuse_cycle)
    new_temp = gimple_assign_lhs (*vec_stmt);
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
                                    epilog_reduc_code, first_phi);
  return true;
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */
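/* For example (assuming the target provides one): a call to a sqrt builtin
   on doubles with vectype_out == vectype_in == V2DF may be resolved to a
   target builtin through targetm.vectorize.builtin_vectorized_function
   below; if the hook returns NULL_TREE the call is simply left scalar.  */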
tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);
  enum built_in_function code;

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  code = DECL_FUNCTION_CODE (fndecl);
  return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
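/* Illustration (assuming vectorizable_function finds a target builtin):
   a loop stmt  b[i] = sinf (a[i])  is replaced by one call per vector
   iteration,

       vect_b.0 = <target vector sinf> (vect_a.0);

   where the callee is the fndecl returned by vectorizable_function.  */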
bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type, lhs_type;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than two arguments, we
     do not have interesting builtin functions to vectorize with
     more than two arguments.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 2)
    return false;

  for (i = 0; i < nargs; i++)
    {
      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && rhs_type != TREE_TYPE (op))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  vectype_in = get_vectype_for_scalar_type (rhs_type);
  if (!vectype_in)
    return false;
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  if (!vectype_out)
    return false;
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                vec_oprnd0
                  = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
                }
              else
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                  fold_convert (type, integer_zero_node));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
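/* For illustration: a widening conversion whose result type has half as many
   elements per vector (nunits_out == nunits_in / 2) is generated as two such
   "halves" - vectorizable_conversion calls this helper once with the
   code (or target builtin) that produces the low part of the widened result
   and once with the one that produces the high part, so that together the
   two generated stmts consume one full input vector.  */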
static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;
  tree sym;
  ssa_op_iter iter;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  if (code == CALL_EXPR)
    {
      FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
        {
          if (TREE_CODE (sym) == SSA_NAME)
            sym = SSA_NAME_VAR (sym);
          mark_sym_for_renaming (sym);
        }
    }

  return new_stmt;
}
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree expr;
  tree rhs_type, lhs_type;
  tree builtin_decl;
  tree integral_type;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  tree sym;
  ssa_op_iter iter;
  int dummy_int;
  VEC(tree,heap) *dummy = NULL;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  vectype_in = get_vectype_for_scalar_type (rhs_type);
  if (!vectype_in)
    return false;
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  if (!vectype_out)
    return false;
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NONE)
    gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);

  /* Bail out if the types are both integral or non-integral.  */
  if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
      || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
    return false;

  integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* FORNOW: SLP with multiple types is not supported.  The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  if (slp_node)
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, integral_type))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, stmt, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code, integral_type);
          for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
            {
              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
                                         SSA_OP_ALL_VIRTUALS)
                {
                  if (TREE_CODE (sym) == SSA_NAME)
                    sym = SSA_NAME_VAR (sym);
                  mark_sym_for_renaming (sym);
                }
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits. */
3733 for (j
= 0; j
< ncopies
; j
++)
3738 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3739 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3743 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd1
);
3744 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3747 /* Arguments are ready. Create the new vector stmt. */
3748 expr
= build2 (code1
, vectype_out
, vec_oprnd0
, vec_oprnd1
);
3749 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
, vec_oprnd0
,
3751 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3752 gimple_assign_set_lhs (new_stmt
, new_temp
);
3753 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3756 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3758 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3760 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3763 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3767 VEC_free (tree
, heap
, vec_oprnds0
);
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3781 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3782 gimple
*vec_stmt
, slp_tree slp_node
)
3787 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3788 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3789 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3793 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3794 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3797 VEC(tree
,heap
) *vec_oprnds
= NULL
;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3808 gcc_assert (ncopies
>= 1);
3810 return false; /* FORNOW */
3812 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3815 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
3818 /* Is vectorizable assignment? */
3819 if (!is_gimple_assign (stmt
))
3822 scalar_dest
= gimple_assign_lhs (stmt
);
3823 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
3826 if (gimple_assign_single_p (stmt
)
3827 || gimple_assign_rhs_code (stmt
) == PAREN_EXPR
)
3828 op
= gimple_assign_rhs1 (stmt
);
3832 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
3834 if (vect_print_dump_info (REPORT_DETAILS
))
3835 fprintf (vect_dump
, "use not simple.");
3839 if (!vec_stmt
) /* transformation not required. */
3841 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
3842 if (vect_print_dump_info (REPORT_DETAILS
))
3843 fprintf (vect_dump
, "=== vectorizable_assignment ===");
3844 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3849 if (vect_print_dump_info (REPORT_DETAILS
))
3850 fprintf (vect_dump
, "transform assignment.");
3853 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3856 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
3858 /* Arguments are ready. create the new vector stmt. */
3859 for (i
= 0; VEC_iterate (tree
, vec_oprnds
, i
, vop
); i
++)
3861 *vec_stmt
= gimple_build_assign (vec_dest
, vop
);
3862 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
3863 gimple_assign_set_lhs (*vec_stmt
, new_temp
);
3864 vect_finish_stmt_generation (stmt
, *vec_stmt
, gsi
);
3865 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
;
3868 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), *vec_stmt
);
3871 VEC_free (tree
, heap
, vec_oprnds
);
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors. */
3882 vect_min_worthwhile_factor (enum tree_code code
)
/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3911 vectorizable_induction (gimple phi
, gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
3914 stmt_vec_info stmt_info
= vinfo_for_stmt (phi
);
3915 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3916 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3917 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3918 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3919 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3922 gcc_assert (ncopies
>= 1);
3923 /* FORNOW. This restriction should be relaxed. */
3924 if (nested_in_vect_loop_p (loop
, phi
) && ncopies
> 1)
3926 if (vect_print_dump_info (REPORT_DETAILS
))
3927 fprintf (vect_dump
, "multiple types in nested loop.");
3931 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3934 /* FORNOW: SLP not supported. */
3935 if (STMT_SLP_TYPE (stmt_info
))
3938 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
);
3940 if (gimple_code (phi
) != GIMPLE_PHI
)
3943 if (!vec_stmt
) /* transformation not required. */
3945 STMT_VINFO_TYPE (stmt_info
) = induc_vec_info_type
;
3946 if (vect_print_dump_info (REPORT_DETAILS
))
3947 fprintf (vect_dump
, "=== vectorizable_induction ===");
3948 vect_model_induction_cost (stmt_info
, ncopies
);
3954 if (vect_print_dump_info (REPORT_DETAILS
))
3955 fprintf (vect_dump
, "transform induction phi.");
3957 vec_def
= get_initial_def_for_induction (phi
);
3958 *vec_stmt
= SSA_NAME_DEF_STMT (vec_def
);
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3971 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3972 gimple
*vec_stmt
, slp_tree slp_node
)
3976 tree op0
, op1
= NULL
;
3977 tree vec_oprnd1
= NULL_TREE
;
3978 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3979 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3980 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3981 enum tree_code code
;
3982 enum machine_mode vec_mode
;
3987 enum machine_mode optab_op2_mode
;
3990 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3991 gimple new_stmt
= NULL
;
3992 stmt_vec_info prev_stmt_info
;
3993 int nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3998 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
4001 bool shift_p
= false;
4002 bool scalar_shift_arg
= false;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4012 gcc_assert (ncopies
>= 1);
4014 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4017 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4020 /* Is STMT a vectorizable binary/unary operation? */
4021 if (!is_gimple_assign (stmt
))
4024 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4027 scalar_dest
= gimple_assign_lhs (stmt
);
4028 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4031 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4032 if (nunits_out
!= nunits_in
)
4035 code
= gimple_assign_rhs_code (stmt
);
4037 /* For pointer addition, we should use the normal plus for
4038 the vector addition. */
4039 if (code
== POINTER_PLUS_EXPR
)
4042 /* Support only unary or binary operations. */
4043 op_type
= TREE_CODE_LENGTH (code
);
4044 if (op_type
!= unary_op
&& op_type
!= binary_op
)
4046 if (vect_print_dump_info (REPORT_DETAILS
))
4047 fprintf (vect_dump
, "num. args = %d (not unary/binary op).", op_type
);
4051 op0
= gimple_assign_rhs1 (stmt
);
4052 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4054 if (vect_print_dump_info (REPORT_DETAILS
))
4055 fprintf (vect_dump
, "use not simple.");
4059 if (op_type
== binary_op
)
4061 op1
= gimple_assign_rhs2 (stmt
);
4062 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt
[1]))
4064 if (vect_print_dump_info (REPORT_DETAILS
))
4065 fprintf (vect_dump
, "use not simple.");
  /* If this is a shift/rotate, determine whether the shift amount is a vector,
     or scalar. If the shift/rotate amount is a vector, use the vector/vector
     shift optabs. */
4073 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4074 || code
== RROTATE_EXPR
)
4078 /* vector shifted by vector */
4079 if (dt
[1] == vect_loop_def
)
4081 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4082 if (vect_print_dump_info (REPORT_DETAILS
))
4083 fprintf (vect_dump
, "vector/vector shift/rotate found.");
      /* See if the machine has a vector shifted by scalar insn and if not
         then see if it has a vector shifted by vector insn. */
4088 else if (dt
[1] == vect_constant_def
|| dt
[1] == vect_invariant_def
)
4090 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4092 && (optab_handler (optab
, TYPE_MODE (vectype
))->insn_code
4093 != CODE_FOR_nothing
))
4095 scalar_shift_arg
= true;
4096 if (vect_print_dump_info (REPORT_DETAILS
))
4097 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
4101 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4102 if (vect_print_dump_info (REPORT_DETAILS
)
4104 && (optab_handler (optab
, TYPE_MODE (vectype
))->insn_code
4105 != CODE_FOR_nothing
))
4106 fprintf (vect_dump
, "vector/vector shift/rotate found.");
4112 if (vect_print_dump_info (REPORT_DETAILS
))
4113 fprintf (vect_dump
, "operand mode requires invariant argument.");
4118 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4120 /* Supportable by target? */
4123 if (vect_print_dump_info (REPORT_DETAILS
))
4124 fprintf (vect_dump
, "no optab.");
4127 vec_mode
= TYPE_MODE (vectype
);
4128 icode
= (int) optab_handler (optab
, vec_mode
)->insn_code
;
4129 if (icode
== CODE_FOR_nothing
)
4131 if (vect_print_dump_info (REPORT_DETAILS
))
4132 fprintf (vect_dump
, "op not supported by target.");
4133 /* Check only during analysis. */
4134 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4135 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4136 < vect_min_worthwhile_factor (code
)
4139 if (vect_print_dump_info (REPORT_DETAILS
))
4140 fprintf (vect_dump
, "proceeding using word mode.");
4143 /* Worthwhile without SIMD support? Check only during analysis. */
4144 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4145 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4146 < vect_min_worthwhile_factor (code
)
4149 if (vect_print_dump_info (REPORT_DETAILS
))
4150 fprintf (vect_dump
, "not worthwhile without SIMD support.");
4154 if (!vec_stmt
) /* transformation not required. */
4156 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4157 if (vect_print_dump_info (REPORT_DETAILS
))
4158 fprintf (vect_dump
, "=== vectorizable_operation ===");
4159 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
4165 if (vect_print_dump_info (REPORT_DETAILS
))
4166 fprintf (vect_dump
, "transform binary/unary operation.");
4169 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* Allocate VECs for vector operands. In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument. In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1. We
     allocate VEC_OPRNDS1 only in case of binary operation. */
4180 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
4181 if (op_type
== binary_op
)
4182 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
4184 else if (scalar_shift_arg
)
4185 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt. The example below illustrates
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
     4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'. This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1). This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'. Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0. This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
4240 prev_stmt_info
= NULL
;
4241 for (j
= 0; j
< ncopies
; j
++)
4246 if (op_type
== binary_op
&& scalar_shift_arg
)
          /* Vector shl and shr insn patterns can be defined with scalar
             operand 2 (shift operand). In this case, use constant or loop
             invariant op1 directly, without extending it to vector mode
             first. */
4252 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4253 if (!VECTOR_MODE_P (optab_op2_mode
))
4255 if (vect_print_dump_info (REPORT_DETAILS
))
4256 fprintf (vect_dump
, "operand 1 using scalar mode.");
4258 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
          /* Store vec_oprnd1 for every vector stmt to be created
             for SLP_NODE. We check during the analysis that all the
             shift arguments are the same.
             TODO: Allow different constants for different vector
             stmts generated for an SLP instance. */
4266 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4267 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
         (a special case for certain kind of vector shifts); otherwise,
         operand 1 should be of a vector type (the usual case). */
4275 if (op_type
== binary_op
&& !vec_oprnd1
)
4276 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4279 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4283 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4285 /* Arguments are ready. Create the new vector stmt. */
4286 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vop0
); i
++)
4288 vop1
= ((op_type
== binary_op
)
4289 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL
);
4290 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4291 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4292 gimple_assign_set_lhs (new_stmt
, new_temp
);
4293 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4295 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
4302 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4304 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4305 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4308 VEC_free (tree
, heap
, vec_oprnds0
);
4310 VEC_free (tree
, heap
, vec_oprnds1
);
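/* Illustrative sketch, not part of the vectorizer: how a later copy of a
   vectorized stmt finds its vector operand by walking the
   STMT_VINFO_RELATED_STMT chain documented in vectorizable_operation above.
   The type and field below are hypothetical stand-ins for the corresponding
   stmt_vec_info fields.  */

struct sketch_vec_stmt
{
  struct sketch_vec_stmt *related;  /* plays the role of STMT_VINFO_RELATED_STMT */
};

/* Return the vector def used by copy number J: copy 0 uses the first vector
   stmt recorded for the scalar def (VS1_0 in the example above), copy 1 its
   RELATED_STMT (VS1_1), and so on.  */

static struct sketch_vec_stmt *
sketch_vec_def_for_copy (struct sketch_vec_stmt *first_vec_stmt, int j)
{
  struct sketch_vec_stmt *vs = first_vec_stmt;
  while (j-- > 0)
    vs = vs->related;
  return vs;
}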
/* Get vectorized definitions for loop-based vectorization. For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS. */
4324 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
4325 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
  /* Get first vector operand. */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies. */
4332 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4333 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
4335 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
4337 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
4339 /* Get second vector operand. */
4340 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
4341 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
  /* For conversion in multiple steps, continue to get operands
     recursively. */
4348 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively. */
4357 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
4358 int multi_step_cvt
, gimple stmt
,
4359 VEC (tree
, heap
) *vec_dsts
,
4360 gimple_stmt_iterator
*gsi
,
4361 slp_tree slp_node
, enum tree_code code
,
4362 stmt_vec_info
*prev_stmt_info
)
4365 tree vop0
, vop1
, new_tmp
, vec_dest
;
4367 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4369 vec_dest
= VEC_pop (tree
, vec_dsts
);
4371 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
4373 /* Create demotion operation. */
4374 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
4375 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
4376 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4377 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4378 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4379 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4382 /* Store the resulting vector for next recursive call. */
4383 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
      /* This is the last step of the conversion sequence. Store the
         vectors in SLP_NODE or in vector info of the scalar statement
         (or in STMT_VINFO_RELATED_STMT chain). */
4390 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
4393 if (!*prev_stmt_info
)
4394 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4396 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
4398 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type. */

      /* At each level of recursion we have half of the operands we had at the
         previous level. */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
4412 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4413 stmt
, vec_dsts
, gsi
, slp_node
,
4414 code
, prev_stmt_info
);
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4428 vectorizable_type_demotion (gimple stmt
, gimple_stmt_iterator
*gsi
,
4429 gimple
*vec_stmt
, slp_tree slp_node
)
4434 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4435 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4436 enum tree_code code
, code1
= ERROR_MARK
;
4439 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4440 stmt_vec_info prev_stmt_info
;
4447 int multi_step_cvt
= 0;
4448 VEC (tree
, heap
) *vec_oprnds0
= NULL
;
4449 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
, *tmp_vec_dsts
= NULL
;
4450 tree last_oprnd
, intermediate_type
;
4452 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4455 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4458 /* Is STMT a vectorizable type-demotion operation? */
4459 if (!is_gimple_assign (stmt
))
4462 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4465 code
= gimple_assign_rhs_code (stmt
);
4466 if (!CONVERT_EXPR_CODE_P (code
))
4469 op0
= gimple_assign_rhs1 (stmt
);
4470 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
4473 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4475 scalar_dest
= gimple_assign_lhs (stmt
);
4476 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4479 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4480 if (nunits_in
>= nunits_out
)
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);
4493 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4494 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
4495 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
4496 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
4497 && CONVERT_EXPR_CODE_P (code
))))
4500 /* Check the operands of the operation. */
4501 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4503 if (vect_print_dump_info (REPORT_DETAILS
))
4504 fprintf (vect_dump
, "use not simple.");
4508 /* Supportable by target? */
4509 if (!supportable_narrowing_operation (code
, stmt
, vectype_in
, &code1
,
4510 &multi_step_cvt
, &interm_types
))
4513 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
4515 if (!vec_stmt
) /* transformation not required. */
4517 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4518 if (vect_print_dump_info (REPORT_DETAILS
))
4519 fprintf (vect_dump
, "=== vectorizable_demotion ===");
4520 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
4525 if (vect_print_dump_info (REPORT_DETAILS
))
4526 fprintf (vect_dump
, "transform type demotion operation. ncopies = %d.",
  /* In case of multi-step demotion, we first generate demotion operations to
     the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_narrowing_operation, and store them in the correct order
     for future use in vect_create_vectorized_demotion_stmts(). */
4535 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
4537 vec_dsts
= VEC_alloc (tree
, heap
, 1);
4539 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
4540 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4544 for (i
= VEC_length (tree
, interm_types
) - 1;
4545 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
4547 vec_dest
= vect_create_destination_var (scalar_dest
,
4549 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. */
4558 prev_stmt_info
= NULL
;
4559 for (j
= 0; j
< ncopies
; j
++)
4563 vect_get_slp_defs (slp_node
, &vec_oprnds0
, NULL
);
4566 VEC_free (tree
, heap
, vec_oprnds0
);
4567 vec_oprnds0
= VEC_alloc (tree
, heap
,
4568 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) * 2 : 2));
4569 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4570 vect_pow2 (multi_step_cvt
) - 1);
4573 /* Arguments are ready. Create the new vector stmts. */
4574 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
4575 vect_create_vectorized_demotion_stmts (&vec_oprnds0
,
4576 multi_step_cvt
, stmt
, tmp_vec_dsts
,
4577 gsi
, slp_node
, code1
,
4581 VEC_free (tree
, heap
, vec_oprnds0
);
4582 VEC_free (tree
, heap
, vec_dsts
);
4583 VEC_free (tree
, heap
, tmp_vec_dsts
);
4584 VEC_free (tree
, heap
, interm_types
);
4586 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
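/* Illustrative sketch, not part of the vectorizer: the overall effect of a
   two-step demotion as handled by vectorizable_type_demotion above, assuming
   int input vectors of 4 elements, one intermediate short type and a final
   char result, and using plain concatenation as the element order (the real
   element order is target dependent).  Each step packs pairs of vectors and
   halves their number; the recursion in vect_create_vectorized_demotion_stmts
   repeats this until the final type is reached.  */

static void
sketch_two_step_demote (const int in[4][4], signed char out[16])
{
  short mid[2][8];
  int i, j;

  /* Step 1: int -> short; each pair of int vectors is packed into one
     short vector of the intermediate type.  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 8; j++)
      mid[i][j] = (short) in[2 * i + j / 4][j % 4];

  /* Step 2: short -> char; the two short vectors are packed into the
     final char vector.  */
  for (j = 0; j < 16; j++)
    out[j] = (signed char) mid[j / 8][j % 8];
}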
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
   the resulting vectors and call the function recursively. */
4596 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
4597 VEC (tree
, heap
) **vec_oprnds1
,
4598 int multi_step_cvt
, gimple stmt
,
4599 VEC (tree
, heap
) *vec_dsts
,
4600 gimple_stmt_iterator
*gsi
,
4601 slp_tree slp_node
, enum tree_code code1
,
4602 enum tree_code code2
, tree decl1
,
4603 tree decl2
, int op_type
,
4604 stmt_vec_info
*prev_stmt_info
)
4607 tree vop0
, vop1
, new_tmp1
, new_tmp2
, vec_dest
;
4608 gimple new_stmt1
, new_stmt2
;
4609 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4610 VEC (tree
, heap
) *vec_tmp
;
4612 vec_dest
= VEC_pop (tree
, vec_dsts
);
4613 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
4615 for (i
= 0; VEC_iterate (tree
, *vec_oprnds0
, i
, vop0
); i
++)
4617 if (op_type
== binary_op
)
4618 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
4622 /* Generate the two halves of promotion operation. */
4623 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4624 op_type
, vec_dest
, gsi
, stmt
);
4625 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4626 op_type
, vec_dest
, gsi
, stmt
);
4627 if (is_gimple_call (new_stmt1
))
4629 new_tmp1
= gimple_call_lhs (new_stmt1
);
4630 new_tmp2
= gimple_call_lhs (new_stmt2
);
4634 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4635 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4640 /* Store the results for the recursive call. */
4641 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
4642 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
      /* Last step of promotion sequence - store the results. */
4649 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt1
);
4650 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt2
);
4654 if (!*prev_stmt_info
)
4655 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt1
;
4657 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt1
;
4659 *prev_stmt_info
= vinfo_for_stmt (new_stmt1
);
4660 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt2
;
4661 *prev_stmt_info
= vinfo_for_stmt (new_stmt2
);
      /* For a multi-step promotion operation we call the function recursively
         for every stage. We start from the input type, create promotion
         operations to the intermediate types, and then create promotions
         to the output type. */
4672 *vec_oprnds0
= VEC_copy (tree
, heap
, vec_tmp
);
4673 VEC_free (tree
, heap
, vec_tmp
);
4674 vect_create_vectorized_promotion_stmts (vec_oprnds0
, vec_oprnds1
,
4675 multi_step_cvt
- 1, stmt
,
4676 vec_dsts
, gsi
, slp_node
, code1
,
4677 code2
, decl2
, decl2
, op_type
,
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4692 vectorizable_type_promotion (gimple stmt
, gimple_stmt_iterator
*gsi
,
4693 gimple
*vec_stmt
, slp_tree slp_node
)
4697 tree op0
, op1
= NULL
;
4698 tree vec_oprnd0
=NULL
, vec_oprnd1
=NULL
;
4699 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4700 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4701 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4702 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4706 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4707 stmt_vec_info prev_stmt_info
;
4714 tree intermediate_type
= NULL_TREE
;
4715 int multi_step_cvt
= 0;
4716 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
4717 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
, *tmp_vec_dsts
= NULL
;
4719 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
4722 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
4725 /* Is STMT a vectorizable type-promotion operation? */
4726 if (!is_gimple_assign (stmt
))
4729 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4732 code
= gimple_assign_rhs_code (stmt
);
4733 if (!CONVERT_EXPR_CODE_P (code
)
4734 && code
!= WIDEN_MULT_EXPR
)
4737 op0
= gimple_assign_rhs1 (stmt
);
4738 vectype_in
= get_vectype_for_scalar_type (TREE_TYPE (op0
));
4741 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4743 scalar_dest
= gimple_assign_lhs (stmt
);
4744 vectype_out
= get_vectype_for_scalar_type (TREE_TYPE (scalar_dest
));
4747 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4748 if (nunits_in
<= nunits_out
)
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  gcc_assert (ncopies >= 1);
4761 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4762 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
4763 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
4764 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
4765 && CONVERT_EXPR_CODE_P (code
))))
4768 /* Check the operands of the operation. */
4769 if (!vect_is_simple_use (op0
, loop_vinfo
, &def_stmt
, &def
, &dt
[0]))
4771 if (vect_print_dump_info (REPORT_DETAILS
))
4772 fprintf (vect_dump
, "use not simple.");
4776 op_type
= TREE_CODE_LENGTH (code
);
4777 if (op_type
== binary_op
)
4779 op1
= gimple_assign_rhs2 (stmt
);
4780 if (!vect_is_simple_use (op1
, loop_vinfo
, &def_stmt
, &def
, &dt
[1]))
4782 if (vect_print_dump_info (REPORT_DETAILS
))
4783 fprintf (vect_dump
, "use not simple.");
4788 /* Supportable by target? */
4789 if (!supportable_widening_operation (code
, stmt
, vectype_in
,
4790 &decl1
, &decl2
, &code1
, &code2
,
4791 &multi_step_cvt
, &interm_types
))
  /* Binary widening operation can only be supported directly by the
     architecture. */
  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4798 STMT_VINFO_VECTYPE (stmt_info
) = vectype_in
;
4800 if (!vec_stmt
) /* transformation not required. */
4802 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4803 if (vect_print_dump_info (REPORT_DETAILS
))
4804 fprintf (vect_dump
, "=== vectorizable_promotion ===");
4805 vect_model_simple_cost (stmt_info
, 2*ncopies
, dt
, NULL
);
4811 if (vect_print_dump_info (REPORT_DETAILS
))
4812 fprintf (vect_dump
, "transform type promotion operation. ncopies = %d.",
  /* In case of multi-step promotion, we first generate promotion operations
     to the intermediate types, and then from those types to the final one.
     We store vector destination in VEC_DSTS in the correct order for
     recursive creation of promotion operations in
     vect_create_vectorized_promotion_stmts(). Vector destinations are created
     according to TYPES received from supportable_widening_operation(). */
4823 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
4825 vec_dsts
= VEC_alloc (tree
, heap
, 1);
4827 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
4828 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4832 for (i
= VEC_length (tree
, interm_types
) - 1;
4833 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
4835 vec_dest
= vect_create_destination_var (scalar_dest
,
4837 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
4843 vec_oprnds0
= VEC_alloc (tree
, heap
,
4844 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4845 if (op_type
== binary_op
)
4846 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. */
4854 prev_stmt_info
= NULL
;
4855 for (j
= 0; j
< ncopies
; j
++)
4861 vect_get_slp_defs (slp_node
, &vec_oprnds0
, &vec_oprnds1
);
4864 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
4865 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
4866 if (op_type
== binary_op
)
4868 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
4869 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
4875 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4876 VEC_replace (tree
, vec_oprnds0
, 0, vec_oprnd0
);
4877 if (op_type
== binary_op
)
4879 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd1
);
4880 VEC_replace (tree
, vec_oprnds1
, 0, vec_oprnd1
);
4884 /* Arguments are ready. Create the new vector stmts. */
4885 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
4886 vect_create_vectorized_promotion_stmts (&vec_oprnds0
, &vec_oprnds1
,
4887 multi_step_cvt
, stmt
,
4889 gsi
, slp_node
, code1
, code2
,
4890 decl1
, decl2
, op_type
,
4894 VEC_free (tree
, heap
, vec_dsts
);
4895 VEC_free (tree
, heap
, tmp_vec_dsts
);
4896 VEC_free (tree
, heap
, interm_types
);
4897 VEC_free (tree
, heap
, vec_oprnds0
);
4898 VEC_free (tree
, heap
, vec_oprnds1
);
4900 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Function vect_strided_store_supported.

   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
   and FALSE otherwise. */
4911 vect_strided_store_supported (tree vectype
)
4913 optab interleave_high_optab
, interleave_low_optab
;
4916 mode
= (int) TYPE_MODE (vectype
);
4918 /* Check that the operation is supported. */
4919 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
4920 vectype
, optab_default
);
4921 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
4922 vectype
, optab_default
);
4923 if (!interleave_high_optab
|| !interleave_low_optab
)
4925 if (vect_print_dump_info (REPORT_DETAILS
))
4926 fprintf (vect_dump
, "no optab for interleave.");
4930 if (optab_handler (interleave_high_optab
, mode
)->insn_code
4932 || optab_handler (interleave_low_optab
, mode
)->insn_code
4933 == CODE_FOR_nothing
)
4935 if (vect_print_dump_info (REPORT_DETAILS
))
4936 fprintf (vect_dump
, "interleave op not supported by target.");
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2, generate interleave_high/low stmts to reorder the data
   correctly for the stores. Return the final references for stores in
   RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements. We assign a number to each
   element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  8 16 24  1  9 17 25
   2nd vec:   2 10 18 26  3 11 19 27
   3rd vec:   4 12 20 28  5 13 21 29
   4th vec:   6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output. The input of
   each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector. The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7

   The permutation is done in log LENGTH stages. In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
   In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
   I4:  6 14 22 30  7 15 23 31.  */
5006 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
5007 unsigned int length
,
5009 gimple_stmt_iterator
*gsi
,
5010 VEC(tree
,heap
) **result_chain
)
5012 tree perm_dest
, vect1
, vect2
, high
, low
;
5014 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
5018 enum tree_code high_code
, low_code
;
5020 scalar_dest
= gimple_assign_lhs (stmt
);
5022 /* Check that the operation is supported. */
5023 if (!vect_strided_store_supported (vectype
))
5026 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
5028 for (i
= 0; i
< exact_log2 (length
); i
++)
5030 for (j
= 0; j
< length
/2; j
++)
5032 vect1
= VEC_index (tree
, dr_chain
, j
);
5033 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
          /* Create interleaving stmt:
             in the case of big endian:
                high = interleave_high (vect1, vect2)
             and in the case of little endian:
                high = interleave_low (vect1, vect2). */
5040 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
5041 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5042 add_referenced_var (perm_dest
);
5043 if (BYTES_BIG_ENDIAN
)
5045 high_code
= VEC_INTERLEAVE_HIGH_EXPR
;
5046 low_code
= VEC_INTERLEAVE_LOW_EXPR
;
5050 low_code
= VEC_INTERLEAVE_HIGH_EXPR
;
5051 high_code
= VEC_INTERLEAVE_LOW_EXPR
;
5053 perm_stmt
= gimple_build_assign_with_ops (high_code
, perm_dest
,
5055 high
= make_ssa_name (perm_dest
, perm_stmt
);
5056 gimple_assign_set_lhs (perm_stmt
, high
);
5057 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5058 VEC_replace (tree
, *result_chain
, 2*j
, high
);
          /* Create interleaving stmt:
             in the case of big endian:
                low = interleave_low (vect1, vect2)
             and in the case of little endian:
                low = interleave_high (vect1, vect2). */
5065 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
5066 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5067 add_referenced_var (perm_dest
);
5068 perm_stmt
= gimple_build_assign_with_ops (low_code
, perm_dest
,
5070 low
= make_ssa_name (perm_dest
, perm_stmt
);
5071 gimple_assign_set_lhs (perm_stmt
, low
);
5072 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5073 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
5075 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
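/* Illustrative sketch, not part of the vectorizer: the permutation performed
   by vect_permute_store_chain above, applied to plain integer arrays so the
   worked example in its comment can be reproduced.  SKETCH_NUNITS, the array
   sizes and the function itself are made up for the example; the element
   numbering follows the big-endian convention used in the comment.  */

#define SKETCH_NUNITS 8
#define SKETCH_MAX_LEN 8

static void
sketch_permute_store_chain (int chain[][SKETCH_NUNITS], int length)
{
  int tmp[SKETCH_MAX_LEN][SKETCH_NUNITS];
  int stage, j, k, v;

  /* log2 (LENGTH) stages, exactly as in the function above.  */
  for (stage = length; stage > 1; stage /= 2)
    {
      /* Pair vector J with vector J + LENGTH/2; interleave_high goes to
         slot 2*J, interleave_low to slot 2*J + 1.  */
      for (j = 0; j < length / 2; j++)
        for (k = 0; k < SKETCH_NUNITS / 2; k++)
          {
            tmp[2 * j][2 * k]         = chain[j][k];
            tmp[2 * j][2 * k + 1]     = chain[j + length / 2][k];
            tmp[2 * j + 1][2 * k]     = chain[j][SKETCH_NUNITS / 2 + k];
            tmp[2 * j + 1][2 * k + 1] = chain[j + length / 2][SKETCH_NUNITS / 2 + k];
          }

      /* The result of this stage becomes the input of the next one.  */
      for (v = 0; v < length; v++)
        for (k = 0; k < SKETCH_NUNITS; k++)
          chain[v][k] = tmp[v][k];
    }
}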
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5090 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5096 tree vec_oprnd
= NULL_TREE
;
5097 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5098 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5099 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5100 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5101 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5102 enum machine_mode vec_mode
;
5104 enum dr_alignment_support alignment_support_scheme
;
5107 enum vect_def_type dt
;
5108 stmt_vec_info prev_stmt_info
= NULL
;
5109 tree dataref_ptr
= NULL_TREE
;
5110 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5113 gimple next_stmt
, first_stmt
= NULL
;
5114 bool strided_store
= false;
5115 unsigned int group_size
, i
;
5116 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
5118 VEC(tree
,heap
) *vec_oprnds
= NULL
;
5119 bool slp
= (slp_node
!= NULL
);
5120 stmt_vec_info first_stmt_vinfo
;
5121 unsigned int vec_num
;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);
5133 /* FORNOW. This restriction should be relaxed. */
5134 if (nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5136 if (vect_print_dump_info (REPORT_DETAILS
))
5137 fprintf (vect_dump
, "multiple types in nested loop.");
5141 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
5144 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
5147 /* Is vectorizable store? */
5149 if (!is_gimple_assign (stmt
))
5152 scalar_dest
= gimple_assign_lhs (stmt
);
5153 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5154 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5155 && !STMT_VINFO_STRIDED_ACCESS (stmt_info
))
5158 gcc_assert (gimple_assign_single_p (stmt
));
5159 op
= gimple_assign_rhs1 (stmt
);
5160 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
5162 if (vect_print_dump_info (REPORT_DETAILS
))
5163 fprintf (vect_dump
, "use not simple.");
5167 /* If accesses through a pointer to vectype do not alias the original
5168 memory reference we have a problem. */
5169 if (get_alias_set (vectype
) != get_alias_set (TREE_TYPE (scalar_dest
))
5170 && !alias_set_subset_of (get_alias_set (vectype
),
5171 get_alias_set (TREE_TYPE (scalar_dest
))))
5173 if (vect_print_dump_info (REPORT_DETAILS
))
5174 fprintf (vect_dump
, "vector type does not alias scalar type");
5178 if (!useless_type_conversion_p (TREE_TYPE (op
), TREE_TYPE (scalar_dest
)))
5180 if (vect_print_dump_info (REPORT_DETAILS
))
5181 fprintf (vect_dump
, "operands of different types");
5185 vec_mode
= TYPE_MODE (vectype
);
5186 /* FORNOW. In some cases can vectorize even if data-type not supported
5187 (e.g. - array initialization with 0). */
5188 if (optab_handler (mov_optab
, (int)vec_mode
)->insn_code
== CODE_FOR_nothing
)
5191 if (!STMT_VINFO_DATA_REF (stmt_info
))
5194 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
5196 strided_store
= true;
5197 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
5198 if (!vect_strided_store_supported (vectype
)
5199 && !PURE_SLP_STMT (stmt_info
) && !slp
)
5202 if (first_stmt
== stmt
)
5204 /* STMT is the leader of the group. Check the operands of all the
5205 stmts of the group. */
5206 next_stmt
= DR_GROUP_NEXT_DR (stmt_info
);
5209 gcc_assert (gimple_assign_single_p (next_stmt
));
5210 op
= gimple_assign_rhs1 (next_stmt
);
5211 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
5213 if (vect_print_dump_info (REPORT_DETAILS
))
5214 fprintf (vect_dump
, "use not simple.");
5217 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5222 if (!vec_stmt
) /* transformation not required. */
5224 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5225 vect_model_store_cost (stmt_info
, ncopies
, dt
, NULL
);
5233 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5234 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5236 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5239 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5241 /* We vectorize all the stmts of the interleaving group when we
5242 reach the last stmt in the group. */
5243 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5244 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5252 strided_store
= false;
5254 /* VEC_NUM is the number of vect stmts to be created for this group. */
5256 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5258 vec_num
= group_size
;
5264 group_size
= vec_num
= 1;
5265 first_stmt_vinfo
= stmt_info
;
5268 if (vect_print_dump_info (REPORT_DETAILS
))
5269 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
5271 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
5272 oprnds
= VEC_alloc (tree
, heap
, group_size
);
5274 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
5275 gcc_assert (alignment_support_scheme
);
5276 gcc_assert (alignment_support_scheme
== dr_aligned
); /* FORNOW */
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. For more details see documentation in
     vect_get_vec_def_for_copy_stmt.

     In case of interleaving (non-unit strided access):

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy. The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies. */
5317 prev_stmt_info
= NULL
;
5318 for (j
= 0; j
< ncopies
; j
++)
5327 /* Get vectorized arguments for SLP_NODE. */
5328 vect_get_slp_defs (slp_node
, &vec_oprnds
, NULL
);
5330 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
          /* For interleaved stores we collect vectorized defs for all the
             stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
             used as an input to vect_permute_store_chain(), and OPRNDS as
             an input to vect_get_vec_def_for_stmt_copy() for the next copy.

             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1. */
5341 next_stmt
= first_stmt
;
5342 for (i
= 0; i
< group_size
; i
++)
              /* Since gaps are not supported for interleaved stores,
                 GROUP_SIZE is the exact number of stmts in the chain.
                 Therefore, NEXT_STMT can't be NULL_TREE. In case that
                 there is no interleaving, GROUP_SIZE is 1, and only one
                 iteration of the loop will be executed. */
5349 gcc_assert (next_stmt
);
5350 gcc_assert (gimple_assign_single_p (next_stmt
));
5351 op
= gimple_assign_rhs1 (next_stmt
);
5353 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5355 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
5356 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
5357 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5361 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, NULL
, NULL_TREE
,
5362 &dummy
, &ptr_incr
, false,
5364 gcc_assert (!inv_p
);
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1. */
5375 for (i
= 0; i
< group_size
; i
++)
5377 op
= VEC_index (tree
, oprnds
, i
);
5378 vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
);
5379 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5380 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
5381 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
5384 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
5389 result_chain
= VEC_alloc (tree
, heap
, group_size
);
5391 if (!vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5396 next_stmt
= first_stmt
;
5397 for (i
= 0; i
< vec_num
; i
++)
5400 /* Bump the vector pointer. */
5401 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5405 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
5406 else if (strided_store
)
5407 /* For strided stores vectorized defs are interleaved in
5408 vect_permute_store_chain(). */
5409 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
5411 data_ref
= build_fold_indirect_ref (dataref_ptr
);
5412 /* Arguments are ready. Create the new vector stmt. */
5413 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5414 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5415 mark_symbols_for_renaming (new_stmt
);
5421 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5423 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5425 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5426 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5432 VEC_free (tree
, heap
, dr_chain
);
5433 VEC_free (tree
, heap
, oprnds
);
5435 VEC_free (tree
, heap
, result_chain
);
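/* Illustrative sketch, not part of the vectorizer: the kind of scalar
   interleaved-store group that vectorizable_store above handles.  The four
   stores below form one chain with GROUP_SIZE 4; the whole group is
   vectorized only when the last store of the chain is reached, and the
   collected vectors are reordered by vect_permute_store_chain before being
   written out.  The function and parameter names are made up for the
   example.  */

static void
sketch_interleaved_store_loop (int *out, const int *a, const int *b,
                               const int *c, const int *d, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      out[4 * i + 0] = a[i];   /* first store in the chain (offset 0) */
      out[4 * i + 1] = b[i];   /* offset 1 */
      out[4 * i + 2] = c[i];   /* offset 2 */
      out[4 * i + 3] = d[i];   /* last store: the group is emitted here */
    }
}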
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:

      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:

      msq = *(floor(p));         # load in loop
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized. This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
                              is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node. */
5493 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
5494 tree
*realignment_token
,
5495 enum dr_alignment_support alignment_support_scheme
,
5497 struct loop
**at_loop
)
5499 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5500 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5501 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5502 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5504 tree scalar_dest
= gimple_assign_lhs (stmt
);
5511 tree msq_init
= NULL_TREE
;
5514 tree msq
= NULL_TREE
;
5515 gimple_seq stmts
= NULL
;
5517 bool compute_in_loop
= false;
5518 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5519 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5520 struct loop
*loop_for_initial_load
;
5522 gcc_assert (alignment_support_scheme
== dr_explicit_realign
5523 || alignment_support_scheme
== dr_explicit_realign_optimized
);
  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
        done (for the optimized realignment scheme). */
  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader). Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP. This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment). In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP. */
5558 if (init_addr
!= NULL_TREE
)
5560 compute_in_loop
= true;
5561 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration. In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP. If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop). */
5578 if (nested_in_vect_loop
)
5580 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
5581 bool invariant_in_outerloop
=
5582 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
5583 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
5586 loop_for_initial_load
= loop
;
5588 *at_loop
= loop_for_initial_load
;
  /* 3. For the case of the optimized realignment, create the first vector
        load at the loop preheader. */
5593 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5595 /* Create msq_init = *(floor(p1)) in the loop preheader */
5597 gcc_assert (!compute_in_loop
);
5598 pe
= loop_preheader_edge (loop_for_initial_load
);
5599 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5600 ptr
= vect_create_data_ref_ptr (stmt
, loop_for_initial_load
, NULL_TREE
,
5601 &init_addr
, &inc
, true, &inv_p
);
5602 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
5603 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5604 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5605 gimple_assign_set_lhs (new_stmt
, new_temp
);
5606 mark_symbols_for_renaming (new_stmt
);
5607 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
5608 gcc_assert (!new_bb
);
5609 msq_init
= gimple_assign_lhs (new_stmt
);
5612 /* 4. Create realignment token using a target builtin, if available.
5613 It is done either inside the containing loop, or before LOOP (as
5614 determined above). */
5616 if (targetm
.vectorize
.builtin_mask_for_load
)
5620 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5621 if (compute_in_loop
)
5622 gcc_assert (init_addr
); /* already computed by the caller. */
5625 /* Generate the INIT_ADDR computation outside LOOP. */
5626 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
5628 pe
= loop_preheader_edge (loop
);
5629 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
5630 gcc_assert (!new_bb
);
5633 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
5634 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
5636 vect_create_destination_var (scalar_dest
,
5637 gimple_call_return_type (new_stmt
));
5638 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5639 gimple_call_set_lhs (new_stmt
, new_temp
);
5641 if (compute_in_loop
)
5642 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
5645 /* Generate the misalignment computation outside LOOP. */
5646 pe
= loop_preheader_edge (loop
);
5647 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
5648 gcc_assert (!new_bb
);
5651 *realignment_token
= gimple_call_lhs (new_stmt
);
5653 /* The result of the CALL_EXPR to this builtin is determined from
5654 the value of the parameter and no global variables are touched
5655 which makes the builtin a "const" function. Requiring the
5656 builtin to have the "const" attribute makes it unnecessary
5657 to call mark_call_clobbered. */
5658 gcc_assert (TREE_READONLY (builtin_decl
));
5661 if (alignment_support_scheme == dr_explicit_realign)
    return msq;

5664 gcc_assert (!compute_in_loop);
5665 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
5668 /* 5. Create msq = phi <msq_init, lsq> in loop */
5670 pe
= loop_preheader_edge (containing_loop
);
5671 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5672 msq
= make_ssa_name (vec_dest
, NULL
);
5673 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
5674 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
5675 add_phi_arg (phi_stmt
, msq_init
, pe
);
5681 /* Function vect_strided_load_supported.
5683 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5684 and FALSE otherwise. */
5687 vect_strided_load_supported (tree vectype
)
5689 optab perm_even_optab
, perm_odd_optab
;
5692 mode
= (int) TYPE_MODE (vectype
);
5694 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
,
5696 if (!perm_even_optab
)
5698 if (vect_print_dump_info (REPORT_DETAILS
))
5699 fprintf (vect_dump
, "no optab for perm_even.");
5703 if (optab_handler (perm_even_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
5705 if (vect_print_dump_info (REPORT_DETAILS
))
5706 fprintf (vect_dump
, "perm_even op not supported by target.");
5710 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
,
5712 if (!perm_odd_optab
)
5714 if (vect_print_dump_info (REPORT_DETAILS
))
5715 fprintf (vect_dump
, "no optab for perm_odd.");
5719 if (optab_handler (perm_odd_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
5721 if (vect_print_dump_info (REPORT_DETAILS
))
5722 fprintf (vect_dump
, "perm_odd op not supported by target.");
5729 /* Function vect_permute_load_chain.
5731 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5732 a power of 2, generate extract_even/odd stmts to reorder the input data
5733 correctly. Return the final references for loads in RESULT_CHAIN.
5735 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5736 The input is 4 vectors each containing 8 elements. We assign a number to each
5737 element; the input sequence is:
5739 1st vec: 0 1 2 3 4 5 6 7
5740 2nd vec: 8 9 10 11 12 13 14 15
5741 3rd vec: 16 17 18 19 20 21 22 23
5742 4th vec: 24 25 26 27 28 29 30 31
5744 The output sequence should be:
5746 1st vec: 0 4 8 12 16 20 24 28
5747 2nd vec: 1 5 9 13 17 21 25 29
5748 3rd vec: 2 6 10 14 18 22 26 30
5749 4th vec: 3 7 11 15 19 23 27 31
5751 i.e., the first output vector should contain the first elements of each
5752 interleaving group, etc.
5754 We use extract_even/odd instructions to create such output. The input of each
5755 extract_even/odd operation is two vectors, e.g., 1st vec: 0 1 2 3 and 2nd vec: 4 5 6 7,
5759 and the output is the vector of extracted even/odd elements. The output of
5760 extract_even will be: 0 2 4 6
5761 and of extract_odd: 1 3 5 7
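 As an array-level model (for illustration only; this is not code from the
 vectorizer), one extract_even/extract_odd pair applied to two N-element
 vectors A and B behaves like:

   for (k = 0; k < N/2; k++)
     {
       even[k]        = A[2*k];       even[N/2 + k] = B[2*k];
       odd[k]         = A[2*k + 1];   odd[N/2 + k]  = B[2*k + 1];
     }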
5764 The permutation is done in log LENGTH stages. In each stage extract_even and
5765 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5766 order. In our example,
5768 E1: extract_even (1st vec, 2nd vec)
5769 E2: extract_odd (1st vec, 2nd vec)
5770 E3: extract_even (3rd vec, 4th vec)
5771 E4: extract_odd (3rd vec, 4th vec)
5773 The output for the first stage will be:
5775 E1: 0 2 4 6 8 10 12 14
5776 E2: 1 3 5 7 9 11 13 15
5777 E3: 16 18 20 22 24 26 28 30
5778 E4: 17 19 21 23 25 27 29 31
5780 In order to proceed and create the correct sequence for the next stage (or
5781 for the correct output, if the second stage is the last one, as in our
5782 example), we first put the output of extract_even operation and then the
5783 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5784 The input for the second stage is:
5786 1st vec (E1): 0 2 4 6 8 10 12 14
5787 2nd vec (E3): 16 18 20 22 24 26 28 30
5788 3rd vec (E2): 1 3 5 7 9 11 13 15
5789 4th vec (E4): 17 19 21 23 25 27 29 31
5791 The output of the second stage:
5793 E1: 0 4 8 12 16 20 24 28
5794 E2: 2 6 10 14 18 22 26 30
5795 E3: 1 5 9 13 17 21 25 29
5796 E4: 3 7 11 15 19 23 27 31
5798 And RESULT_CHAIN after reordering:
5800 1st vec (E1): 0 4 8 12 16 20 24 28
5801 2nd vec (E3): 1 5 9 13 17 21 25 29
5802 3rd vec (E2): 2 6 10 14 18 22 26 30
5803 4th vec (E4): 3 7 11 15 19 23 27 31. */
5806 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
5807 unsigned int length
,
5809 gimple_stmt_iterator
*gsi
,
5810 VEC(tree
,heap
) **result_chain
)
5812 tree perm_dest
, data_ref
, first_vect
, second_vect
;
5814 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
5818 /* Check that the operation is supported. */
5819 if (!vect_strided_load_supported (vectype
))
5822 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
5823 for (i
= 0; i
< exact_log2 (length
); i
++)
5825 for (j
= 0; j
< length
; j
+=2)
5827 first_vect
= VEC_index (tree
, dr_chain
, j
);
5828 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
5830 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5831 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
5832 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5833 add_referenced_var (perm_dest
);
5835 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR
,
5836 perm_dest
, first_vect
,
5839 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
5840 gimple_assign_set_lhs (perm_stmt
, data_ref
);
5841 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5842 mark_symbols_for_renaming (perm_stmt
);
5844 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
5846 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5847 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
5848 DECL_GIMPLE_REG_P (perm_dest
) = 1;
5849 add_referenced_var (perm_dest
);
5851 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR
,
5852 perm_dest
, first_vect
,
5854 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
5855 gimple_assign_set_lhs (perm_stmt
, data_ref
);
5856 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5857 mark_symbols_for_renaming (perm_stmt
);
5859 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
5861 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
5867 /* Function vect_transform_strided_load.
5869 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5870 to perform their permutation and record the resulting vectorized statements in
5871 the STMT_VINFO_VEC_STMT of the corresponding scalar statements.
5875 vect_transform_strided_load (gimple stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
5876 gimple_stmt_iterator
*gsi
)
5878 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5879 gimple first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
5880 gimple next_stmt
, new_stmt
;
5881 VEC(tree
,heap
) *result_chain
= NULL
;
5882 unsigned int i
, gap_count
;
5885 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5886 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5887 vectors, that are ready for vector computation. */
5888 result_chain
= VEC_alloc (tree
, heap
, size
);
5890 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
))
5893 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5894 Since we scan the chain starting from its first node, their order
5895 corresponds to the order of data-refs in RESULT_CHAIN. */
5896 next_stmt
= first_stmt
;
5898 for (i
= 0; VEC_iterate (tree
, result_chain
, i
, tmp_data_ref
); i
++)
5903 /* Skip the gaps. Loads created for the gaps will be removed by dead
5904 code elimination pass later. No need to check for the first stmt in
5905 the group, since it always exists.
5906 DR_GROUP_GAP is the number of steps in elements from the previous
5907 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5908 correspond to the gaps.
5910 if (next_stmt
!= first_stmt
5911 && gap_count
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
5919 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
5920 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5921 copies, and we put the new vector statement in the first available RELATED_STMT field. */
5923 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
5924 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
5928 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
5930 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
5933 prev_stmt
= rel_stmt
;
5934 rel_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
5936 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) = new_stmt
;
5938 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
5940 /* If NEXT_STMT accesses the same DR as the previous statement,
5941 put the same TMP_DATA_REF as its vectorized statement; otherwise
5942 get the next data-ref from RESULT_CHAIN. */
5943 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
5948 VEC_free (tree
, heap
, result_chain
);
5953 /* vectorizable_load.
5955 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
5957 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5958 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5959 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5962 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5966 tree vec_dest
= NULL
;
5967 tree data_ref
= NULL
;
5968 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5969 stmt_vec_info prev_stmt_info
;
5970 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5971 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5972 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5973 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5974 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
5975 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5978 gimple new_stmt
= NULL
;
5980 enum dr_alignment_support alignment_support_scheme
;
5981 tree dataref_ptr
= NULL_TREE
;
5983 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5985 int i
, j
, group_size
;
5986 tree msq
= NULL_TREE
, lsq
;
5987 tree offset
= NULL_TREE
;
5988 tree realignment_token
= NULL_TREE
;
5990 VEC(tree
,heap
) *dr_chain
= NULL
;
5991 bool strided_load
= false;
5995 bool compute_in_loop
= false;
5996 struct loop
*at_loop
;
5998 bool slp
= (slp_node
!= NULL
);
5999 enum tree_code code
;
6001 /* Multiple types in SLP are handled by creating the appropriate number of
6002 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP. */
  if (slp)
    ncopies = 1;
  else
6007 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6009 gcc_assert (ncopies
>= 1);
6011 /* FORNOW. This restriction should be relaxed. */
6012 if (nested_in_vect_loop
&& ncopies
> 1)
6014 if (vect_print_dump_info (REPORT_DETAILS
))
6015 fprintf (vect_dump
, "multiple types in nested loop.");
6019 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
6022 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
6025 /* Is vectorizable load? */
6026 if (!is_gimple_assign (stmt
))
6029 scalar_dest
= gimple_assign_lhs (stmt
);
6030 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6033 code
= gimple_assign_rhs_code (stmt
);
6034 if (code
!= ARRAY_REF
6035 && code
!= INDIRECT_REF
6036 && !STMT_VINFO_STRIDED_ACCESS (stmt_info
))
6039 if (!STMT_VINFO_DATA_REF (stmt_info
))
6042 scalar_type
= TREE_TYPE (DR_REF (dr
));
6043 mode
= (int) TYPE_MODE (vectype
);
6045 /* FORNOW. In some cases can vectorize even if data-type not supported
6046 (e.g. - data copies). */
6047 if (optab_handler (mov_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
6049 if (vect_print_dump_info (REPORT_DETAILS
))
6050 fprintf (vect_dump
, "Aligned load, but unsupported type.");
6054 /* If accesses through a pointer to vectype do not alias the original
6055 memory reference we have a problem. */
6056 if (get_alias_set (vectype
) != get_alias_set (scalar_type
)
6057 && !alias_set_subset_of (get_alias_set (vectype
),
6058 get_alias_set (scalar_type
)))
6060 if (vect_print_dump_info (REPORT_DETAILS
))
6061 fprintf (vect_dump
, "vector type does not alias scalar type");
6065 /* Check if the load is a part of an interleaving chain. */
6066 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
6068 strided_load
= true;
6070 gcc_assert (! nested_in_vect_loop
);
6072 /* Check if interleaving is supported. */
6073 if (!vect_strided_load_supported (vectype
)
6074 && !PURE_SLP_STMT (stmt_info
) && !slp
)
6078 if (!vec_stmt
) /* transformation not required. */
6080 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6081 vect_model_load_cost (stmt_info
, ncopies
, NULL
);
6085 if (vect_print_dump_info (REPORT_DETAILS
))
6086 fprintf (vect_dump
, "transform load.");
6092 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
6093 /* Check if the chain of loads is already vectorized. */
6094 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
6096 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6099 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6100 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6102 /* VEC_NUM is the number of vect stmts to be created for this group. */
6105 strided_load
= false;
6106 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6109 vec_num
= group_size
;
6111 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
6117 group_size
= vec_num
= 1;
6120 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
6121 gcc_assert (alignment_support_scheme
);
6123 /* In case the vectorization factor (VF) is bigger than the number
6124 of elements that we can fit in a vectype (nunits), we have to generate
6125 more than one vector stmt - i.e - we need to "unroll" the
6126 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6127 from one copy of the vector stmt to the next, in the field
6128 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6129 stages to find the correct vector defs to be used when vectorizing
6130 stmts that use the defs of the current stmt. The example below illustrates
6131 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
6132 4 vectorized stmts):
6134 before vectorization:
6135 RELATED_STMT VEC_STMT
6139 step 1: vectorize stmt S1:
6140 We first create the vector stmt VS1_0, and, as usual, record a
6141 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6142 Next, we create the vector stmt VS1_1, and record a pointer to
6143 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6144 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6146 RELATED_STMT VEC_STMT
6147 VS1_0: vx0 = memref0 VS1_1 -
6148 VS1_1: vx1 = memref1 VS1_2 -
6149 VS1_2: vx2 = memref2 VS1_3 -
6150 VS1_3: vx3 = memref3 - -
6151 S1: x = load - VS1_0
6154 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6155 information recorded in the RELATED_STMT field is used to vectorize the stmts that use the defs of the current stmt. */
6158 /* In case of interleaving (non-unit strided access):
6165 Vectorized loads are created in the order of memory accesses
6166 starting from the access of the first stmt of the chain:
6169 VS2: vx1 = &base + vec_size*1
6170 VS3: vx3 = &base + vec_size*2
6171 VS4: vx4 = &base + vec_size*3
6173 Then permutation statements are generated:
6175 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
6176 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
6179 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6180 (the order of the data-refs in the output of vect_permute_load_chain
6181 corresponds to the order of scalar stmts in the interleaving chain - see
6182 the documentation of vect_permute_load_chain()).
6183 The generation of permutation stmts and recording them in
6184 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
6186 In case of both multiple types and interleaving, the vector loads and
6187 permutation stmts above are created for every copy. The result vector stmts
6188 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6189 STMT_VINFO_RELATED_STMT for the next copies. */
6191 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6192 on a target that supports unaligned accesses (dr_unaligned_supported)
6193 we generate the following code:
6197 p = p + indx * vectype_size;
6202 Otherwise, the data reference is potentially unaligned on a target that
6203 does not support unaligned accesses (dr_explicit_realign_optimized) -
6204 then generate the following code, in which the data in each iteration is
6205 obtained by two vector loads, one from the previous iteration, and one
6206 from the current iteration:
6208 msq_init = *(floor(p1))
6209 p2 = initial_addr + VS - 1;
6210 realignment_token = call target_builtin;
6213 p2 = p2 + indx * vectype_size
6215 vec_dest = realign_load (msq, lsq, realignment_token)
6220 /* If the misalignment remains the same throughout the execution of the
6221 loop, we can create the init_addr and permutation mask at the loop
6222 preheader. Otherwise, it needs to be created inside the loop.
6223 This can only occur when vectorizing memory accesses in the inner-loop
6224 nested within an outer-loop that is being vectorized. */
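  /* For example (an assumed inner-loop access, for illustration): with a
     16-byte vectype and an outer-loop DR_STEP of 4 bytes, the misalignment
     cycles through 0, 4, 8, 12 bytes across the outer-loop iterations, so
     the check below sets COMPUTE_IN_LOOP; with an outer-loop DR_STEP of
     32 bytes the misalignment is fixed and the preheader computation in
     vect_setup_realignment can be used instead.  */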
6226 if (nested_in_vect_loop_p (loop
, stmt
)
6227 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6228 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6230 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6231 compute_in_loop
= true;
6234 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6235 || alignment_support_scheme
== dr_explicit_realign
)
6236 && !compute_in_loop
)
6238 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6239 alignment_support_scheme
, NULL_TREE
,
6241 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6243 phi
= SSA_NAME_DEF_STMT (msq
);
6244 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6250 prev_stmt_info
= NULL
;
6251 for (j
= 0; j
< ncopies
; j
++)
6253 /* 1. Create the vector pointer update chain. */
6255 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
,
6257 &dummy
, &ptr_incr
, false,
6261 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
6263 for (i
= 0; i
< vec_num
; i
++)
6266 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6269 /* 2. Create the vector-load in the loop. */
6270 switch (alignment_support_scheme
)
6273 gcc_assert (aligned_access_p (first_dr
));
6274 data_ref
= build_fold_indirect_ref (dataref_ptr
);
6276 case dr_unaligned_supported
:
6278 int mis
= DR_MISALIGNMENT (first_dr
);
6279 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
6281 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
6283 build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
6286 case dr_explicit_realign
:
6289 tree vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6291 if (compute_in_loop
)
6292 msq
= vect_setup_realignment (first_stmt
, gsi
,
6294 dr_explicit_realign
,
6297 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
6298 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6299 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6300 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6301 gimple_assign_set_lhs (new_stmt
, new_temp
);
6302 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6303 copy_virtual_operands (new_stmt
, stmt
);
6304 mark_symbols_for_renaming (new_stmt
);
6307 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6308 TYPE_SIZE_UNIT (scalar_type
));
6309 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6310 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
6313 case dr_explicit_realign_optimized
:
6314 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
6319 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6320 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6321 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6322 gimple_assign_set_lhs (new_stmt
, new_temp
);
6323 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6324 mark_symbols_for_renaming (new_stmt
);
6326 /* 3. Handle explicit realignment if necessary/supported. Create in
6327 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
6328 if (alignment_support_scheme
== dr_explicit_realign_optimized
6329 || alignment_support_scheme
== dr_explicit_realign
)
6333 lsq
= gimple_assign_lhs (new_stmt
);
6334 if (!realignment_token
)
6335 realignment_token
= dataref_ptr
;
6336 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6337 tmp
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
,
6339 new_stmt
= gimple_build_assign (vec_dest
, tmp
);
6340 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6341 gimple_assign_set_lhs (new_stmt
, new_temp
);
6342 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6344 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6347 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6348 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
));
6353 /* 4. Handle invariant-load. */
6356 gcc_assert (!strided_load
);
6357 gcc_assert (nested_in_vect_loop_p (loop
, stmt
));
6362 tree vec_inv
, bitpos
, bitsize
= TYPE_SIZE (scalar_type
);
6364 /* CHECKME: bitpos depends on endianness? */
6365 bitpos
= bitsize_zero_node
;
6366 vec_inv
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
,
6369 vect_create_destination_var (scalar_dest
, NULL_TREE
);
6370 new_stmt
= gimple_build_assign (vec_dest
, vec_inv
);
6371 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6372 gimple_assign_set_lhs (new_stmt
, new_temp
);
6373 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6375 for (k
= nunits
- 1; k
>= 0; --k
)
6376 t
= tree_cons (NULL_TREE
, new_temp
, t
);
6377 /* FIXME: use build_constructor directly. */
6378 vec_inv
= build_constructor_from_list (vectype
, t
);
6379 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6380 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6383 gcc_unreachable (); /* FORNOW. */
6386 /* Collect vector loads and later create their permutation in
6387 vect_transform_strided_load (). */
6389 VEC_quick_push (tree
, dr_chain
, new_temp
);
6391 /* Store vector loads in the corresponding SLP_NODE. */
6393 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
6401 if (!vect_transform_strided_load (stmt
, dr_chain
, group_size
, gsi
))
6403 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6404 VEC_free (tree
, heap
, dr_chain
);
6405 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
6410 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6412 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6413 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6418 VEC_free (tree
, heap
, dr_chain
);
6424 /* Function vectorizable_live_operation.
6426 STMT computes a value that is used outside the loop. Check if
6427 it can be supported. */
6430 vectorizable_live_operation (gimple stmt
,
6431 gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
6432 gimple
*vec_stmt ATTRIBUTE_UNUSED
)
6434 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6435 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6436 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6442 enum vect_def_type dt
;
6443 enum tree_code code
;
6444 enum gimple_rhs_class rhs_class
;
6446 gcc_assert (STMT_VINFO_LIVE_P (stmt_info
));
6448 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
6451 if (!is_gimple_assign (stmt
))
6454 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6457 /* FORNOW. CHECKME. */
6458 if (nested_in_vect_loop_p (loop
, stmt
))
6461 code
= gimple_assign_rhs_code (stmt
);
6462 op_type
= TREE_CODE_LENGTH (code
);
6463 rhs_class
= get_gimple_rhs_class (code
);
6464 gcc_assert (rhs_class
!= GIMPLE_UNARY_RHS
|| op_type
== unary_op
);
6465 gcc_assert (rhs_class
!= GIMPLE_BINARY_RHS
|| op_type
== binary_op
);
6467 /* FORNOW: support only if all uses are invariant. This means
6468 that the scalar operations can remain in place, unvectorized.
6469 The original last scalar value that they compute will be used. */
6471 for (i
= 0; i
< op_type
; i
++)
6473 if (rhs_class
== GIMPLE_SINGLE_RHS
)
6474 op
= TREE_OPERAND (gimple_op (stmt
, 1), i
);
6476 op
= gimple_op (stmt
, i
+ 1);
6477 if (op
&& !vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &def
, &dt
))
6479 if (vect_print_dump_info (REPORT_DETAILS
))
6480 fprintf (vect_dump
, "use not simple.");
6484 if (dt
!= vect_invariant_def
&& dt
!= vect_constant_def
)
6488 /* No transformation is required for the cases we currently support. */
6493 /* Function vect_is_simple_cond.
6496 LOOP - the loop that is being vectorized.
6497 COND - Condition that is checked for simple use.
6499 Returns whether a COND can be vectorized. Checks whether
6500 condition operands are supportable using vect_is_simple_use. */
6503 vect_is_simple_cond (tree cond
, loop_vec_info loop_vinfo
)
6507 enum vect_def_type dt
;
6509 if (!COMPARISON_CLASS_P (cond
))
6512 lhs
= TREE_OPERAND (cond
, 0);
6513 rhs
= TREE_OPERAND (cond
, 1);
6515 if (TREE_CODE (lhs
) == SSA_NAME
)
6517 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6518 if (!vect_is_simple_use (lhs
, loop_vinfo
, &lhs_def_stmt
, &def
, &dt
))
6521 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6522 && TREE_CODE (lhs
) != FIXED_CST
)
6525 if (TREE_CODE (rhs
) == SSA_NAME
)
6527 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6528 if (!vect_is_simple_use (rhs
, loop_vinfo
, &rhs_def_stmt
, &def
, &dt
))
6531 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6532 && TREE_CODE (rhs
) != FIXED_CST
)
6538 /* vectorizable_condition.
6540 Check if STMT is conditional modify expression that can be vectorized.
6541 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6542 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6545 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6548 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6551 tree scalar_dest
= NULL_TREE
;
6552 tree vec_dest
= NULL_TREE
;
6553 tree op
= NULL_TREE
;
6554 tree cond_expr
, then_clause
, else_clause
;
6555 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6556 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6557 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
6558 tree vec_compare
, vec_cond_expr
;
6560 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6561 enum machine_mode vec_mode
;
6563 enum vect_def_type dt
;
6564 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6565 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6566 enum tree_code code
;
6568 gcc_assert (ncopies
>= 1);
6570 return false; /* FORNOW */
6572 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
6575 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_loop_def
)
6578 /* FORNOW: SLP not supported. */
6579 if (STMT_SLP_TYPE (stmt_info
))
6582 /* FORNOW: not yet supported. */
6583 if (STMT_VINFO_LIVE_P (stmt_info
))
6585 if (vect_print_dump_info (REPORT_DETAILS
))
6586 fprintf (vect_dump
, "value used after loop.");
6590 /* Is vectorizable conditional operation? */
6591 if (!is_gimple_assign (stmt
))
6594 code
= gimple_assign_rhs_code (stmt
);
6596 if (code
!= COND_EXPR
)
6599 gcc_assert (gimple_assign_single_p (stmt
));
6600 op
= gimple_assign_rhs1 (stmt
);
6601 cond_expr
= TREE_OPERAND (op
, 0);
6602 then_clause
= TREE_OPERAND (op
, 1);
6603 else_clause
= TREE_OPERAND (op
, 2);
6605 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
6608 /* We do not handle two different vector types for the condition and the values. */
6610 if (TREE_TYPE (TREE_OPERAND (cond_expr
, 0)) != TREE_TYPE (vectype
))
6613 if (TREE_CODE (then_clause
) == SSA_NAME
)
6615 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6616 if (!vect_is_simple_use (then_clause
, loop_vinfo
,
6617 &then_def_stmt
, &def
, &dt
))
6620 else if (TREE_CODE (then_clause
) != INTEGER_CST
6621 && TREE_CODE (then_clause
) != REAL_CST
6622 && TREE_CODE (then_clause
) != FIXED_CST
)
6625 if (TREE_CODE (else_clause
) == SSA_NAME
)
6627 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6628 if (!vect_is_simple_use (else_clause
, loop_vinfo
,
6629 &else_def_stmt
, &def
, &dt
))
6632 else if (TREE_CODE (else_clause
) != INTEGER_CST
6633 && TREE_CODE (else_clause
) != REAL_CST
6634 && TREE_CODE (else_clause
) != FIXED_CST
)
6638 vec_mode
= TYPE_MODE (vectype
);
6642 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6643 return expand_vec_cond_expr_p (op
, vec_mode
);
6649 scalar_dest
= gimple_assign_lhs (stmt
);
6650 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6652 /* Handle cond expr. */
6654 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
6656 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
6657 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
6658 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
6660 /* Arguments are ready. Create the new vector stmt. */
6661 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
6662 vec_cond_lhs
, vec_cond_rhs
);
6663 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6664 vec_compare
, vec_then_clause
, vec_else_clause
);
6666 *vec_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6667 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
6668 gimple_assign_set_lhs (*vec_stmt
, new_temp
);
6669 vect_finish_stmt_generation (stmt
, *vec_stmt
, gsi
);
6675 /* Function vect_transform_stmt.
6677 Create a vectorized stmt to replace STMT, and insert it at GSI. */
6680 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
6681 bool *strided_store
, slp_tree slp_node
)
6683 bool is_store
= false;
6684 gimple vec_stmt
= NULL
;
6685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6686 gimple orig_stmt_in_pattern
;
6689 switch (STMT_VINFO_TYPE (stmt_info
))
6691 case type_demotion_vec_info_type
:
6692 done
= vectorizable_type_demotion (stmt
, gsi
, &vec_stmt
, slp_node
);
6696 case type_promotion_vec_info_type
:
6697 done
= vectorizable_type_promotion (stmt
, gsi
, &vec_stmt
, slp_node
);
6701 case type_conversion_vec_info_type
:
6702 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
6706 case induc_vec_info_type
:
6707 gcc_assert (!slp_node
);
6708 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
6712 case op_vec_info_type
:
6713 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
6717 case assignment_vec_info_type
:
6718 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
6722 case load_vec_info_type
:
6723 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
);
6727 case store_vec_info_type
:
6728 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
6730 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
) && !slp_node
)
6732 /* In case of interleaving, the whole chain is vectorized when the
6733 last store in the chain is reached. Store stmts before the last
6734 one are skipped, and their vec_stmt_info shouldn't be freed meanwhile. */
6736 *strided_store
= true;
6737 if (STMT_VINFO_VEC_STMT (stmt_info
))
6744 case condition_vec_info_type
:
6745 gcc_assert (!slp_node
);
6746 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
);
6750 case call_vec_info_type
:
6751 gcc_assert (!slp_node
);
6752 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
);
6755 case reduc_vec_info_type
:
6756 gcc_assert (!slp_node
);
6757 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
);
6762 if (!STMT_VINFO_LIVE_P (stmt_info
))
6764 if (vect_print_dump_info (REPORT_DETAILS
))
6765 fprintf (vect_dump
, "stmt not supported.");
6770 if (STMT_VINFO_LIVE_P (stmt_info
)
6771 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
6773 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
6779 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
6780 orig_stmt_in_pattern
= STMT_VINFO_RELATED_STMT (stmt_info
);
6781 if (orig_stmt_in_pattern
)
6783 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt_in_pattern
);
6784 /* STMT was inserted by the vectorizer to replace a computation idiom.
6785 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6786 computed this idiom. We need to record a pointer to VEC_STMT in
6787 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6788 documentation of vect_pattern_recog. */
6789 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
6791 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
6792 STMT_VINFO_VEC_STMT (stmt_vinfo
) = vec_stmt
;
6801 /* This function builds ni_name = number of iterations the loop executes,
6802 emitting the computation on the loop preheader edge. */
6805 vect_build_loop_niters (loop_vec_info loop_vinfo
)
6808 gimple_seq stmts
= NULL
;
6810 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6811 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
6813 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
6814 add_referenced_var (var
);
6815 ni_name
= force_gimple_operand (ni
, &stmts
, false, var
);
6817 pe
= loop_preheader_edge (loop
);
6820 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
6821 gcc_assert (!new_bb
);
6828 /* This function generates the following statements:
6830 ni_name = number of iterations loop executes
6831 ratio = ni_name / vf
6832 ratio_mult_vf_name = ratio * vf
6834 and places them at the loop preheader edge. */
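/* For example (assumed values, for illustration): with ni_name = 1003 and
   vf = 4 the code below computes ratio = 1003 >> 2 = 250 and
   ratio_mult_vf_name = 250 << 2 = 1000, leaving 1003 - 1000 = 3 iterations
   for the epilog loop created by vect_do_peeling_for_loop_bound.  */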
6837 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo
,
6839 tree
*ratio_mult_vf_name_ptr
,
6840 tree
*ratio_name_ptr
)
6849 tree ratio_mult_vf_name
;
6850 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6851 tree ni
= LOOP_VINFO_NITERS (loop_vinfo
);
6852 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6855 pe
= loop_preheader_edge (loop
);
6857 /* Generate temporary variable that contains
6858 number of iterations loop executes. */
6860 ni_name
= vect_build_loop_niters (loop_vinfo
);
6861 log_vf
= build_int_cst (TREE_TYPE (ni
), exact_log2 (vf
));
6863 /* Create: ratio = ni >> log2(vf) */
6865 ratio_name
= fold_build2 (RSHIFT_EXPR
, TREE_TYPE (ni_name
), ni_name
, log_vf
);
6866 if (!is_gimple_val (ratio_name
))
6868 var
= create_tmp_var (TREE_TYPE (ni
), "bnd");
6869 add_referenced_var (var
);
6872 ratio_name
= force_gimple_operand (ratio_name
, &stmts
, true, var
);
6873 pe
= loop_preheader_edge (loop
);
6874 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
6875 gcc_assert (!new_bb
);
6878 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6880 ratio_mult_vf_name
= fold_build2 (LSHIFT_EXPR
, TREE_TYPE (ratio_name
),
6881 ratio_name
, log_vf
);
6882 if (!is_gimple_val (ratio_mult_vf_name
))
6884 var
= create_tmp_var (TREE_TYPE (ni
), "ratio_mult_vf");
6885 add_referenced_var (var
);
6888 ratio_mult_vf_name
= force_gimple_operand (ratio_mult_vf_name
, &stmts
,
6890 pe
= loop_preheader_edge (loop
);
6891 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
6892 gcc_assert (!new_bb
);
6895 *ni_name_ptr
= ni_name
;
6896 *ratio_mult_vf_name_ptr
= ratio_mult_vf_name
;
6897 *ratio_name_ptr
= ratio_name
;
6903 /* Function vect_update_ivs_after_vectorizer.
6905 "Advance" the induction variables of LOOP to the value they should take
6906 after the execution of LOOP. This is currently necessary because the
6907 vectorizer does not handle induction variables that are used after the
6908 loop. Such a situation occurs when the last iterations of LOOP are peeled, because:
6910 1. We introduced new uses after LOOP for IVs that were not originally used
6911 after LOOP: the IVs of LOOP are now used by an epilog loop.
6912 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6913 times, whereas the loop IVs should be bumped N times.
6916 - LOOP - a loop that is going to be vectorized. The last few iterations
6917 of LOOP were peeled.
6918 - NITERS - the number of iterations that LOOP executes (before it is
6919 vectorized). i.e, the number of times the ivs should be bumped.
6920 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6921 coming out from LOOP on which there are uses of the LOOP ivs
6922 (this is the path from LOOP->exit to epilog_loop->preheader).
6924 The new definitions of the ivs are placed in LOOP->exit.
6925 The phi args associated with the edge UPDATE_E in the bb
6926 UPDATE_E->dest are updated accordingly.
6928 Assumption 1: Like the rest of the vectorizer, this function assumes
6929 a single loop exit that has a single predecessor.
6931 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6932 organized in the same order.
6934 Assumption 3: The access function of the ivs is simple enough (see
6935 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6937 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6938 coming out of LOOP on which the ivs of LOOP are used (this is the path
6939 that leads to the epilog loop; other paths skip the epilog loop). This
6940 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6941 needs to have its phis updated.
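   A worked example (an assumed IV, for illustration): for an induction
   variable with initial value INIT and per-iteration step STEP, its value
   after NITERS iterations is

     ni = INIT + NITERS * STEP

   (a POINTER_PLUS_EXPR of the same quantity for pointer IVs); this is the
   expression the loop below builds and assigns to the corresponding phi
   argument on edge UPDATE_E.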
6945 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
6948 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6949 basic_block exit_bb
= single_exit (loop
)->dest
;
6951 gimple_stmt_iterator gsi
, gsi1
;
6952 basic_block update_bb
= update_e
->dest
;
6954 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6956 /* Make sure there exists a single-predecessor exit bb: */
6957 gcc_assert (single_pred_p (exit_bb
));
6959 for (gsi
= gsi_start_phis (loop
->header
), gsi1
= gsi_start_phis (update_bb
);
6960 !gsi_end_p (gsi
) && !gsi_end_p (gsi1
);
6961 gsi_next (&gsi
), gsi_next (&gsi1
))
6963 tree access_fn
= NULL
;
6964 tree evolution_part
;
6967 tree var
, ni
, ni_name
;
6968 gimple_stmt_iterator last_gsi
;
6970 phi
= gsi_stmt (gsi
);
6971 phi1
= gsi_stmt (gsi1
);
6972 if (vect_print_dump_info (REPORT_DETAILS
))
6974 fprintf (vect_dump
, "vect_update_ivs_after_vectorizer: phi: ");
6975 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
6978 /* Skip virtual phi's. */
6979 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
6981 if (vect_print_dump_info (REPORT_DETAILS
))
6982 fprintf (vect_dump
, "virtual phi. skip.");
6986 /* Skip reduction phis. */
6987 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
6989 if (vect_print_dump_info (REPORT_DETAILS
))
6990 fprintf (vect_dump
, "reduc phi. skip.");
6994 access_fn
= analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
6995 gcc_assert (access_fn
);
6997 unshare_expr (evolution_part_in_loop_num (access_fn
, loop
->num
));
6998 gcc_assert (evolution_part
!= NULL_TREE
);
7000 /* FORNOW: We do not support IVs whose evolution function is a polynomial
7001 of degree >= 2 or exponential. */
7002 gcc_assert (!tree_is_chrec (evolution_part
));
7004 step_expr
= evolution_part
;
7005 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
,
7008 if (POINTER_TYPE_P (TREE_TYPE (init_expr
)))
7009 ni
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (init_expr
),
7011 fold_convert (sizetype
,
7012 fold_build2 (MULT_EXPR
, TREE_TYPE (niters
),
7013 niters
, step_expr
)));
7015 ni
= fold_build2 (PLUS_EXPR
, TREE_TYPE (init_expr
),
7016 fold_build2 (MULT_EXPR
, TREE_TYPE (init_expr
),
7017 fold_convert (TREE_TYPE (init_expr
),
7024 var
= create_tmp_var (TREE_TYPE (init_expr
), "tmp");
7025 add_referenced_var (var
);
7027 last_gsi
= gsi_last_bb (exit_bb
);
7028 ni_name
= force_gimple_operand_gsi (&last_gsi
, ni
, false, var
,
7029 true, GSI_SAME_STMT
);
7031 /* Fix phi expressions in the successor bb. */
7032 SET_PHI_ARG_DEF (phi1
, update_e
->dest_idx
, ni_name
);
7036 /* Return the more conservative threshold between the
7037 min_profitable_iters returned by the cost model and the user
7038 specified threshold, if provided. */
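/* For example (assumed values, for illustration): with
   --param min-vect-loop-bound=2 and a vectorization factor of 4,
   min_scalar_loop_bound is 2 * 4 - 1 = 7; if the cost model reports
   min_profitable_iters = 10 the returned threshold is 10, otherwise 7 is
   used.  */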
7041 conservative_cost_threshold (loop_vec_info loop_vinfo
,
7042 int min_profitable_iters
)
7045 int min_scalar_loop_bound
;
7047 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
7048 * LOOP_VINFO_VECT_FACTOR (loop_vinfo
)) - 1);
7050 /* Use the cost model only if it is more conservative than the user-specified threshold. */
7052 th
= (unsigned) min_scalar_loop_bound
;
7053 if (min_profitable_iters
7054 && (!min_scalar_loop_bound
7055 || min_profitable_iters
> min_scalar_loop_bound
))
7056 th
= (unsigned) min_profitable_iters
;
7058 if (th
&& vect_print_dump_info (REPORT_COST
))
7059 fprintf (vect_dump
, "Vectorization may not be profitable.");
7064 /* Function vect_do_peeling_for_loop_bound
7066 Peel the last iterations of the loop represented by LOOP_VINFO.
7067 The peeled iterations form a new epilog loop. Given that the loop now
7068 iterates NITERS times, the new epilog loop iterates
7069 NITERS % VECTORIZATION_FACTOR times.
7071 The original loop will later be made to iterate
7072 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
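/* For example (assumed values, for illustration): with NITERS = 103 and a
   vectorization factor of 8, the vectorized loop executes RATIO = 12
   iterations covering 96 scalar iterations, and the epilog loop created
   here executes the remaining 103 % 8 = 7 iterations in scalar form.  */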
7075 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
, tree
*ratio
)
7077 tree ni_name
, ratio_mult_vf_name
;
7078 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7079 struct loop
*new_loop
;
7081 basic_block preheader
;
7083 bool check_profitability
= false;
7084 unsigned int th
= 0;
7085 int min_profitable_iters
;
7087 if (vect_print_dump_info (REPORT_DETAILS
))
7088 fprintf (vect_dump
, "=== vect_do_peeling_for_loop_bound ===");
7090 initialize_original_copy_tables ();
7092 /* Generate the following variables on the preheader of original loop:
7094 ni_name = number of iteration the original loop executes
7095 ratio = ni_name / vf
7096 ratio_mult_vf_name = ratio * vf */
7097 vect_generate_tmps_on_preheader (loop_vinfo
, &ni_name
,
7098 &ratio_mult_vf_name
, ratio
);
7100 loop_num
= loop
->num
;
7102 /* Do the cost model check here if it was not already done during
7103 versioning or peeling for alignment. */
7104 if (!VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
7105 && !VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
))
7106 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
7108 check_profitability
= true;
7110 /* Get profitability threshold for vectorized loop. */
7111 min_profitable_iters
= LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
);
7113 th
= conservative_cost_threshold (loop_vinfo
,
7114 min_profitable_iters
);
7117 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, single_exit (loop
),
7118 ratio_mult_vf_name
, ni_name
, false,
7119 th
, check_profitability
);
7120 gcc_assert (new_loop
);
7121 gcc_assert (loop_num
== loop
->num
);
7122 #ifdef ENABLE_CHECKING
7123 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
7126 /* A guard that controls whether the new_loop is to be executed or skipped
7127 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
7128 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
7129 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
7130 is on the path where the LOOP IVs are used and need to be updated. */
7132 preheader
= loop_preheader_edge (new_loop
)->src
;
7133 if (EDGE_PRED (preheader
, 0)->src
== single_exit (loop
)->dest
)
7134 update_e
= EDGE_PRED (preheader
, 0);
7136 update_e
= EDGE_PRED (preheader
, 1);
7138 /* Update IVs of original loop as if they were advanced
7139 by ratio_mult_vf_name steps. */
7140 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
7142 /* After peeling we have to reset scalar evolution analyzer. */
7145 free_original_copy_tables ();
7149 /* Function vect_gen_niters_for_prolog_loop
7151 Set the number of iterations for the loop represented by LOOP_VINFO
7152 to the minimum between LOOP_NITERS (the original iteration count of the loop)
7153 and the misalignment of DR - the data reference recorded in
7154 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
7155 this loop, the data reference DR will refer to an aligned location.
7157 The following computation is generated:
7159 If the misalignment of DR is known at compile time:
7160 addr_mis = int mis = DR_MISALIGNMENT (dr);
7161 Else, compute address misalignment in bytes:
7162 addr_mis = addr & (vectype_size - 1)
7164 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
7166 (elem_size = element type size; an element is the scalar element whose type
7167 is the inner type of the vectype)
7169 When the step of the data-ref in the loop is not 1 (as in interleaved data
7170 and SLP), the number of iterations of the prolog must be divided by the step
7171 (which is equal to the size of the interleaved group).
7173 The above formulas assume that VF == number of elements in the vector. This
7174 may not hold when there are multiple-types in the loop.
7175 In this case, for some data-references in the loop the VF does not represent
7176 the number of elements that fit in the vector. Therefore, instead of VF we
7177 use TYPE_VECTOR_SUBPARTS. */
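/* A worked example (assumed values, for illustration): for a data-ref of
   4-byte elements with a 4-element vectype (VF = 4, step = 1) whose start
   address is misaligned by 8 bytes:

     elem_misalign = 8 / 4 = 2
     prolog_niters = ((4 - 2) & (4 - 1)) / 1 = 2

   so two scalar iterations are peeled; after them the address has advanced
   by 8 bytes and the access is 16-byte aligned.  */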
7180 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
)
7182 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
7183 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7186 tree iters
, iters_name
;
7189 gimple dr_stmt
= DR_STMT (dr
);
7190 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
7191 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7192 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
7193 tree niters_type
= TREE_TYPE (loop_niters
);
7195 int element_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
7196 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
7198 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
7199 step
= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
7201 pe
= loop_preheader_edge (loop
);
7203 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
7205 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
7206 int elem_misalign
= byte_misalign
/ element_size
;
7208 if (vect_print_dump_info (REPORT_DETAILS
))
7209 fprintf (vect_dump
, "known alignment = %d.", byte_misalign
);
7211 iters
= build_int_cst (niters_type
,
7212 (((nelements
- elem_misalign
) & (nelements
- 1)) / step
));
7216 gimple_seq new_stmts
= NULL
;
7217 tree start_addr
= vect_create_addr_base_for_vector_ref (dr_stmt
,
7218 &new_stmts
, NULL_TREE
, loop
);
7219 tree ptr_type
= TREE_TYPE (start_addr
);
7220 tree size
= TYPE_SIZE (ptr_type
);
7221 tree type
= lang_hooks
.types
.type_for_size (tree_low_cst (size
, 1), 1);
7222 tree vectype_size_minus_1
= build_int_cst (type
, vectype_align
- 1);
7223 tree elem_size_log
=
7224 build_int_cst (type
, exact_log2 (vectype_align
/nelements
));
7225 tree nelements_minus_1
= build_int_cst (type
, nelements
- 1);
7226 tree nelements_tree
= build_int_cst (type
, nelements
);
7230 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmts
);
7231 gcc_assert (!new_bb
);
7233 /* Create: byte_misalign = addr & (vectype_size - 1) */
7235 fold_build2 (BIT_AND_EXPR
, type
, fold_convert (type
, start_addr
), vectype_size_minus_1
);
7237 /* Create: elem_misalign = byte_misalign / element_size */
7239 fold_build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
7241 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
7242 iters
= fold_build2 (MINUS_EXPR
, type
, nelements_tree
, elem_misalign
);
7243 iters
= fold_build2 (BIT_AND_EXPR
, type
, iters
, nelements_minus_1
);
7244 iters
= fold_convert (niters_type
, iters
);
7247 /* Create: prolog_loop_niters = min (iters, loop_niters) */
7248 /* If the loop bound is known at compile time we already verified that it is
7249 greater than vf; since the misalignment ('iters') is at most vf, there's
7250 no need to generate the MIN_EXPR in this case. */
7251 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
7252 iters
= fold_build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
7254 if (vect_print_dump_info (REPORT_DETAILS
))
7256 fprintf (vect_dump
, "niters for prolog loop: ");
7257 print_generic_expr (vect_dump
, iters
, TDF_SLIM
);
7260 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
7261 add_referenced_var (var
);
7263 iters_name
= force_gimple_operand (iters
, &stmts
, false, var
);
7265 /* Insert stmt on loop preheader edge. */
7268 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
7269 gcc_assert (!new_bb
);
7276 /* Function vect_update_init_of_dr
7278 NITERS iterations were peeled from LOOP. DR represents a data reference
7279 in LOOP. This function updates the information recorded in DR to
7280 account for the fact that the first NITERS iterations had already been
7281 executed. Specifically, it updates the OFFSET field of DR. */
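/* For example (assumed values, for illustration): if NITERS = 3 iterations
   were peeled and DR_STEP is 4 (bytes), the code below adds 3 * 4 = 12 to
   DR_OFFSET, so DR now describes the first access made by the remaining,
   to-be-vectorized loop.  */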
7284 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
7286 tree offset
= DR_OFFSET (dr
);
7288 niters
= fold_build2 (MULT_EXPR
, TREE_TYPE (niters
), niters
, DR_STEP (dr
));
7289 offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, niters
);
7290 DR_OFFSET (dr
) = offset
;
7294 /* Function vect_update_inits_of_drs
7296 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
7297 This function updates the information recorded for the data references in
7298 the loop to account for the fact that the first NITERS iterations had
7299 already been executed. Specifically, it updates the initial_condition of
7300 the access_function of all the data_references in the loop. */
7303 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
7306 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
7307 struct data_reference
*dr
;
7309 if (vect_print_dump_info (REPORT_DETAILS
))
7310 fprintf (vect_dump
, "=== vect_update_inits_of_dr ===");
7312 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
7313 vect_update_init_of_dr (dr
, niters
);
7317 /* Function vect_do_peeling_for_alignment
7319 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
7320 'niters' is set to the misalignment of one of the data references in the
7321 loop, thereby forcing it to refer to an aligned location at the beginning
7322 of the execution of this loop. The data reference for which we are
7323 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
7326 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
)
7328 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7329 tree niters_of_prolog_loop
, ni_name
;
7331 struct loop
*new_loop
;
7332 bool check_profitability
= false;
7333 unsigned int th
= 0;
7334 int min_profitable_iters
;
7336 if (vect_print_dump_info (REPORT_DETAILS
))
7337 fprintf (vect_dump
, "=== vect_do_peeling_for_alignment ===");
7339 initialize_original_copy_tables ();
7341 ni_name
= vect_build_loop_niters (loop_vinfo
);
7342 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
, ni_name
);
7345 /* Do the cost model check here if it was not already done during versioning. */
7346 if (!VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
7347 && !VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
7349 check_profitability
= true;
7351 /* Get profitability threshold for vectorized loop. */
7352 min_profitable_iters
= LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
);
7354 th
= conservative_cost_threshold (loop_vinfo
,
7355 min_profitable_iters
);
7358 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
7360 slpeel_tree_peel_loop_to_edge (loop
, loop_preheader_edge (loop
),
7361 niters_of_prolog_loop
, ni_name
, true,
7362 th
, check_profitability
);
7364 gcc_assert (new_loop
);
7365 #ifdef ENABLE_CHECKING
7366 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
7369 /* Update number of times loop executes. */
7370 n_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
7371 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
7372 TREE_TYPE (n_iters
), n_iters
, niters_of_prolog_loop
);
7374 /* Update the init conditions of the access functions of all data refs. */
7375 vect_update_inits_of_drs (loop_vinfo
, niters_of_prolog_loop
);
7377 /* After peeling we have to reset scalar evolution analyzer. */
7380 free_original_copy_tables ();
7384 /* Function vect_create_cond_for_align_checks.
7386 Create a conditional expression that represents the alignment checks for
7387 all of the data references (array element references) whose alignment must be checked at runtime.
7391 COND_EXPR - input conditional expression. New conditions will be chained
7392 with logical AND operation.
7393 LOOP_VINFO - two fields of the loop information are used.
7394 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
7395 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
7398 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7400 The returned value is the conditional expression to be used in the if
7401 statement that controls which version of the loop gets executed at runtime.
7403 The algorithm makes two assumptions:
7404 1) The number of bytes "n" in a vector is a power of 2.
7405 2) An address "a" is aligned if a%n is zero and that this
7406 test can be done as a&(n-1) == 0. For example, for 16
7407 byte vectors the test is a&0xf == 0. */
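/* For two data references the emitted sequence is roughly (a sketch using
   the temporary names created below):

     addr2int0 = (int_ptrsize_type) addr_base_0;
     addr2int1 = (int_ptrsize_type) addr_base_1;
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & mask;

   and the returned condition is (andmask == 0), i.e. all the checked
   addresses are aligned.  */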
7410 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
7412 gimple_seq
*cond_expr_stmt_list
)
7414 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7415 VEC(gimple
,heap
) *may_misalign_stmts
7416 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
7418 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
7422 tree int_ptrsize_type
;
7424 tree or_tmp_name
= NULL_TREE
;
7425 tree and_tmp
, and_tmp_name
;
7428 tree part_cond_expr
;
7430 /* Check that mask is one less than a power of 2, i.e., mask is
7431 all zeros followed by all ones. */
7432 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
7434 /* CHECKME: what is the best integer or unsigned type to use to hold a
7435 cast from a pointer value? */
7436 psize
= TYPE_SIZE (ptr_type_node
);
7438 = lang_hooks
.types
.type_for_size (tree_low_cst (psize
, 1), 0);
7440 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
7441 of the first vector of the i'th data reference. */
7443 for (i
= 0; VEC_iterate (gimple
, may_misalign_stmts
, i
, ref_stmt
); i
++)
7445 gimple_seq new_stmt_list
= NULL
;
7447 tree addr_tmp
, addr_tmp_name
;
7448 tree or_tmp
, new_or_tmp_name
;
7449 gimple addr_stmt
, or_stmt
;
7451 /* create: addr_tmp = (int)(address_of_first_vector) */
7453 vect_create_addr_base_for_vector_ref (ref_stmt
, &new_stmt_list
,
7455 if (new_stmt_list
!= NULL
)
7456 gimple_seq_add_seq (cond_expr_stmt_list
, new_stmt_list
);
7458 sprintf (tmp_name
, "%s%d", "addr2int", i
);
7459 addr_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
7460 add_referenced_var (addr_tmp
);
7461 addr_tmp_name
= make_ssa_name (addr_tmp
, NULL
);
7462 addr_stmt
= gimple_build_assign (addr_tmp_name
, addr_base
);
7463 SSA_NAME_DEF_STMT (addr_tmp_name
) = addr_stmt
;
7464 gimple_seq_add_stmt (cond_expr_stmt_list
, addr_stmt
);
7466 /* The addresses are ORed together. */
7468 if (or_tmp_name
!= NULL_TREE
)
7470 /* create: or_tmp = or_tmp | addr_tmp */
7471 sprintf (tmp_name
, "%s%d", "orptrs", i
);
7472 or_tmp
= create_tmp_var (int_ptrsize_type
, tmp_name
);
7473 add_referenced_var (or_tmp
);
7474 new_or_tmp_name
= make_ssa_name (or_tmp
, NULL
);
7475 or_stmt
= gimple_build_assign_with_ops (BIT_IOR_EXPR
,
7477 or_tmp_name
, addr_tmp_name
);
7478 SSA_NAME_DEF_STMT (new_or_tmp_name
) = or_stmt
;
7479 gimple_seq_add_stmt (cond_expr_stmt_list
, or_stmt
);
7480 or_tmp_name
= new_or_tmp_name
;
7483 or_tmp_name
= addr_tmp_name
;
7487 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
7489 /* create: and_tmp = or_tmp & mask */
7490 and_tmp
= create_tmp_var (int_ptrsize_type
, "andmask" );
7491 add_referenced_var (and_tmp
);
7492 and_tmp_name
= make_ssa_name (and_tmp
, NULL
);
7494 and_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR
, and_tmp_name
,
7495 or_tmp_name
, mask_cst
);
7496 SSA_NAME_DEF_STMT (and_tmp_name
) = and_stmt
;
7497 gimple_seq_add_stmt (cond_expr_stmt_list
, and_stmt
);
7499 /* Make and_tmp the left operand of the conditional test against zero.
7500 if and_tmp has a nonzero bit then some address is unaligned. */
7501 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
7502 part_cond_expr
= fold_build2 (EQ_EXPR
, boolean_type_node
,
7503 and_tmp_name
, ptrsize_zero
);
7505 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
7506 *cond_expr
, part_cond_expr
);
7508 *cond_expr
= part_cond_expr
;
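
/* Illustrative sketch (not part of the original source): for two may-misalign
   data references and 16-byte vectors (so LOOP_VINFO_PTR_MASK == 0xf), the
   statements emitted into COND_EXPR_STMT_LIST correspond roughly to:

     addr2int0 = (intptr_t) address_of_first_vector_of_dr0;
     addr2int1 = (intptr_t) address_of_first_vector_of_dr1;
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;

   and the condition chained into *COND_EXPR is (andmask == 0), which holds
   only if both addresses have their low four bits clear, i.e. both are
   16-byte aligned.  The temporary names mirror those created above.  */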
/* Function vect_vfa_segment_size.

   Create an expression that computes the size of the segment
   that will be accessed for a data reference.  The function takes into
   account that realignment loads may access one more vector.

   Input:
     DR: The data reference.
     VECT_FACTOR: vectorization factor.

   Return an expression whose value is the size of the segment which will be
   accessed by DR.  */

static tree
vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
{
  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
                                     DR_STEP (dr), vect_factor);

  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
    {
      tree vector_size = TYPE_SIZE_UNIT
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));

      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
                                    segment_length, vector_size);
    }

  return fold_convert (sizetype, segment_length);
}
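
/* Illustrative sketch (not part of the original source): for a data reference
   with a DR_STEP of 4 bytes and a vectorization factor of 4, the segment
   length computed above is 4 * 4 = 16 bytes; if the reference uses the
   dr_explicit_realign_optimized scheme, the size of one extra vector (e.g.
   another 16 bytes for V4SI) is added, because the realigning load may also
   touch the vector that follows the segment.  */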
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.

   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.  */

static void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
                                   tree * cond_expr,
                                   gimple_seq * cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC (ddr_p, heap) * may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  tree vect_factor =
    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));

  ddr_p ddr;
  unsigned int i;
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */

  if (VEC_empty (ddr_p, may_alias_ddrs))
    return;

  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
    {
      struct data_reference *dr_a, *dr_b;
      gimple dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      gimple stmt_a, stmt_b;

      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
        stmt_a = dr_group_first_a;
      dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));

      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
        stmt_b = dr_group_first_b;
      dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));

      addr_base_a =
        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
                                              NULL_TREE, loop);
      addr_base_b =
        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
                                              NULL_TREE, loop);

      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "create runtime check for data references ");
          print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
        }

      part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
          fold_build2 (LT_EXPR, boolean_type_node,
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
              addr_base_a,
              segment_length_a),
            addr_base_b),
          fold_build2 (LT_EXPR, boolean_type_node,
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
              addr_base_b,
              segment_length_b),
            addr_base_a));

      if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
      else
        *cond_expr = part_cond_expr;
    }

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
             VEC_length (ddr_p, may_alias_ddrs));
}
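
/* Illustrative sketch (not part of the original source): for one may-alias
   pair consisting of a store through base address pa and a load through base
   address pb, the clause built above corresponds to:

     (pa + segment_length_a < pb) || (pb + segment_length_b < pa)

   i.e. the two accessed segments are disjoint, so running the vectorized
   loop is safe for this pair.  One such clause is ANDed into *COND_EXPR for
   every DDR in LOOP_VINFO_MAY_ALIAS_DDRS.  */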
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and the alignment tests pass.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.  */

static void
vect_loop_versioning (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *nloop;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  basic_block condition_bb;
  gimple_stmt_iterator gsi, cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gimple orig_phi, new_phi;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  int min_profitable_iters = 0;
  unsigned int th;

  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

  th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);

  cond_expr =
    build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
            build_int_cst (TREE_TYPE (scalar_loop_iters), th));

  cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
                                    false, NULL_TREE);

  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

  cond_expr =
    fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
  cond_expr =
    force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  nloop = loop_version (loop, cond_expr, &condition_bb,
                        prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      orig_phi = gsi_stmt (gsi);
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_exit_bb);
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e);
      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
    }

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
    }
}
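
/* Illustrative sketch (not part of the original source): after versioning,
   the control flow is roughly

     if (scalar_loop_iters > th && <alignment checks> && <alias checks>)
       <loop copy to be vectorized>
     else
       <scalar loop copy>

   where TH is the conservative cost-model threshold computed above, and the
   alignment/alias clauses are present only when the corresponding
   LOOP_VINFO vectors are non-empty.  */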
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

static void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);
  /* VECTYPE is the type of the destination.  */
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  si = gsi_for_stmt (stmt);
  is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
  if (is_store)
    {
      if (DR_GROUP_FIRST_DR (stmt_info))
        /* If IS_STORE is TRUE, the vectorization of the
           interleaving chain was completed - free all the stores in
           the chain.  */
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
      else
        /* FORNOW: SLP originates only from strided stores.  */
        gcc_unreachable ();

      return true;
    }

  /* FORNOW: SLP originates only from strided stores.  */
  return false;
}
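
/* Illustrative sketch (not part of the original source): with GROUP_SIZE == 2
   scalar stmts per SLP node, a vectorization factor of 4 and vectors holding
   NUNITS == 4 elements, the node needs (4 * 2) / 4 == 2 vector stmts, which
   is the value stored in SLP_TREE_NUMBER_OF_VEC_STMTS above.  */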
static bool
vect_schedule_slp (loop_vec_info loop_vinfo)
{
  VEC (slp_instance, heap) *slp_instances =
    LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance,
                                             LOOP_VINFO_VECT_FACTOR (loop_vinfo));

      if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  return is_store;
}
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;
  bool slp_scheduled = false;
  unsigned int nunits;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vect_loop_versioning (loop_vinfo);

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_memsyms_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed needs to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      gimple phi;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (phi);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL, NULL);
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
        {
          gimple stmt = gsi_stmt (si);
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              gsi_next (&si);
              continue;
            }

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              gsi_next (&si);
              continue;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits =
            (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
              && vect_print_dump_info (REPORT_DETAILS))
            /* For SLP VF is set according to unrolling factor, and not to
               vector size, hence for SLP this print is not valid.  */
            fprintf (vect_dump, "multiple-types.");

          /* SLP.  Schedule all the SLP instances when the first SLP stmt is
             reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "=== scheduling SLP instances ===");

                  is_store = vect_schedule_slp (loop_vinfo);

                  /* IS_STORE is true if STMT is a store.  Stores cannot be of
                     hybrid SLP type.  They are removed in
                     vect_schedule_slp_instance and their vinfo is destroyed.  */
                  if (is_store)
                    {
                      gsi_next (&si);
                      continue;
                    }
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (PURE_SLP_STMT (stmt_info))
                {
                  gsi_next (&si);
                  continue;
                }
            }

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          strided_store = false;
          is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
          if (is_store)
            {
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization of the
                     interleaving chain was completed - free all the stores in
                     the chain.  */
                  vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
                  gsi_remove (&si, true);
                  continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  free_stmt_vec_info (stmt);
                  gsi_remove (&si, true);
                  continue;
                }
            }
          gsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  mark_set_for_renaming (vect_memsyms_to_rename);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}
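
/* Illustrative sketch (not part of the original source): with a symbolic or
   non-divisible iteration count, e.g. n == 103 and VF == 4, the vectorized
   loop above executes n/VF == 25 iterations, and the scalar epilogue created
   by vect_do_peeling_for_loop_bound executes the remaining n%VF == 3
   iterations.  */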