1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree
, block_stmt_iterator
*, bool *, slp_tree
);
50 static tree
vect_create_destination_var (tree
, tree
);
51 static tree vect_create_data_ref_ptr
52 (tree
, struct loop
*, tree
, tree
*, tree
*, bool, tree
, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree
, tree
*, tree
, struct loop
*);
55 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
56 static tree
vect_get_vec_def_for_operand (tree
, tree
, tree
*);
57 static tree
vect_init_vector (tree
, tree
, tree
, block_stmt_iterator
*);
58 static void vect_finish_stmt_generation
59 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*);
60 static bool vect_is_simple_cond (tree
, loop_vec_info
);
61 static void vect_create_epilog_for_reduction (tree
, tree
, enum tree_code
, tree
);
62 static tree
get_initial_def_for_reduction (tree
, tree
, tree
*);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info
, tree
*, tree
*, tree
*);
67 static tree
vect_build_loop_niters (loop_vec_info
);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info
, tree
, edge
);
69 static tree
vect_gen_niters_for_prolog_loop (loop_vec_info
, tree
);
70 static void vect_update_init_of_dr (struct data_reference
*, tree niters
);
71 static void vect_update_inits_of_drs (loop_vec_info
, tree
);
72 static int vect_min_worthwhile_factor (enum tree_code
);
76 cost_for_stmt (tree stmt
)
78 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
80 switch (STMT_VINFO_TYPE (stmt_info
))
82 case load_vec_info_type
:
83 return TARG_SCALAR_LOAD_COST
;
84 case store_vec_info_type
:
85 return TARG_SCALAR_STORE_COST
;
86 case op_vec_info_type
:
87 case condition_vec_info_type
:
88 case assignment_vec_info_type
:
89 case reduc_vec_info_type
:
90 case induc_vec_info_type
:
91 case type_promotion_vec_info_type
:
92 case type_demotion_vec_info_type
:
93 case type_conversion_vec_info_type
:
94 case call_vec_info_type
:
95 return TARG_SCALAR_STMT_COST
;
96 case undef_vec_info_type
:
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available. */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo
)
116 int min_profitable_iters
;
117 int peel_iters_prologue
;
118 int peel_iters_epilogue
;
119 int vec_inside_cost
= 0;
120 int vec_outside_cost
= 0;
121 int scalar_single_iter_cost
= 0;
122 int scalar_outside_cost
= 0;
123 bool runtime_test
= false;
124 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
125 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
126 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
127 int nbbs
= loop
->num_nodes
;
128 int byte_misalign
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
129 int peel_guard_costs
= 0;
130 int innerloop_iters
= 0, factor
;
131 VEC (slp_instance
, heap
) *slp_instances
;
132 slp_instance instance
;
134 /* Cost model disabled. */
135 if (!flag_vect_cost_model
)
137 if (vect_print_dump_info (REPORT_COST
))
138 fprintf (vect_dump
, "cost model disabled.");
142 /* If the number of iterations is unknown, or the
143 peeling-for-misalignment amount is unknown, we will have to generate
144 a runtime test to test the loop count against the threshold. */
145 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
146 || (byte_misalign
< 0))
149 /* Requires loop versioning tests to handle misalignment. */
151 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)))
153 /* FIXME: Make cost depend on complexity of individual check. */
155 VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
));
156 if (vect_print_dump_info (REPORT_COST
))
157 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
158 "versioning to treat misalignment.\n");
161 if (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
163 /* FIXME: Make cost depend on complexity of individual check. */
165 VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
));
166 if (vect_print_dump_info (REPORT_COST
))
167 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
168 "versioning aliasing.\n");
171 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
172 || VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
174 vec_outside_cost
+= TARG_COND_TAKEN_BRANCH_COST
;
177 /* Count statements in scalar loop. Using this as scalar cost for a single
180 TODO: Add outer loop support.
182 TODO: Consider assigning different costs to different scalar
187 innerloop_iters
= 50; /* FIXME */
189 for (i
= 0; i
< nbbs
; i
++)
191 block_stmt_iterator si
;
192 basic_block bb
= bbs
[i
];
194 if (bb
->loop_father
== loop
->inner
)
195 factor
= innerloop_iters
;
199 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
201 tree stmt
= bsi_stmt (si
);
202 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
203 /* Skip stmts that are not vectorized inside the loop. */
204 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
205 && (!STMT_VINFO_LIVE_P (stmt_info
)
206 || STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
))
208 scalar_single_iter_cost
+= cost_for_stmt (stmt
) * factor
;
209 vec_inside_cost
+= STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) * factor
;
210 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
211 some of the "outside" costs are generated inside the outer-loop. */
212 vec_outside_cost
+= STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
);
216 /* Add additional cost for the peeled instructions in prologue and epilogue
219 FORNOW: If we dont know the value of peel_iters for prologue or epilogue
220 at compile-time - we assume it's vf/2 (the worst would be vf-1).
222 TODO: Build an expression that represents peel_iters for prologue and
223 epilogue to be used in a run-time test. */
225 if (byte_misalign
< 0)
227 peel_iters_prologue
= vf
/2;
228 if (vect_print_dump_info (REPORT_COST
))
229 fprintf (vect_dump
, "cost model: "
230 "prologue peel iters set to vf/2.");
232 /* If peeling for alignment is unknown, loop bound of main loop becomes
234 peel_iters_epilogue
= vf
/2;
235 if (vect_print_dump_info (REPORT_COST
))
236 fprintf (vect_dump
, "cost model: "
237 "epilogue peel iters set to vf/2 because "
238 "peeling for alignment is unknown .");
240 /* If peeled iterations are unknown, count a taken branch and a not taken
241 branch per peeled loop. Even if scalar loop iterations are known,
242 vector iterations are not known since peeled prologue iterations are
243 not known. Hence guards remain the same. */
244 peel_guard_costs
+= 2 * (TARG_COND_TAKEN_BRANCH_COST
245 + TARG_COND_NOT_TAKEN_BRANCH_COST
);
252 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
253 int element_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
254 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr
)));
255 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
257 peel_iters_prologue
= nelements
- (byte_misalign
/ element_size
);
260 peel_iters_prologue
= 0;
262 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
264 peel_iters_epilogue
= vf
/2;
265 if (vect_print_dump_info (REPORT_COST
))
266 fprintf (vect_dump
, "cost model: "
267 "epilogue peel iters set to vf/2 because "
268 "loop iterations are unknown .");
270 /* If peeled iterations are known but number of scalar loop
271 iterations are unknown, count a taken branch per peeled loop. */
272 peel_guard_costs
+= 2 * TARG_COND_TAKEN_BRANCH_COST
;
277 int niters
= LOOP_VINFO_INT_NITERS (loop_vinfo
);
278 peel_iters_prologue
= niters
< peel_iters_prologue
?
279 niters
: peel_iters_prologue
;
280 peel_iters_epilogue
= (niters
- peel_iters_prologue
) % vf
;
284 vec_outside_cost
+= (peel_iters_prologue
* scalar_single_iter_cost
)
285 + (peel_iters_epilogue
* scalar_single_iter_cost
)
288 /* FORNOW: The scalar outside cost is incremented in one of the
291 1. The vectorizer checks for alignment and aliasing and generates
292 a condition that allows dynamic vectorization. A cost model
293 check is ANDED with the versioning condition. Hence scalar code
294 path now has the added cost of the versioning check.
296 if (cost > th & versioning_check)
299 Hence run-time scalar is incremented by not-taken branch cost.
301 2. The vectorizer then checks if a prologue is required. If the
302 cost model check was not done before during versioning, it has to
303 be done before the prologue check.
306 prologue = scalar_iters
311 if (prologue == num_iters)
314 Hence the run-time scalar cost is incremented by a taken branch,
315 plus a not-taken branch, plus a taken branch cost.
317 3. The vectorizer then checks if an epilogue is required. If the
318 cost model check was not done before during prologue check, it
319 has to be done with the epilogue check.
325 if (prologue == num_iters)
328 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
331 Hence the run-time scalar cost should be incremented by 2 taken
334 TODO: The back end may reorder the BBS's differently and reverse
335 conditions/branch directions. Change the stimates below to
336 something more reasonable. */
340 /* Cost model check occurs at versioning. */
341 if (VEC_length (tree
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
342 || VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)))
343 scalar_outside_cost
+= TARG_COND_NOT_TAKEN_BRANCH_COST
;
346 /* Cost model occurs at prologue generation. */
347 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
348 scalar_outside_cost
+= 2 * TARG_COND_TAKEN_BRANCH_COST
349 + TARG_COND_NOT_TAKEN_BRANCH_COST
;
350 /* Cost model check occurs at epilogue generation. */
352 scalar_outside_cost
+= 2 * TARG_COND_TAKEN_BRANCH_COST
;
357 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
358 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
360 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
361 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
364 /* Calculate number of iterations required to make the vector version
365 profitable, relative to the loop bodies only. The following condition
367 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
369 SIC = scalar iteration cost, VIC = vector iteration cost,
370 VOC = vector outside cost, VF = vectorization factor,
371 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
372 SOC = scalar outside cost for run time cost model check. */
374 if ((scalar_single_iter_cost
* vf
) > vec_inside_cost
)
376 if (vec_outside_cost
<= 0)
377 min_profitable_iters
= 1;
380 min_profitable_iters
= ((vec_outside_cost
- scalar_outside_cost
) * vf
381 - vec_inside_cost
* peel_iters_prologue
382 - vec_inside_cost
* peel_iters_epilogue
)
383 / ((scalar_single_iter_cost
* vf
)
386 if ((scalar_single_iter_cost
* vf
* min_profitable_iters
)
387 <= ((vec_inside_cost
* min_profitable_iters
)
388 + ((vec_outside_cost
- scalar_outside_cost
) * vf
)))
389 min_profitable_iters
++;
392 /* vector version will never be profitable. */
395 if (vect_print_dump_info (REPORT_COST
))
396 fprintf (vect_dump
, "cost model: vector iteration cost = %d "
397 "is divisible by scalar iteration cost = %d by a factor "
398 "greater than or equal to the vectorization factor = %d .",
399 vec_inside_cost
, scalar_single_iter_cost
, vf
);
403 if (vect_print_dump_info (REPORT_COST
))
405 fprintf (vect_dump
, "Cost model analysis: \n");
406 fprintf (vect_dump
, " Vector inside of loop cost: %d\n",
408 fprintf (vect_dump
, " Vector outside of loop cost: %d\n",
410 fprintf (vect_dump
, " Scalar iteration cost: %d\n",
411 scalar_single_iter_cost
);
412 fprintf (vect_dump
, " Scalar outside cost: %d\n", scalar_outside_cost
);
413 fprintf (vect_dump
, " prologue iterations: %d\n",
414 peel_iters_prologue
);
415 fprintf (vect_dump
, " epilogue iterations: %d\n",
416 peel_iters_epilogue
);
417 fprintf (vect_dump
, " Calculated minimum iters for profitability: %d\n",
418 min_profitable_iters
);
421 min_profitable_iters
=
422 min_profitable_iters
< vf
? vf
: min_profitable_iters
;
424 /* Because the condition we create is:
425 if (niters <= min_profitable_iters)
426 then skip the vectorized loop. */
427 min_profitable_iters
--;
429 if (vect_print_dump_info (REPORT_COST
))
430 fprintf (vect_dump
, " Profitability threshold = %d\n",
431 min_profitable_iters
);
433 return min_profitable_iters
;
437 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
438 functions. Design better to avoid maintenance issues. */
440 /* Function vect_model_reduction_cost.
442 Models cost for a reduction operation, including the vector ops
443 generated within the strip-mine loop, the initial definition before
444 the loop, and the epilogue code that must be generated. */
447 vect_model_reduction_cost (stmt_vec_info stmt_info
, enum tree_code reduc_code
,
456 enum machine_mode mode
;
457 tree operation
= GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info
), 1);
458 int op_type
= TREE_CODE_LENGTH (TREE_CODE (operation
));
459 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
460 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
462 /* Cost of reduction op inside loop. */
463 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) += ncopies
* TARG_VEC_STMT_COST
;
465 reduction_op
= TREE_OPERAND (operation
, op_type
-1);
466 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
469 if (vect_print_dump_info (REPORT_COST
))
471 fprintf (vect_dump
, "unsupported data-type ");
472 print_generic_expr (vect_dump
, TREE_TYPE (reduction_op
), TDF_SLIM
);
477 mode
= TYPE_MODE (vectype
);
478 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
481 orig_stmt
= STMT_VINFO_STMT (stmt_info
);
483 code
= TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt
, 1));
485 /* Add in cost for initial definition. */
486 outer_cost
+= TARG_SCALAR_TO_VEC_COST
;
488 /* Determine cost of epilogue code.
490 We have a reduction operator that will reduce the vector in one statement.
491 Also requires scalar extract. */
493 if (!nested_in_vect_loop_p (loop
, orig_stmt
))
495 if (reduc_code
< NUM_TREE_CODES
)
496 outer_cost
+= TARG_VEC_STMT_COST
+ TARG_VEC_TO_SCALAR_COST
;
499 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
501 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt
, 0)));
502 int element_bitsize
= tree_low_cst (bitsize
, 1);
503 int nelements
= vec_size_in_bits
/ element_bitsize
;
505 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
507 /* We have a whole vector shift available. */
508 if (VECTOR_MODE_P (mode
)
509 && optab_handler (optab
, mode
)->insn_code
!= CODE_FOR_nothing
510 && optab_handler (vec_shr_optab
, mode
)->insn_code
!= CODE_FOR_nothing
)
511 /* Final reduction via vector shifts and the reduction operator. Also
512 requires scalar extract. */
513 outer_cost
+= ((exact_log2(nelements
) * 2) * TARG_VEC_STMT_COST
514 + TARG_VEC_TO_SCALAR_COST
);
516 /* Use extracts and reduction op for final reduction. For N elements,
517 we have N extracts and N-1 reduction ops. */
518 outer_cost
+= ((nelements
+ nelements
- 1) * TARG_VEC_STMT_COST
);
522 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
) = outer_cost
;
524 if (vect_print_dump_info (REPORT_COST
))
525 fprintf (vect_dump
, "vect_model_reduction_cost: inside_cost = %d, "
526 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
527 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
533 /* Function vect_model_induction_cost.
535 Models cost for induction operations. */
538 vect_model_induction_cost (stmt_vec_info stmt_info
, int ncopies
)
540 /* loop cost for vec_loop. */
541 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) = ncopies
* TARG_VEC_STMT_COST
;
542 /* prologue cost for vec_init and vec_step. */
543 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
) = 2 * TARG_SCALAR_TO_VEC_COST
;
545 if (vect_print_dump_info (REPORT_COST
))
546 fprintf (vect_dump
, "vect_model_induction_cost: inside_cost = %d, "
547 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
548 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
552 /* Function vect_model_simple_cost.
554 Models cost for simple operations, i.e. those that only emit ncopies of a
555 single op. Right now, this does not account for multiple insns that could
556 be generated for the single vector op. We will handle that shortly. */
559 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
560 enum vect_def_type
*dt
, slp_tree slp_node
)
563 int inside_cost
= 0, outside_cost
= 0;
565 inside_cost
= ncopies
* TARG_VEC_STMT_COST
;
567 /* FORNOW: Assuming maximum 2 args per stmts. */
568 for (i
= 0; i
< 2; i
++)
570 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_invariant_def
)
571 outside_cost
+= TARG_SCALAR_TO_VEC_COST
;
574 if (vect_print_dump_info (REPORT_COST
))
575 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
576 "outside_cost = %d .", inside_cost
, outside_cost
);
578 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
579 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
580 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
584 /* Function vect_cost_strided_group_size
586 For strided load or store, return the group_size only if it is the first
587 load or store of a group, else return 1. This ensures that group size is
588 only returned once per group. */
591 vect_cost_strided_group_size (stmt_vec_info stmt_info
)
593 tree first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
595 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
596 return DR_GROUP_SIZE (stmt_info
);
602 /* Function vect_model_store_cost
604 Models cost for stores. In the case of strided accesses, one access
605 has the overhead of the strided access attributed to it. */
608 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
609 enum vect_def_type dt
, slp_tree slp_node
)
612 int inside_cost
= 0, outside_cost
= 0;
614 if (dt
== vect_constant_def
|| dt
== vect_invariant_def
)
615 outside_cost
= TARG_SCALAR_TO_VEC_COST
;
617 /* Strided access? */
618 if (DR_GROUP_FIRST_DR (stmt_info
))
619 group_size
= vect_cost_strided_group_size (stmt_info
);
620 /* Not a strided access. */
624 /* Is this an access in a group of stores, which provide strided access?
625 If so, add in the cost of the permutes. */
628 /* Uses a high and low interleave operation for each needed permute. */
629 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
630 * TARG_VEC_STMT_COST
;
632 if (vect_print_dump_info (REPORT_COST
))
633 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
638 /* Costs of the stores. */
639 inside_cost
+= ncopies
* TARG_VEC_STORE_COST
;
641 if (vect_print_dump_info (REPORT_COST
))
642 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
643 "outside_cost = %d .", inside_cost
, outside_cost
);
645 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
646 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
647 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
651 /* Function vect_model_load_cost
653 Models cost for loads. In the case of strided accesses, the last access
654 has the overhead of the strided access attributed to it. Since unaligned
655 accesses are supported for loads, we also account for the costs of the
656 access scheme chosen. */
659 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
, slp_tree slp_node
)
663 int alignment_support_cheme
;
665 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
666 int inside_cost
= 0, outside_cost
= 0;
668 /* Strided accesses? */
669 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
670 if (first_stmt
&& !slp_node
)
672 group_size
= vect_cost_strided_group_size (stmt_info
);
673 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
675 /* Not a strided access. */
682 alignment_support_cheme
= vect_supportable_dr_alignment (first_dr
);
684 /* Is this an access in a group of loads providing strided access?
685 If so, add in the cost of the permutes. */
688 /* Uses an even and odd extract operations for each needed permute. */
689 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
690 * TARG_VEC_STMT_COST
;
692 if (vect_print_dump_info (REPORT_COST
))
693 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
698 /* The loads themselves. */
699 switch (alignment_support_cheme
)
703 inside_cost
+= ncopies
* TARG_VEC_LOAD_COST
;
705 if (vect_print_dump_info (REPORT_COST
))
706 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
710 case dr_unaligned_supported
:
712 /* Here, we assign an additional cost for the unaligned load. */
713 inside_cost
+= ncopies
* TARG_VEC_UNALIGNED_LOAD_COST
;
715 if (vect_print_dump_info (REPORT_COST
))
716 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
721 case dr_explicit_realign
:
723 inside_cost
+= ncopies
* (2*TARG_VEC_LOAD_COST
+ TARG_VEC_STMT_COST
);
725 /* FIXME: If the misalignment remains fixed across the iterations of
726 the containing loop, the following cost should be added to the
728 if (targetm
.vectorize
.builtin_mask_for_load
)
729 inside_cost
+= TARG_VEC_STMT_COST
;
733 case dr_explicit_realign_optimized
:
735 if (vect_print_dump_info (REPORT_COST
))
736 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
739 /* Unaligned software pipeline has a load of an address, an initial
740 load, and possibly a mask operation to "prime" the loop. However,
741 if this is an access in a group of loads, which provide strided
742 access, then the above cost should only be considered for one
743 access in the group. Inside the loop, there is a load op
744 and a realignment op. */
746 if ((!DR_GROUP_FIRST_DR (stmt_info
)) || group_size
> 1 || slp_node
)
748 outside_cost
= 2*TARG_VEC_STMT_COST
;
749 if (targetm
.vectorize
.builtin_mask_for_load
)
750 outside_cost
+= TARG_VEC_STMT_COST
;
753 inside_cost
+= ncopies
* (TARG_VEC_LOAD_COST
+ TARG_VEC_STMT_COST
);
762 if (vect_print_dump_info (REPORT_COST
))
763 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
764 "outside_cost = %d .", inside_cost
, outside_cost
);
766 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
767 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
768 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
772 /* Function vect_get_new_vect_var.
774 Returns a name for a new variable. The current naming scheme appends the
775 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
776 the name of vectorizer generated variables, and appends that to NAME if
780 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
787 case vect_simple_var
:
790 case vect_scalar_var
:
793 case vect_pointer_var
:
802 char* tmp
= concat (prefix
, name
, NULL
);
803 new_vect_var
= create_tmp_var (type
, tmp
);
807 new_vect_var
= create_tmp_var (type
, prefix
);
809 /* Mark vector typed variable as a gimple register variable. */
810 if (TREE_CODE (type
) == VECTOR_TYPE
)
811 DECL_GIMPLE_REG_P (new_vect_var
) = true;
817 /* Function vect_create_addr_base_for_vector_ref.
819 Create an expression that computes the address of the first memory location
820 that will be accessed for a data reference.
823 STMT: The statement containing the data reference.
824 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
825 OFFSET: Optional. If supplied, it is be added to the initial address.
826 LOOP: Specify relative to which loop-nest should the address be computed.
827 For example, when the dataref is in an inner-loop nested in an
828 outer-loop that is now being vectorized, LOOP can be either the
829 outer-loop, or the inner-loop. The first memory location accessed
830 by the following dataref ('in' points to short):
837 if LOOP=i_loop: &in (relative to i_loop)
838 if LOOP=j_loop: &in+i*2B (relative to j_loop)
841 1. Return an SSA_NAME whose value is the address of the memory location of
842 the first vector of the data reference.
843 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
844 these statement(s) which define the returned SSA_NAME.
846 FORNOW: We are only handling array accesses with step 1. */
849 vect_create_addr_base_for_vector_ref (tree stmt
,
854 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
855 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
856 struct loop
*containing_loop
= (bb_for_stmt (stmt
))->loop_father
;
857 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
859 tree data_ref_base_var
;
862 tree addr_base
, addr_expr
;
864 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
865 tree init
= unshare_expr (DR_INIT (dr
));
866 tree vect_ptr_type
, addr_expr2
;
867 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
870 if (loop
!= containing_loop
)
872 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
873 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
875 gcc_assert (nested_in_vect_loop_p (loop
, stmt
));
877 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
878 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
879 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
882 /* Create data_ref_base */
883 base_name
= build_fold_indirect_ref (data_ref_base
);
884 data_ref_base_var
= create_tmp_var (TREE_TYPE (data_ref_base
), "batmp");
885 add_referenced_var (data_ref_base_var
);
886 data_ref_base
= force_gimple_operand (data_ref_base
, &new_base_stmt
,
887 true, data_ref_base_var
);
888 append_to_statement_list_force(new_base_stmt
, new_stmt_list
);
890 /* Create base_offset */
891 base_offset
= size_binop (PLUS_EXPR
, base_offset
, init
);
892 base_offset
= fold_convert (sizetype
, base_offset
);
893 dest
= create_tmp_var (TREE_TYPE (base_offset
), "base_off");
894 add_referenced_var (dest
);
895 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, true, dest
);
896 append_to_statement_list_force (new_stmt
, new_stmt_list
);
900 tree tmp
= create_tmp_var (sizetype
, "offset");
902 add_referenced_var (tmp
);
903 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (offset
), offset
, step
);
904 base_offset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base_offset
),
905 base_offset
, offset
);
906 base_offset
= force_gimple_operand (base_offset
, &new_stmt
, false, tmp
);
907 append_to_statement_list_force (new_stmt
, new_stmt_list
);
910 /* base + base_offset */
911 addr_base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (data_ref_base
),
912 data_ref_base
, base_offset
);
914 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
916 /* addr_expr = addr_base */
917 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
918 get_name (base_name
));
919 add_referenced_var (addr_expr
);
920 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
921 addr_expr2
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
922 get_name (base_name
));
923 add_referenced_var (addr_expr2
);
924 vec_stmt
= force_gimple_operand (vec_stmt
, &new_stmt
, false, addr_expr2
);
925 append_to_statement_list_force (new_stmt
, new_stmt_list
);
927 if (vect_print_dump_info (REPORT_DETAILS
))
929 fprintf (vect_dump
, "created ");
930 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
936 /* Function vect_create_data_ref_ptr.
938 Create a new pointer to vector type (vp), that points to the first location
939 accessed in the loop by STMT, along with the def-use update chain to
940 appropriately advance the pointer through the loop iterations. Also set
941 aliasing information for the pointer. This vector pointer is used by the
942 callers to this function to create a memory reference expression for vector
946 1. STMT: a stmt that references memory. Expected to be of the form
947 GIMPLE_MODIFY_STMT <name, data-ref> or
948 GIMPLE_MODIFY_STMT <data-ref, name>.
949 2. AT_LOOP: the loop where the vector memref is to be created.
950 3. OFFSET (optional): an offset to be added to the initial address accessed
951 by the data-ref in STMT.
952 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
953 pointing to the initial address.
954 5. TYPE: if not NULL indicates the required type of the data-ref
957 1. Declare a new ptr to vector_type, and have it point to the base of the
958 data reference (initial addressed accessed by the data reference).
959 For example, for vector of type V8HI, the following code is generated:
962 vp = (v8hi *)initial_address;
964 if OFFSET is not supplied:
965 initial_address = &a[init];
966 if OFFSET is supplied:
967 initial_address = &a[init + OFFSET];
969 Return the initial_address in INITIAL_ADDRESS.
971 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
972 update the pointer in each iteration of the loop.
974 Return the increment stmt that updates the pointer in PTR_INCR.
976 3. Set INV_P to true if the access pattern of the data reference in the
977 vectorized loop is invariant. Set it to false otherwise.
979 4. Return the pointer. */
982 vect_create_data_ref_ptr (tree stmt
, struct loop
*at_loop
,
983 tree offset
, tree
*initial_address
, tree
*ptr_incr
,
984 bool only_init
, tree type
, bool *inv_p
)
987 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
988 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
989 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
990 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
991 struct loop
*containing_loop
= (bb_for_stmt (stmt
))->loop_father
;
992 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
998 tree new_stmt_list
= NULL_TREE
;
1002 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1004 block_stmt_iterator incr_bsi
;
1006 tree indx_before_incr
, indx_after_incr
;
1010 /* Check the step (evolution) of the load in LOOP, and record
1011 whether it's invariant. */
1012 if (nested_in_vect_loop
)
1013 step
= STMT_VINFO_DR_STEP (stmt_info
);
1015 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
1017 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
1022 /* Create an expression for the first address accessed by this load
1024 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
1026 if (vect_print_dump_info (REPORT_DETAILS
))
1028 tree data_ref_base
= base_name
;
1029 fprintf (vect_dump
, "create vector-pointer variable to type: ");
1030 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
1031 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
1032 fprintf (vect_dump
, " vectorizing a one dimensional array ref: ");
1033 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
1034 fprintf (vect_dump
, " vectorizing a multidimensional array ref: ");
1035 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
1036 fprintf (vect_dump
, " vectorizing a record based array ref: ");
1037 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
1038 fprintf (vect_dump
, " vectorizing a pointer ref: ");
1039 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
1042 /** (1) Create the new vector-pointer variable: **/
1044 vect_ptr_type
= build_pointer_type (type
);
1046 vect_ptr_type
= build_pointer_type (vectype
);
1047 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
1048 get_name (base_name
));
1049 add_referenced_var (vect_ptr
);
1051 /** (2) Add aliasing information to the new vector-pointer:
1052 (The points-to info (DR_PTR_INFO) may be defined later.) **/
1054 tag
= DR_SYMBOL_TAG (dr
);
1057 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
1058 tag must be created with tag added to its may alias list. */
1060 new_type_alias (vect_ptr
, tag
, DR_REF (dr
));
1062 set_symbol_mem_tag (vect_ptr
, tag
);
1064 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
1065 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
1066 def-use update cycles for the pointer: One relative to the outer-loop
1067 (LOOP), which is what steps (3) and (4) below do. The other is relative
1068 to the inner-loop (which is the inner-most loop containing the dataref),
1069 and this is done be step (5) below.
1071 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
1072 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1073 redundant. Steps (3),(4) create the following:
1076 LOOP: vp1 = phi(vp0,vp2)
1082 If there is an inner-loop nested in loop, then step (5) will also be
1083 applied, and an additional update in the inner-loop will be created:
1086 LOOP: vp1 = phi(vp0,vp2)
1088 inner: vp3 = phi(vp1,vp4)
1089 vp4 = vp3 + inner_step
1095 /** (3) Calculate the initial address the vector-pointer, and set
1096 the vector-pointer to point to it before the loop: **/
1098 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1100 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
1102 pe
= loop_preheader_edge (loop
);
1105 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
1106 gcc_assert (!new_bb
);
1109 *initial_address
= new_temp
;
1111 /* Create: p = (vectype *) initial_base */
1112 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
1113 vec_stmt
= build_gimple_modify_stmt (vect_ptr
, vec_stmt
);
1114 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
1115 GIMPLE_STMT_OPERAND (vec_stmt
, 0) = vect_ptr_init
;
1116 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
1117 gcc_assert (!new_bb
);
1120 /** (4) Handle the updating of the vector-pointer inside the loop.
1121 This is needed when ONLY_INIT is false, and also when AT_LOOP
1122 is the inner-loop nested in LOOP (during outer-loop vectorization).
1125 if (only_init
&& at_loop
== loop
) /* No update in loop is required. */
1127 /* Copy the points-to information if it exists. */
1128 if (DR_PTR_INFO (dr
))
1129 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
1130 vptr
= vect_ptr_init
;
1134 /* The step of the vector pointer is the Vector Size. */
1135 tree step
= TYPE_SIZE_UNIT (vectype
);
1136 /* One exception to the above is when the scalar step of the load in
1137 LOOP is zero. In this case the step here is also zero. */
1139 step
= size_zero_node
;
1141 standard_iv_increment_position (loop
, &incr_bsi
, &insert_after
);
1143 create_iv (vect_ptr_init
,
1144 fold_convert (vect_ptr_type
, step
),
1145 NULL_TREE
, loop
, &incr_bsi
, insert_after
,
1146 &indx_before_incr
, &indx_after_incr
);
1147 incr
= bsi_stmt (incr_bsi
);
1148 set_stmt_info (stmt_ann (incr
),
1149 new_stmt_vec_info (incr
, loop_vinfo
));
1151 /* Copy the points-to information if it exists. */
1152 if (DR_PTR_INFO (dr
))
1154 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
1155 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
1157 merge_alias_info (vect_ptr_init
, indx_before_incr
);
1158 merge_alias_info (vect_ptr_init
, indx_after_incr
);
1162 vptr
= indx_before_incr
;
1165 if (!nested_in_vect_loop
|| only_init
)
1169 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1170 nested in LOOP, if exists: **/
1172 gcc_assert (nested_in_vect_loop
);
1175 standard_iv_increment_position (containing_loop
, &incr_bsi
,
1177 create_iv (vptr
, fold_convert (vect_ptr_type
, DR_STEP (dr
)), NULL_TREE
,
1178 containing_loop
, &incr_bsi
, insert_after
, &indx_before_incr
,
1180 incr
= bsi_stmt (incr_bsi
);
1181 set_stmt_info (stmt_ann (incr
), new_stmt_vec_info (incr
, loop_vinfo
));
1183 /* Copy the points-to information if it exists. */
1184 if (DR_PTR_INFO (dr
))
1186 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
1187 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
1189 merge_alias_info (vect_ptr_init
, indx_before_incr
);
1190 merge_alias_info (vect_ptr_init
, indx_after_incr
);
1194 return indx_before_incr
;
1201 /* Function bump_vector_ptr
1203 Increment a pointer (to a vector type) by vector-size. If requested,
1204 i.e. if PTR-INCR is given, then also connect the new increment stmt
1205 to the existing def-use update-chain of the pointer, by modifying
1206 the PTR_INCR as illustrated below:
1208 The pointer def-use update-chain before this function:
1209 DATAREF_PTR = phi (p_0, p_2)
1211 PTR_INCR: p_2 = DATAREF_PTR + step
1213 The pointer def-use update-chain after this function:
1214 DATAREF_PTR = phi (p_0, p_2)
1216 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1218 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1221 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1223 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1224 the loop. The increment amount across iterations is expected
1226 BSI - location where the new update stmt is to be placed.
1227 STMT - the original scalar memory-access stmt that is being vectorized.
1228 BUMP - optional. The offset by which to bump the pointer. If not given,
1229 the offset is assumed to be vector_size.
1231 Output: Return NEW_DATAREF_PTR as illustrated above.
1236 bump_vector_ptr (tree dataref_ptr
, tree ptr_incr
, block_stmt_iterator
*bsi
,
1237 tree stmt
, tree bump
)
1239 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1240 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1241 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1242 tree vptr_type
= TREE_TYPE (dataref_ptr
);
1243 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
1244 tree update
= TYPE_SIZE_UNIT (vectype
);
1247 use_operand_p use_p
;
1248 tree new_dataref_ptr
;
1253 incr_stmt
= build_gimple_modify_stmt (ptr_var
,
1254 build2 (POINTER_PLUS_EXPR
, vptr_type
,
1255 dataref_ptr
, update
));
1256 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
1257 GIMPLE_STMT_OPERAND (incr_stmt
, 0) = new_dataref_ptr
;
1258 vect_finish_stmt_generation (stmt
, incr_stmt
, bsi
);
1260 /* Copy the points-to information if it exists. */
1261 if (DR_PTR_INFO (dr
))
1262 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
1263 merge_alias_info (new_dataref_ptr
, dataref_ptr
);
1266 return new_dataref_ptr
;
1268 /* Update the vector-pointer's cross-iteration increment. */
1269 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
1271 tree use
= USE_FROM_PTR (use_p
);
1273 if (use
== dataref_ptr
)
1274 SET_USE (use_p
, new_dataref_ptr
);
1276 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
1279 return new_dataref_ptr
;
1283 /* Function vect_create_destination_var.
1285 Create a new temporary of type VECTYPE. */
1288 vect_create_destination_var (tree scalar_dest
, tree vectype
)
1291 const char *new_name
;
1293 enum vect_var_kind kind
;
1295 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
1296 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
1298 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
1300 new_name
= get_name (scalar_dest
);
1303 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
1304 add_referenced_var (vec_dest
);
1310 /* Function vect_init_vector.
1312 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1313 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1314 is not NULL. Otherwise, place the initialization at the loop preheader.
1315 Return the DEF of INIT_STMT.
1316 It will be used in the vectorization of STMT. */
1319 vect_init_vector (tree stmt
, tree vector_var
, tree vector_type
,
1320 block_stmt_iterator
*bsi
)
1322 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1330 new_var
= vect_get_new_vect_var (vector_type
, vect_simple_var
, "cst_");
1331 add_referenced_var (new_var
);
1332 init_stmt
= build_gimple_modify_stmt (new_var
, vector_var
);
1333 new_temp
= make_ssa_name (new_var
, init_stmt
);
1334 GIMPLE_STMT_OPERAND (init_stmt
, 0) = new_temp
;
1337 vect_finish_stmt_generation (stmt
, init_stmt
, bsi
);
1340 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1341 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1343 if (nested_in_vect_loop_p (loop
, stmt
))
1345 pe
= loop_preheader_edge (loop
);
1346 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
1347 gcc_assert (!new_bb
);
1350 if (vect_print_dump_info (REPORT_DETAILS
))
1352 fprintf (vect_dump
, "created new init_stmt: ");
1353 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
1356 vec_oprnd
= GIMPLE_STMT_OPERAND (init_stmt
, 0);
1361 /* For constant and loop invariant defs of SLP_NODE this function returns
1362 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1363 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1367 vect_get_constant_vectors (slp_tree slp_node
, VEC(tree
,heap
) **vec_oprnds
,
1368 unsigned int op_num
)
1370 VEC (tree
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
1371 tree stmt
= VEC_index (tree
, stmts
, 0);
1372 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1373 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1374 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1377 int j
, number_of_places_left_in_vector
;
1379 tree op
, vop
, operation
;
1380 int group_size
= VEC_length (tree
, stmts
);
1381 unsigned int vec_num
, i
;
1382 int number_of_copies
= 1;
1383 bool is_store
= false;
1384 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1385 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
1388 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
1391 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1392 created vectors. It is greater than 1 if unrolling is performed.
1394 For example, we have two scalar operands, s1 and s2 (e.g., group of
1395 strided accesses of size two), while NUINTS is four (i.e., four scalars
1396 of this type can be packed in a vector). The output vector will contain
1397 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1400 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1401 containing the operands.
1403 For example, NUINTS is four as before, and the group size is 8
1404 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1405 {s5, s6, s7, s8}. */
1407 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
1409 number_of_places_left_in_vector
= nunits
;
1411 for (j
= 0; j
< number_of_copies
; j
++)
1413 for (i
= group_size
- 1; VEC_iterate (tree
, stmts
, i
, stmt
); i
--)
1415 operation
= GIMPLE_STMT_OPERAND (stmt
, 1);
1419 op
= TREE_OPERAND (operation
, op_num
);
1420 if (!CONSTANT_CLASS_P (op
))
1423 /* Create 'vect_ = {op0,op1,...,opn}'. */
1424 t
= tree_cons (NULL_TREE
, op
, t
);
1426 number_of_places_left_in_vector
--;
1428 if (number_of_places_left_in_vector
== 0)
1430 number_of_places_left_in_vector
= nunits
;
1432 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1433 gcc_assert (vector_type
);
1435 vec_cst
= build_vector (vector_type
, t
);
1437 vec_cst
= build_constructor_from_list (vector_type
, t
);
1439 VEC_quick_push (tree
, voprnds
,
1440 vect_init_vector (stmt
, vec_cst
, vector_type
,
1447 /* Since the vectors are created in the reverse order, we should invert
1449 vec_num
= VEC_length (tree
, voprnds
);
1450 for (j
= vec_num
- 1; j
>= 0; j
--)
1452 vop
= VEC_index (tree
, voprnds
, j
);
1453 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1456 VEC_free (tree
, heap
, voprnds
);
1458 /* In case that VF is greater than the unrolling factor needed for the SLP
1459 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1460 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1461 to replicate the vectors. */
1462 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
1464 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
1465 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1470 /* Get vectorized definitions from SLP_NODE that contains corresponding
1471 vectorized def-stmts. */
1474 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
1480 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
1483 VEC_iterate (tree
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
);
1486 gcc_assert (vec_def_stmt
);
1487 vec_oprnd
= GIMPLE_STMT_OPERAND (vec_def_stmt
, 0);
1488 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
1493 /* Get vectorized definitions for SLP_NODE.
1494 If the scalar definitions are loop invariants or constants, collect them and
1495 call vect_get_constant_vectors() to create vector stmts.
1496 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1497 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1498 vect_get_slp_vect_defs() to retrieve them.
1499 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1500 the right node. This is used when the second operand must remain scalar. */
1503 vect_get_slp_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds0
,
1504 VEC (tree
,heap
) **vec_oprnds1
)
1506 tree operation
, first_stmt
;
1508 /* Allocate memory for vectorized defs. */
1509 *vec_oprnds0
= VEC_alloc (tree
, heap
,
1510 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
));
1512 /* SLP_NODE corresponds either to a group of stores or to a group of
1513 unary/binary operations. We don't call this function for loads. */
1514 if (SLP_TREE_LEFT (slp_node
))
1515 /* The defs are already vectorized. */
1516 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
1518 /* Build vectors from scalar defs. */
1519 vect_get_constant_vectors (slp_node
, vec_oprnds0
, 0);
1521 first_stmt
= VEC_index (tree
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
1522 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
1523 /* Since we don't call this function with loads, this is a group of
1527 operation
= GIMPLE_STMT_OPERAND (first_stmt
, 1);
1528 if (TREE_OPERAND_LENGTH (operation
) == unary_op
|| !vec_oprnds1
)
1531 *vec_oprnds1
= VEC_alloc (tree
, heap
,
1532 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
));
1534 if (SLP_TREE_RIGHT (slp_node
))
1535 /* The defs are already vectorized. */
1536 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
1538 /* Build vectors from scalar defs. */
1539 vect_get_constant_vectors (slp_node
, vec_oprnds1
, 1);
1543 /* Function get_initial_def_for_induction
1546 STMT - a stmt that performs an induction operation in the loop.
1547 IV_PHI - the initial value of the induction variable
1550 Return a vector variable, initialized with the first VF values of
1551 the induction variable. E.g., for an iv with IV_PHI='X' and
1552 evolution S, for a vector of 4 units, we want to return:
1553 [X, X + S, X + 2*S, X + 3*S]. */
1556 get_initial_def_for_induction (tree iv_phi
)
1558 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (iv_phi
);
1559 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1560 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1561 tree scalar_type
= TREE_TYPE (PHI_RESULT_TREE (iv_phi
));
1564 edge pe
= loop_preheader_edge (loop
);
1565 struct loop
*iv_loop
;
1567 tree vec
, vec_init
, vec_step
, t
;
1572 tree induction_phi
, induc_def
, new_stmt
, vec_def
, vec_dest
;
1573 tree init_expr
, step_expr
;
1574 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1579 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
1580 bool nested_in_vect_loop
= false;
1582 imm_use_iterator imm_iter
;
1583 use_operand_p use_p
;
1587 block_stmt_iterator si
;
1588 basic_block bb
= bb_for_stmt (iv_phi
);
1590 vectype
= get_vectype_for_scalar_type (scalar_type
);
1591 gcc_assert (vectype
);
1592 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1593 ncopies
= vf
/ nunits
;
1595 gcc_assert (phi_info
);
1596 gcc_assert (ncopies
>= 1);
1598 /* Find the first insertion point in the BB. */
1599 si
= bsi_after_labels (bb
);
1601 if (INTEGRAL_TYPE_P (scalar_type
))
1602 step_expr
= build_int_cst (scalar_type
, 0);
1604 step_expr
= build_real (scalar_type
, dconst0
);
1606 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1607 if (nested_in_vect_loop_p (loop
, iv_phi
))
1609 nested_in_vect_loop
= true;
1610 iv_loop
= loop
->inner
;
1614 gcc_assert (iv_loop
== (bb_for_stmt (iv_phi
))->loop_father
);
1616 latch_e
= loop_latch_edge (iv_loop
);
1617 loop_arg
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, latch_e
);
1619 access_fn
= analyze_scalar_evolution (iv_loop
, PHI_RESULT (iv_phi
));
1620 gcc_assert (access_fn
);
1621 ok
= vect_is_simple_iv_evolution (iv_loop
->num
, access_fn
,
1622 &init_expr
, &step_expr
);
1624 pe
= loop_preheader_edge (iv_loop
);
1626 /* Create the vector that holds the initial_value of the induction. */
1627 if (nested_in_vect_loop
)
1629 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1630 been created during vectorization of previous stmts; We obtain it from
1631 the STMT_VINFO_VEC_STMT of the defining stmt. */
1632 tree iv_def
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, loop_preheader_edge (iv_loop
));
1633 vec_init
= vect_get_vec_def_for_operand (iv_def
, iv_phi
, NULL
);
1637 /* iv_loop is the loop to be vectorized. Create:
1638 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1639 new_var
= vect_get_new_vect_var (scalar_type
, vect_scalar_var
, "var_");
1640 add_referenced_var (new_var
);
1642 new_name
= force_gimple_operand (init_expr
, &stmts
, false, new_var
);
1645 new_bb
= bsi_insert_on_edge_immediate (pe
, stmts
);
1646 gcc_assert (!new_bb
);
1650 t
= tree_cons (NULL_TREE
, init_expr
, t
);
1651 for (i
= 1; i
< nunits
; i
++)
1655 /* Create: new_name_i = new_name + step_expr */
1656 tmp
= fold_build2 (PLUS_EXPR
, scalar_type
, new_name
, step_expr
);
1657 init_stmt
= build_gimple_modify_stmt (new_var
, tmp
);
1658 new_name
= make_ssa_name (new_var
, init_stmt
);
1659 GIMPLE_STMT_OPERAND (init_stmt
, 0) = new_name
;
1661 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
1662 gcc_assert (!new_bb
);
1664 if (vect_print_dump_info (REPORT_DETAILS
))
1666 fprintf (vect_dump
, "created new init_stmt: ");
1667 print_generic_expr (vect_dump
, init_stmt
, TDF_SLIM
);
1669 t
= tree_cons (NULL_TREE
, new_name
, t
);
1671 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1672 vec
= build_constructor_from_list (vectype
, nreverse (t
));
1673 vec_init
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1677 /* Create the vector that holds the step of the induction. */
1678 if (nested_in_vect_loop
)
1679 /* iv_loop is nested in the loop to be vectorized. Generate:
1680 vec_step = [S, S, S, S] */
1681 new_name
= step_expr
;
1684 /* iv_loop is the loop to be vectorized. Generate:
1685 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1686 expr
= build_int_cst (scalar_type
, vf
);
1687 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
1691 for (i
= 0; i
< nunits
; i
++)
1692 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
1693 gcc_assert (CONSTANT_CLASS_P (new_name
));
1694 vec
= build_vector (vectype
, t
);
1695 vec_step
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1698 /* Create the following def-use cycle:
1703 vec_iv = PHI <vec_init, vec_loop>
1707 vec_loop = vec_iv + vec_step; */
1709 /* Create the induction-phi that defines the induction-operand. */
1710 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
1711 add_referenced_var (vec_dest
);
1712 induction_phi
= create_phi_node (vec_dest
, iv_loop
->header
);
1713 set_stmt_info (get_stmt_ann (induction_phi
),
1714 new_stmt_vec_info (induction_phi
, loop_vinfo
));
1715 induc_def
= PHI_RESULT (induction_phi
);
1717 /* Create the iv update inside the loop */
1718 new_stmt
= build_gimple_modify_stmt (NULL_TREE
,
1719 build2 (PLUS_EXPR
, vectype
,
1720 induc_def
, vec_step
));
1721 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
1722 GIMPLE_STMT_OPERAND (new_stmt
, 0) = vec_def
;
1723 bsi_insert_before (&si
, new_stmt
, BSI_SAME_STMT
);
1724 set_stmt_info (get_stmt_ann (new_stmt
),
1725 new_stmt_vec_info (new_stmt
, loop_vinfo
));
1727 /* Set the arguments of the phi node: */
1728 add_phi_arg (induction_phi
, vec_init
, pe
);
1729 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (iv_loop
));
1732 /* In case that vectorization factor (VF) is bigger than the number
1733 of elements that we can fit in a vectype (nunits), we have to generate
1734 more than one vector stmt - i.e - we need to "unroll" the
1735 vector stmt by a factor VF/nunits. For more details see documentation
1736 in vectorizable_operation. */
1740 stmt_vec_info prev_stmt_vinfo
;
1741 /* FORNOW. This restriction should be relaxed. */
1742 gcc_assert (!nested_in_vect_loop
);
1744 /* Create the vector that holds the step of the induction. */
1745 expr
= build_int_cst (scalar_type
, nunits
);
1746 new_name
= fold_build2 (MULT_EXPR
, scalar_type
, expr
, step_expr
);
1748 for (i
= 0; i
< nunits
; i
++)
1749 t
= tree_cons (NULL_TREE
, unshare_expr (new_name
), t
);
1750 gcc_assert (CONSTANT_CLASS_P (new_name
));
1751 vec
= build_vector (vectype
, t
);
1752 vec_step
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
1754 vec_def
= induc_def
;
1755 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
1756 for (i
= 1; i
< ncopies
; i
++)
1760 /* vec_i = vec_prev + vec_step */
1761 tmp
= build2 (PLUS_EXPR
, vectype
, vec_def
, vec_step
);
1762 new_stmt
= build_gimple_modify_stmt (NULL_TREE
, tmp
);
1763 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
1764 GIMPLE_STMT_OPERAND (new_stmt
, 0) = vec_def
;
1765 bsi_insert_before (&si
, new_stmt
, BSI_SAME_STMT
);
1766 set_stmt_info (get_stmt_ann (new_stmt
),
1767 new_stmt_vec_info (new_stmt
, loop_vinfo
));
1768 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
1769 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
1773 if (nested_in_vect_loop
)
1775 /* Find the loop-closed exit-phi of the induction, and record
1776 the final vector of induction results: */
1778 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
1780 if (!flow_bb_inside_loop_p (iv_loop
, bb_for_stmt (USE_STMT (use_p
))))
1782 exit_phi
= USE_STMT (use_p
);
1788 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
1789 /* FORNOW. Currently not supporting the case that an inner-loop induction
1790 is not used in the outer-loop (i.e. only outside the outer-loop). */
1791 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
1792 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
1794 STMT_VINFO_VEC_STMT (stmt_vinfo
) = new_stmt
;
1795 if (vect_print_dump_info (REPORT_DETAILS
))
1797 fprintf (vect_dump
, "vector of inductions after inner-loop:");
1798 print_generic_expr (vect_dump
, new_stmt
, TDF_SLIM
);
1804 if (vect_print_dump_info (REPORT_DETAILS
))
1806 fprintf (vect_dump
, "transform induction: created def-use cycle:");
1807 print_generic_expr (vect_dump
, induction_phi
, TDF_SLIM
);
1808 fprintf (vect_dump
, "\n");
1809 print_generic_expr (vect_dump
, SSA_NAME_DEF_STMT (vec_def
), TDF_SLIM
);
1812 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
1817 /* Function vect_get_vec_def_for_operand.
1819 OP is an operand in STMT. This function returns a (vector) def that will be
1820 used in the vectorized stmt for STMT.
1822 In the case that OP is an SSA_NAME which is defined in the loop, then
1823 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1825 In case OP is an invariant or constant, a new stmt that creates a vector def
1826 needs to be introduced. */
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def = ");
      print_generic_expr (vect_dump, def, TDF_SLIM);
      fprintf (vect_dump, " def_stmt = ");
      print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, op, t);

        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        vec_cst = build_vector (vector_type, t);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }
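    /* Illustration (not in the original sources): for a constant operand 5
       and a V4SI vectype, the loop above builds the element list {5,5,5,5}
       and vect_init_vector emits an initialization of a new vector variable
       to that constant in the loop preheader, returning its ssa name.  */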
    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, def, t);

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (TREE_CODE (vec_stmt) == PHI_NODE)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
        loop = (bb_for_stmt (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:        vectorized into:            STMT_VINFO_RELATED_STMT

   S1: x = load        VS1.0:  vx.0 = memref0      VS1.1
                       VS1.1:  vx.1 = memref1      VS1.2
                       VS1.2:  vx.2 = memref2      VS1.3
                       VS1.3:  vx.3 = memref3

   S2: z = x + ...     VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                       VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                       VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                       VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_invariant_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
                   VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
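/* Usage sketch (illustration only, not from the original sources): callers
   that unroll a stmt into ncopies vector stmts typically obtain operand defs
   with vect_get_vec_defs for the first copy and with
   vect_get_vec_defs_for_stmt_copy for the remaining copies, e.g.:

     if (j == 0)
       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, slp_node);
     else
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   (see e.g. vectorizable_conversion below).  */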
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  gcc_assert (stmt == bsi_stmt (*bsi));
  gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
}
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

     s = init_val;
     for (i = 0; i < n; i++)
       s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries).
   A cost model should help decide between these two schemes.  */
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  bool nested_in_vect_loop = false;

  gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
              || SCALAR_FLOAT_TYPE_P (type));
  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);

  vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);

  switch (code)
    {
    case WIDEN_SUM_EXPR:
      if (nested_in_vect_loop)
        *adjustment_def = vecdef;
      else
        *adjustment_def = init_val;
      /* Create a vector of zeros for init_def.  */
      if (SCALAR_FLOAT_TYPE_P (type))
        def_for_init = build_real (type, dconst0);
      else
        def_for_init = build_int_cst (type, 0);
      for (i = nunits - 1; i >= 0; --i)
        t = tree_cons (NULL_TREE, def_for_init, t);
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
      gcc_assert (vector_type);
      init_def = build_vector (vector_type, t);
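      /* Illustration (not in the original code): for an integer sum
         reduction with init_val 's0' and a V4SI vectype, the code above
         builds {0,0,0,0} as the initial value of the vector of partial
         results, and sets *adjustment_def to 's0' so that the epilog adds
         it back into the final scalar result.  */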
      break;

      /* Remaining cases (e.g. min/max): no epilog adjustment is needed.  */
      *adjustment_def = NULL_TREE;
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        vec_def = phi <null, null>            # REDUCTION_PHI
        VECT_DEF = vector_stmt                # vectorized form of STMT
        s_loop = scalar_stmt                  # (scalar) STMT

        s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI

   The above is transformed by this function into:

        vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
        VECT_DEF = vector_stmt                # vectorized form of STMT
        s_loop = scalar_stmt                  # (scalar) STMT

        s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
        v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
        v_out2 = reduce <v_out1>
        s_out3 = extract_field <v_out2, 0>
        s_out4 = adjust_result <s_out3>
*/
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code,
                                  tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  block_stmt_iterator exit_bsi;
  tree new_temp = NULL_TREE;
  tree epilog_stmt = NULL_TREE;
  tree new_scalar_dest, exit_phi, new_dest;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree adjustment_def;
  tree vec_initial_def;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result = false;
  tree reduction_op, expr;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  bool nested_in_vect_loop = false;
  VEC(tree,heap) *phis = NULL;

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;

  op_type = TREE_OPERAND_LENGTH (operation);
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }
  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  ***/
  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
  exit_bsi = bsi_after_labels (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
  scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop.  The final vector of partial results will be used in the
     vectorized outer-loop, or reduced to a scalar result at the end of the
     outer-loop.  */
  if (nested_in_vect_loop)
    goto vect_finalize_reduction;
  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      tree tmp;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
      epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);

      if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
               {
                 Create:  va' = vec_shift <va, offset>
                 Create:  va = vop <va, va'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);
              tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
              epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);

              tmp = build2 (code, vectype, new_name, new_temp);
              epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
            }

          extract_scalar_result = true;
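          /* Illustration (not from the original sources): for a V4SI sum
             with partial results {a0,a1,a2,a3}, each iteration above adds
             the vector to a copy of itself shifted by half the remaining
             width, so after log2(4) = 2 rounds one element of the vector
             holds a0+a1+a2+a3; that element is extracted in step 2.4.  */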
        }
      else
        {
          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);
              epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);

              tmp = build2 (code, scalar_type, new_name, new_temp);
              epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
            }

          extract_scalar_result = false;
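          /* Illustration (not from the original sources): for a V4SI sum
             the code above emits roughly:
                s  = BIT_FIELD_REF <v_out1, 32, 0>;
                s' = BIT_FIELD_REF <v_out1, 32, 32>;  s = s + s';
                s' = BIT_FIELD_REF <v_out1, 32, 64>;  s = s + s';
                s' = BIT_FIELD_REF <v_out1, 32, 96>;  s = s + s';
             i.e. one extract and one scalar op per remaining element.  */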
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
         s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      gcc_assert (!nested_in_vect_loop);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
    }

vect_finalize_reduction:
  /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */

  if (adjustment_def)
    {
      if (nested_in_vect_loop)
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }
      epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
    }
  /* 2.6  Handle the loop-exit phi  */

  /* Replace uses of s_out0 with uses of s_out3:
     Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  phis = VEC_alloc (tree, heap, 10);
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          VEC_quick_push (tree, phis, exit_phi);
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (!VEC_empty (tree, phis));

  for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
    {
      if (nested_in_vect_loop)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);

          /* FORNOW. Currently not supporting the case that an inner-loop reduction
             is not used in the outer-loop (but only outside the outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
          STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
          set_stmt_info (get_stmt_ann (epilog_stmt),
                         new_stmt_vec_info (epilog_stmt, loop_vinfo));
          continue;
        }

      /* Replace the uses:  */
      orig_name = PHI_RESULT (exit_phi);
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          SET_USE (use_p, new_temp);
    }
  VEC_free (tree, heap, phis);
}
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
        STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  enum vect_def_type dt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  stmt_vec_info prev_stmt_info;
  tree new_stmt = NULL_TREE;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      /* FORNOW. This restriction should be relaxed.  */
      if (ncopies > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple types in nested loop.");
          return false;
        }
    }

  gcc_assert (ncopies >= 1);

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
    return false;

  /* Reductions that are not used even in an enclosing outer-loop,
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }
  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  op_type = TREE_OPERAND_LENGTH (operation);
  if (op_type != binary_op && op_type != ternary_op)
    return false;
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type-1; i++)
    {
      op = TREE_OPERAND (operation, i);
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
      gcc_assert (is_simple_use);
      if (dt != vect_loop_def
          && dt != vect_invariant_def
          && dt != vect_constant_def
          && dt != vect_induction_def)
        return false;
    }

  op = TREE_OPERAND (operation, i);
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;
  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }
  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; The original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced by:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "unsupported data-type ");
              print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
            }
          return false;
        }
      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: use the same vectype and tree-code as used for
         the vector code inside the loop can be used for the epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");
  /* Create the destination vector  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Create the reduction-phi that defines the reduction-operand.  */
  new_phi = create_phi_node (vec_dest, loop->header);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
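  /* Illustration (not from the original sources): with VF = 8 and a V4SI
     vectype, ncopies = 2; the loop below emits two chained vector stmts,
     where the reduction operand of copy j is the result of copy j-1 (and,
     for j = 0, the result of the reduction phi created above).  */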
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        {
          op = TREE_OPERAND (operation, 0);
          loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
          if (op_type == ternary_op)
            {
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }

          /* Get the vector def for the reduction variable from the phi node  */
          reduc_def = PHI_RESULT (new_phi);
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1)  */
          reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
        }

      /* Arguments are ready. create the new vector stmt.  */
      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = build_gimple_modify_stmt (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (tree call, tree vectype_out, tree vectype_in)
{
  tree fndecl = get_callee_fndecl (call);
  enum built_in_function code;

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (fndecl == NULL_TREE
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  code = DECL_FUNCTION_CODE (fndecl);
  return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ncopies, j, nargs;
  call_expr_arg_iterator iter;
  enum { NARROW, NONE, WIDEN } modifier;

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  if (TREE_CODE (operation) != CALL_EXPR)
    return false;

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
    {
      /* Bail out if the function has more than two arguments, we
         do not have interesting builtin functions to vectorize with
         more than two arguments.  */

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && rhs_type != TREE_TYPE (op))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* No arguments is also not good.  */

  vectype_in = get_vectype_for_scalar_type (rhs_type);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
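  /* Illustration (not from the original sources): a call taking V4SF
     arguments and producing a V2DF result gives nunits_in = 4 and
     nunits_out = 2, hence modifier = WIDEN; the converse (V2DF arguments,
     V4SF result) gives modifier = NARROW, and matching unit counts give
     modifier = NONE.  */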
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (operation, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");
      return false;
    }

  gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          /* FIXME: Rewrite this so that it doesn't
             construct a temporary list.  */
          vargs = NULL_TREE;
          FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
            {
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                vec_oprnd0
                  = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);

              vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
            }
          vargs = nreverse (vargs);

          rhs = build_function_call_expr (fndecl, vargs);
          new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;

          vect_finish_stmt_generation (stmt, new_stmt, bsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          /* FIXME: Rewrite this so that it doesn't
             construct a temporary list.  */
          vargs = NULL_TREE;
          FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
            {
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
                }
              else
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
                }

              vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
              vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
            }
          vargs = nreverse (vargs);

          rhs = build_function_call_expr (fndecl, vargs);
          new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;

          vect_finish_stmt_generation (stmt, new_stmt, bsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
  type = TREE_TYPE (scalar_dest);
  GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
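/* Note (illustration, not from the original sources): a widening conversion
   is vectorized as two such "halves" - e.g. converting eight shorts held in
   one V8HI vector into eight ints requires two V4SI results, so
   vectorizable_conversion below calls this helper twice, once with the
   "high" code/decl and once with the "low" one.  */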
static tree
vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, block_stmt_iterator *bsi,
                               tree stmt)
{
  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        expr = build_call_expr (decl, 1, vec_oprnd0);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type == binary_op)
        expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
      else
        expr = build1 (code, vectype, vec_oprnd0);
    }
  new_stmt = build_gimple_modify_stmt (vec_dest, expr);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, new_stmt, bsi);

  if (code == CALL_EXPR)
    {
      FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
        {
          if (TREE_CODE (sym) == SSA_NAME)
            sym = SSA_NAME_VAR (sym);
          mark_sym_for_renaming (sym);
        }
    }

  return new_stmt;
}
/* Function vectorizable_conversion.

   Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
                         tree *vec_stmt, slp_tree slp_node)
{
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  tree new_stmt = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  tree rhs_type, lhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  VEC(tree,heap) *vec_oprnds0 = NULL;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  op0 = TREE_OPERAND (operation, 0);
  rhs_type = TREE_TYPE (op0);
  vectype_in = get_vectype_for_scalar_type (rhs_type);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
  if (modifier == NONE)
    gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);

  /* Bail out if the types are both integral or non-integral.  */
  if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
      || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
     this, so we can safely override NCOPIES with 1 here.  */
  if (slp_node)
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, vectype_in))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, stmt, vectype_in,
                                               &code1)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code, vectype_in);
          for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
            {
              new_stmt = build_call_expr (builtin_decl, 1, vop0);

              /* Arguments are ready. create the new vector stmt.  */
              new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
              vect_finish_stmt_generation (stmt, new_stmt, bsi);
              FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
                                         SSA_OP_ALL_VIRTUALS)
                {
                  if (TREE_CODE (sym) == SSA_NAME)
                    sym = SSA_NAME_VAR (sym);
                  mark_sym_for_renaming (sym);
                }
              if (slp_node)
                VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, vectype_out, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, bsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, vectype_out, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, bsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready. Create the new vector stmt.  */
          expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
          new_stmt = build_gimple_modify_stmt (vec_dest, expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
          vect_finish_stmt_generation (stmt, new_stmt, bsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                         slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  VEC(tree,heap) *vec_oprnds = NULL;

  /* FORNOW: SLP with multiple types is not supported. The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  if (slp_node)
    ncopies = 1;

  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is vectorizable assignment?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);

  /* Arguments are ready. create the new vector stmt.  */
  for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
    {
      *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
      new_temp = make_ssa_name (vec_dest, *vec_stmt);
      GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
      STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;

      if (slp_node)
        VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */

static int
vect_min_worthwhile_factor (enum tree_code code)
/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                        tree *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (TREE_CODE (phi) != PHI_NODE)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_induction ===");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform induction phi.");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);

  return true;
}
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                        slp_tree slp_node)
{
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code;
  enum machine_mode vec_mode;
  enum machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  tree new_stmt = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  bool shift_p = false;
  bool scalar_shift_arg = false;

  /* FORNOW: SLP with multiple types is not supported. The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_out != nunits_in)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_OPERAND_LENGTH (operation);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  op0 = TREE_OPERAND (operation, 0);
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If this is a shift/rotate, determine whether the shift amount is a vector,
     or scalar.  If the shift/rotate amount is a vector, use the vector/vector
     shift optabs.  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      /* vector shifted by vector */
      if (dt[1] == vect_loop_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/vector shift/rotate found.");
        }
      /* See if the machine has a vector shifted by scalar insn and if not
         then see if it has a vector shifted by vector insn */
      else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_scalar);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = true;
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/scalar shift/rotate found.");
            }
          else
            {
              optab = optab_for_tree_code (code, vectype, optab_vector);
              if (vect_print_dump_info (REPORT_DETAILS)
                  && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                      != CODE_FOR_nothing))
                fprintf (vect_dump, "vector/vector shift/rotate found.");
            }
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "operand mode requires invariant argument.");
          return false;
        }
    }
  else
    optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode)->insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support? Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands. In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument. In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1. We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt. The example below illustrates
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
     4 vectorized stmts):

     before vectorization:
                                    RELATED_STMT    VEC_STMT
        S1:     x = memref          -               -
        S2:     z = x + 1           -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                    RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0       VS1_1           -
        VS1_1:  vx1 = memref1       VS1_2           -
        VS1_2:  vx2 = memref2       VS1_3           -
        VS1_3:  vx3 = memref3       -               -
        S1:     x = load            -               VS1_0
        S2:     z = x + 1           -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'. This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1). This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'. Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0. This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
        chain of stmts and pointers:
                                    RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0       VS1_1           -
        VS1_1:  vx1 = memref1       VS1_2           -
        VS1_2:  vx2 = memref2       VS1_3           -
        VS1_3:  vx3 = memref3       -               -
        S1:     x = load            -               VS1_0
        VS2_0:  vz0 = vx0 + v1      VS2_1           -
        VS2_1:  vz1 = vx1 + v1      VS2_2           -
        VS2_2:  vz2 = vx2 + v1      VS2_3           -
        VS2_3:  vz3 = vx3 + v1      -               -
        S2:     z = x + 1           -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op && scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand). In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE. We check during the analysis that all the
                         shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (op_type == binary_op && !vec_oprnd1)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready. Create the new vector stmt.  */
      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
        {
          if (op_type == binary_op)
            {
              vop1 = VEC_index (tree, vec_oprnds1, i);
              new_stmt = build_gimple_modify_stmt (vec_dest,
                                        build2 (code, vectype, vop0, vop1));
            }
          else
            new_stmt = build_gimple_modify_stmt (vec_dest,
                                        build1 (code, vectype, vop0));

          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
          if (slp_node)
            VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);

  return true;
}
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
                            tree *vec_stmt)
{
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, code1 = ERROR_MARK;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is STMT a vectorizable type-demotion operation?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != CONVERT_EXPR)
    return false;

  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in != nunits_out / 2) /* FORNOW */
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
             && (code == NOP_EXPR || code == CONVERT_EXPR))))
    return false;

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* Supportable by target?  */
  if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_demotion ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.   */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
        }

      /* Arguments are ready. Create the new vector stmt.  */
      expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
      new_stmt = build_gimple_modify_stmt (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
                             tree *vec_stmt)
{
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is STMT a vectorizable type-promotion operation?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != CONVERT_EXPR
      && code != WIDEN_MULT_EXPR)
    return false;

  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_out != nunits_in / 2) /* FORNOW */
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
             && (code == CONVERT_EXPR || code == NOP_EXPR))))
    return false;

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  op_type = TREE_CODE_LENGTH (code);
  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!supportable_widening_operation (code, stmt, vectype_in,
                                       &decl1, &decl2, &code1, &code2))
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_promotion ===");
      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.   */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
          if (op_type == binary_op)
            vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
        }

      /* Arguments are ready. Create the new vector stmt.  We are creating
         two vector defs because the widened result does not fit in one vector.
         The vectorized stmt can be expressed as a call to a target builtin,
         or using a tree-code.  */
      /* Generate first half of the widened result:  */
      new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
                        vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);

      /* Generate second half of the widened result:  */
      new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
                        vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
/* Function vect_strided_store_supported.

   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
   and FALSE otherwise.  */

bool
vect_strided_store_supported (tree vectype)
{
  optab interleave_high_optab, interleave_low_optab;
  int mode;

  mode = (int) TYPE_MODE (vectype);

  /* Check that the operation is supported.  */
  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
                                               vectype, optab_default);
  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
                                              vectype, optab_default);
  if (!interleave_high_optab || !interleave_low_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for interleave.");
      return false;
    }

  if (optab_handler (interleave_high_optab, mode)->insn_code
        == CODE_FOR_nothing
      || optab_handler (interleave_low_optab, mode)->insn_code
        == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "interleave op not supported by target.");
      return false;
    }

  return true;
}
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2, generate interleave_high/low stmts to reorder the data
   correctly for the stores. Return the final references for stores in
   RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements. We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0  8 16 24  1  9 17 25
   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 30
   4th vec:  6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output. The input of
   each interleave_high/low operation is two vectors:
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector. The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7

   The permutation is done in log LENGTH stages. In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 30
   I4:  6 14 22 30  7 15 23 31.  */

static bool
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
                          unsigned int length,
                          tree stmt,
                          block_stmt_iterator *bsi,
                          VEC(tree,heap) **result_chain)
{
  tree perm_dest, perm_stmt, vect1, vect2, high, low;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  tree scalar_dest, tmp;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);

  /* Check that the operation is supported.  */
  if (!vect_strided_store_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);

  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length/2; j++)
        {
          vect1 = VEC_index (tree, dr_chain, j);
          vect2 = VEC_index (tree, dr_chain, j+length/2);

          /* Create interleaving stmt:
             in the case of big endian:
                high = interleave_high (vect1, vect2)
             and in the case of little endian:
                high = interleave_low (vect1, vect2).  */
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          if (BYTES_BIG_ENDIAN)
            tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
          else
            tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
          perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
          high = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          VEC_replace (tree, *result_chain, 2*j, high);

          /* Create interleaving stmt:
             in the case of big endian:
                low = interleave_low (vect1, vect2)
             and in the case of little endian:
                low = interleave_high (vect1, vect2).  */
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          if (BYTES_BIG_ENDIAN)
            tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
          else
            tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
          perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
          low = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          VEC_replace (tree, *result_chain, 2*j+1, low);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }

  return true;
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                    slp_tree slp_node)
{
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum machine_mode vec_mode;
  enum dr_alignment_support alignment_support_scheme;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  tree next_stmt, first_stmt = NULL_TREE;
  bool strided_store = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  VEC(tree,heap) *vec_oprnds = NULL;
  bool slp = (slp_node != NULL);
  stmt_vec_info first_stmt_vinfo;
  unsigned int vec_num;

  /* FORNOW: SLP with multiple types is not supported. The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is vectorizable store? */

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
    return false;

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      if (!vect_strided_store_supported (vectype)
          && !PURE_SLP_STMT (stmt_info) && !slp)
        return false;

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group. Check the operands of all the
             stmts of the group.  */
          next_stmt = DR_GROUP_NEXT_DR (stmt_info);
          while (next_stmt)
            {
              op = GIMPLE_STMT_OPERAND (next_stmt, 1);
              if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "use not simple.");
                  return false;
                }
              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_store_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));

      DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = NULL_TREE;
          return true;
        }

      if (slp)
        strided_store = false;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
        vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      first_stmt_vinfo = stmt_info;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
  gcc_assert (alignment_support_scheme);
  gcc_assert (alignment_support_scheme == dr_aligned);  /* FORNOW */

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy. The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_slp_defs (slp_node, &vec_oprnds, NULL);

              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt);
                  op = GIMPLE_STMT_OPERAND (next_stmt, 1);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  VEC_quick_push(tree, dr_chain, vec_oprnd);
                  VEC_quick_push(tree, oprnds, vec_oprnd);
                  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
                }
            }
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
                                                  &dummy, &ptr_incr, false,
                                                  TREE_TYPE (vec_oprnd),
                                                  &inv_p);
          gcc_assert (!inv_p);
        }
      else
        {
          /* FORNOW SLP doesn't work for multiple types.  */
          gcc_assert (!slp);

          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = VEC_index (tree, oprnds, i);
              vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              VEC_replace(tree, dr_chain, i, vec_oprnd);
              VEC_replace(tree, oprnds, i, vec_oprnd);
            }
          dataref_ptr =
            bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
        }

      if (strided_store)
        {
          result_chain = VEC_alloc (tree, heap, group_size);
          /* Permute.  */
          if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
                                         &result_chain))
            return false;
        }

      next_stmt = first_stmt;
      for (i = 0; i < vec_num; i++)
        {
          if (i > 0)
            /* Bump the vector pointer.  */
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
                                           NULL_TREE);

          if (slp)
            vec_oprnd = VEC_index (tree, vec_oprnds, i);
          else if (strided_store)
            /* For strided stores vectorized defs are interleaved in
               vect_permute_store_chain().  */
            vec_oprnd = VEC_index (tree, result_chain, i);

          data_ref = build_fold_indirect_ref (dataref_ptr);
          /* Arguments are ready. Create the new vector stmt.  */
          new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
        }
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);

  return true;
}
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

   x  msq_init = *(floor(p));            # prolog load
      realignment_token = call target_builtin;
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:

      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));                # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:

      msq = *(floor(p));                 # load in loop
      lsq = *(floor(p'));                # load in loop
      result = realign_load (msq, lsq, realignment_token);

   STMT - (scalar) load stmt to be vectorized. This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
                              is used.
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

static tree
vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
                        tree *realignment_token,
                        enum dr_alignment_support alignment_support_scheme,
                        tree init_addr,
                        struct loop **at_loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  tree msq_init = NULL_TREE;
  tree msq = NULL_TREE;
  tree stmts = NULL_TREE;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
  struct loop *loop_for_initial_load;

  gcc_assert (alignment_support_scheme == dr_explicit_realign
              || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }

  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
        (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;

  *at_loop = loop_for_initial_load;

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader  */

      gcc_assert (!compute_in_loop);
      pe = loop_preheader_edge (loop_for_initial_load);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
                                      &init_addr, &inc, true, NULL_TREE,
                                      &inv_p);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
      new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      mark_symbols_for_renaming (new_stmt);
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
      if (compute_in_loop)
        gcc_assert (init_addr); /* already computed by the caller.  */
      else
        {
          /* Generate the INIT_ADDR computation outside LOOP.  */
          init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
                                                            NULL_TREE, loop);
          pe = loop_preheader_edge (loop);
          new_bb = bsi_insert_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = build_call_expr (builtin_decl, 1, init_addr);
      vec_dest = vect_create_destination_var (scalar_dest,
                                              TREE_TYPE (new_stmt));
      new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;

      if (compute_in_loop)
        bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
      else
        {
          /* Generate the misalignment computation outside LOOP.  */
          pe = loop_preheader_edge (loop);
          new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }

      *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);

  /* 5. Create msq = phi <msq_init, lsq> in loop  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL_TREE);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
  add_phi_arg (phi_stmt, msq_init, pe);

  return msq;
}
/* Function vect_strided_load_supported.

   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
   and FALSE otherwise.  */

bool
vect_strided_load_supported (tree vectype)
{
  optab perm_even_optab, perm_odd_optab;
  int mode;

  mode = (int) TYPE_MODE (vectype);

  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
                                         optab_default);
  if (!perm_even_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for perm_even.");
      return false;
    }

  if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "perm_even op not supported by target.");
      return false;
    }

  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
                                        optab_default);
  if (!perm_odd_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for perm_odd.");
      return false;
    }

  if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "perm_odd op not supported by target.");
      return false;
    }

  return true;
}
/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly. Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements. We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output. The input of
   each extract_even/odd operation is two vectors
   and the output is the vector of extracted even/odd elements. The output of
   extract_even will be:   0 2 4 6
   and of extract_odd:     1 3 5 7

   The permutation is done in log LENGTH stages. In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order. In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0 4  8 12 16 20 24 28
   2nd vec (E3):  1 5  9 13 17 21 25 29
   3rd vec (E2):  2 6 10 14 18 22 26 30
   4th vec (E4):  3 7 11 15 19 23 27 31.  */

static bool
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         tree stmt,
                         block_stmt_iterator *bsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));

  /* Check that the operation is supported.  */
  if (!vect_strided_load_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
                        first_vect, second_vect);
          perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
                        first_vect, second_vect);
          perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }

  return true;
}
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

static bool
vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  tree next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain, it contains
     permuted vectors, that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  /* Permute.  */
  if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
    return false;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps. Loads created for the gaps will be removed by dead
         code elimination pass later.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
         correspond to the gaps.  */
      if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
              tree rel_stmt = STMT_VINFO_RELATED_STMT (
                                                  vinfo_for_stmt (prev_stmt));
              while (rel_stmt)
                {
                  prev_stmt = rel_stmt;
                  rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                }
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
            }

          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }

  VEC_free (tree, heap, result_chain);
  return true;
}
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_stmt = NULL_TREE;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  tree phi = NULL_TREE;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  bool slp = (slp_node != NULL);

  /* FORNOW: SLP with multiple types is not supported.  The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");

  if (!STMT_VINFO_RELEVANT_P (stmt_info))

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)

  /* Is vectorizable load? */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  op = GIMPLE_STMT_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF
      && TREE_CODE (op) != INDIRECT_REF
      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))

  if (!STMT_VINFO_DATA_REF (stmt_info))

  scalar_type = TREE_TYPE (DR_REF (dr));
  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
      strided_load = true;
      gcc_assert (! nested_in_vect_loop);

      /* Check if interleaving is supported.  */
      if (!vect_strided_load_supported (vectype)
          && !PURE_SLP_STMT (stmt_info) && !slp)

  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, NULL);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      dr_chain = VEC_alloc (tree, heap, group_size);

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
          strided_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

          vec_num = group_size;

      group_size = vec_num = 1;

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
  gcc_assert (alignment_support_scheme);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e - we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of

                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize

     In case of interleaving (non-unit strided access):

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
        VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
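
  /* Illustrative sketch only (not part of the original source): how a later
     stage might walk the RELATED_STMT chain described above to reach the
     J-th copy of a vector stmt.  The helper name is hypothetical.  */
#if 0
  static tree
  example_get_jth_copy (tree first_vec_stmt, int j)
  {
    tree vs = first_vec_stmt;
    int k;

    /* VS1_0 -> VS1_1 -> ... -> VS1_j, following STMT_VINFO_RELATED_STMT.  */
    for (k = 0; k < j && vs; k++)
      vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));
    return vs;
  }
#endif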
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = p + indx * vectype_size;

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         p2 = p2 + indx * vectype_size
         vec_dest = realign_load (msq, lsq, realignment_token)  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
  if (nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
      msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,

      if (alignment_support_scheme == dr_explicit_realign_optimized)
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
      /* 1. Create the vector pointer update chain.  */
        dataref_ptr = vect_create_data_ref_ptr (first_stmt,
                                                &dummy, &ptr_incr, false,

        bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);

      for (i = 0; i < vec_num; i++)
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,

          /* 2. Create the vector-load in the loop.  */
          switch (alignment_support_scheme)
              gcc_assert (aligned_access_p (first_dr));
              data_ref = build_fold_indirect_ref (dataref_ptr);

            case dr_unaligned_supported:
                int mis = DR_MISALIGNMENT (first_dr);
                tree tmis = (mis == -1 ? size_zero_node : size_int (mis));

                tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
                  build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);

            case dr_explicit_realign:
                tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                if (compute_in_loop)
                  msq = vect_setup_realignment (first_stmt, bsi,
                                                dr_explicit_realign,

                data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
                vec_dest = vect_create_destination_var (scalar_dest, vectype);
                new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
                new_temp = make_ssa_name (vec_dest, new_stmt);
                GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
                vect_finish_stmt_generation (stmt, new_stmt, bsi);
                copy_virtual_operands (new_stmt, stmt);
                mark_symbols_for_renaming (new_stmt);

                bump = size_binop (MULT_EXPR, vs_minus_1,
                                   TYPE_SIZE_UNIT (scalar_type));
                ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
                data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);

            case dr_explicit_realign_optimized:
              data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
          mark_symbols_for_renaming (new_stmt);
          /* 3. Handle explicit realignment if necessary/supported.  Create in
             loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
          if (alignment_support_scheme == dr_explicit_realign_optimized
              || alignment_support_scheme == dr_explicit_realign)
              lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
              if (!realignment_token)
                realignment_token = dataref_ptr;
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,

              new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
              vect_finish_stmt_generation (stmt, new_stmt, bsi);

              if (alignment_support_scheme == dr_explicit_realign_optimized)
                  if (i == vec_num - 1 && j == ncopies - 1)
                    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
          /* 4. Handle invariant-load.  */
              gcc_assert (!strided_load);
              gcc_assert (nested_in_vect_loop_p (loop, stmt));

                  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);

                  /* CHECKME: bitpos depends on endianness?  */
                  bitpos = bitsize_zero_node;
                  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,

                    vect_create_destination_var (scalar_dest, NULL_TREE);
                  new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
                  vect_finish_stmt_generation (stmt, new_stmt, bsi);

                  for (k = nunits - 1; k >= 0; --k)
                    t = tree_cons (NULL_TREE, new_temp, t);
                  /* FIXME: use build_constructor directly.  */
                  vec_inv = build_constructor_from_list (vectype, t);
                  new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);

              gcc_unreachable (); /* FORNOW. */
          /* Collect vector loads and later create their permutation in
             vect_transform_strided_load ().  */
            VEC_quick_push (tree, dr_chain, new_temp);

          /* Store vector loads in the corresponding SLP_NODE.  */
            VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);

      /* FORNOW: SLP with multiple types is unsupported.  */

          if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))

          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          VEC_free (tree, heap, dr_chain);
          dr_chain = VEC_alloc (tree, heap, group_size);

            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;

            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

    VEC_free (tree, heap, dr_chain);
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

vectorizable_live_operation (tree stmt,
                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                             tree *vec_stmt ATTRIBUTE_UNUSED)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum vect_def_type dt;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)

  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)

  /* FORNOW. CHECKME. */
  if (nested_in_vect_loop_p (loop, stmt))

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  op_type = TREE_OPERAND_LENGTH (operation);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */

  for (i = 0; i < op_type; i++)
      op = TREE_OPERAND (operation, i);
      if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");

      if (dt != vect_invariant_def && dt != vect_constant_def)

  /* No transformation is required for the cases we currently support.  */
/* Function vect_is_simple_cond.

   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
{
  enum vect_def_type dt;

  if (!COMPARISON_CLASS_P (cond))

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))

  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)

  if (TREE_CODE (rhs) == SSA_NAME)
      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))

  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at BSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");

  /* Is vectorizable conditional operation?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)

  op = GIMPLE_STMT_OPERAND (stmt, 1);

  if (TREE_CODE (op) != COND_EXPR)

  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))

  if (TREE_CODE (then_clause) == SSA_NAME)
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo,
                               &then_def_stmt, &def, &dt))

  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)

  if (TREE_CODE (else_clause) == SSA_NAME)
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo,
                               &else_def_stmt, &def, &dt))

  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)

  vec_mode = TYPE_MODE (vectype);

      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (op, vec_mode);

  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready.  Create the new vector stmt.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                        vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                          vec_compare, vec_then_clause, vec_else_clause);

  *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
                     slp_tree slp_node)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree orig_stmt_in_pattern;

  switch (STMT_VINFO_TYPE (stmt_info))
    case type_demotion_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);

    case type_promotion_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);

    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, bsi, &vec_stmt);

    case op_vec_info_type:
      done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);

    case load_vec_info_type:
      done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);

    case store_vec_info_type:
      done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             ...  */
          *strided_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, bsi, &vec_stmt);

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, bsi, &vec_stmt);

    case reduc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_reduction (stmt, bsi, &vec_stmt);

      if (!STMT_VINFO_LIVE_P (stmt_info))
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");

  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
      done = vectorizable_live_operation (stmt, bsi, &vec_stmt);

      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
          /* STMT was inserted by the vectorizer to replace a computation
             idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original sequence
             that computed this idiom.  We need to record a pointer to VEC_STMT
             in the stmt_info of ORIG_STMT_IN_PATTERN.  See more details in
             the documentation of vect_pattern_recog.  */
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
/* This function builds ni_name = number of iterations loop executes
   on the loop preheader.  */

vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni_name, stmt, var;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_var (var);
  ni_name = force_gimple_operand (ni, &stmt, false, var);

  pe = loop_preheader_edge (loop);
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
/* This function generates the following statements:

   ni_name = number of iterations loop executes
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf

   and places them at the loop preheader edge.  */

vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree *ni_name_ptr,
                                 tree *ratio_mult_vf_name_ptr,
                                 tree *ratio_name_ptr)
{
  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  pe = loop_preheader_edge (loop);

  /* Generate temporary variable that contains
     number of iterations loop executes.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));

  /* Create: ratio = ni >> log2(vf) */
  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
  if (!is_gimple_val (ratio_name))
      var = create_tmp_var (TREE_TYPE (ni), "bnd");
      add_referenced_var (var);

      ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
      pe = loop_preheader_edge (loop);
      new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
                                    ratio_name, log_vf);
  if (!is_gimple_val (ratio_mult_vf_name))
      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
      add_referenced_var (var);

      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,

      pe = loop_preheader_edge (loop);
      new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);

  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;
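
/* Worked example (illustration only, not part of the original source): with
   vf == 4 and a symbolic iteration count ni == 103, the code above yields
   log_vf = 2, ratio = 103 >> 2 = 25 and ratio_mult_vf = 25 << 2 = 100, so
   the vectorized loop executes 25 iterations and the epilog loop covers the
   remaining 103 - 100 = 3.  A standalone sketch of the same arithmetic
   (hypothetical helper, assumes vf is a power of two):  */
#if 0
static inline void
example_ratio_calc (unsigned HOST_WIDE_INT ni, unsigned int log_vf,
                    unsigned HOST_WIDE_INT *ratio,
                    unsigned HOST_WIDE_INT *ratio_mult_vf)
{
  *ratio = ni >> log_vf;                /* ni / vf  */
  *ratio_mult_vf = *ratio << log_vf;    /* (ni / vf) * vf  */
}
#endif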
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are

   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized), i.e., the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
      tree access_fn = NULL;
      tree evolution_part;
      tree var, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
        unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,

      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
        ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
                          fold_convert (sizetype,
                            fold_build2 (MULT_EXPR, TREE_TYPE (niters),
                                         niters, step_expr)));

        ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                          fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
                                       fold_convert (TREE_TYPE (init_expr),

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      last_bsi = bsi_last (exit_bb);
      ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
                                          true, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
/* Return the more conservative threshold between the
   min_profitable_iters returned by the cost model and the user
   specified threshold, if provided.  */

conservative_cost_threshold (loop_vec_info loop_vinfo,
                             int min_profitable_iters)
{
  int min_scalar_loop_bound;

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);

  /* Use the cost model only if it is more conservative than the user
     specified threshold.  */
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (th && vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "Vectorization may not be profitable.");
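
/* Worked example (illustration only, not part of the original source): with
   PARAM_MIN_VECT_LOOP_BOUND = 5 and a vectorization factor of 4, the user
   bound is 5 * 4 - 1 = 19.  A cost-model estimate of 10 iterations is less
   conservative, so the function keeps th = 19; an estimate of 25 iterations
   would raise the threshold to th = 25.  */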
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */

vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  basic_block preheader;
  bool check_profitability = false;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                   &ratio_mult_vf_name, ratio);

  loop_num = loop->num;

  /* If cost model check not done during versioning and
     peeling for alignment.  */
  if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      check_profitability = true;

      /* Get profitability threshold for vectorized loop.  */
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

      th = conservative_cost_threshold (loop_vinfo,
                                        min_profitable_iters);

  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
                                            ratio_mult_vf_name, ni_name, false,
                                            th, check_profitability);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */

  free_original_copy_tables ();
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the
   loop) and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min (LOOP_NITERS, (VF - addr_mis/elem_size)&(VF-1))

   (elem_size = element type size; an element is the scalar element
    whose type is the inner type of the vectype)

   prolog_niters = min (LOOP_NITERS,
                        (VF/group_size - addr_mis/elem_size)&(VF/group_size-1))
   where group_size is the size of the interleaved group.

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */

vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree iters, iters_name;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
      /* For interleaved access element size must be multiplied by the size of
         the interleaved group.  */
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (
                                    DR_GROUP_FIRST_DR (stmt_info)));
      element_size *= group_size;

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type,
                    (nelements - elem_misalign)&(nelements/group_size-1));

      tree new_stmts = NULL_TREE;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                        &new_stmts, NULL_TREE, loop);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/nelements));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);

      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
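
/* Illustrative sketch only (not part of the original source): the
   known-misalignment arithmetic above for a concrete case.  With a four
   element vector of 4-byte elements (nelements = 4, element_size = 4),
   group_size = 1 and a byte misalignment of 8, elem_misalign = 8 / 4 = 2
   and the prolog executes (4 - 2) & 3 = 2 iterations before the access
   becomes aligned.  All names below are hypothetical.  */
#if 0
static inline int
example_prolog_niters (int nelements, int group_size, int byte_misalign,
                       int element_size)
{
  int elem_misalign = byte_misalign / element_size;
  return (nelements - elem_misalign) & (nelements / group_size - 1);
}
#endif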
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
  DR_OFFSET (dr) = offset;
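
/* Worked example (illustration only, not part of the original source): for a
   data reference with DR_STEP of 4 bytes (e.g. a unit-stride int access),
   peeling NITERS = 3 prolog iterations adds 3 * 4 = 12 bytes to DR_OFFSET,
   so address computations for the vectorized loop start past the peeled
   elements.  */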
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    vect_update_init_of_dr (dr, niters);
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  struct loop *new_loop;
  bool check_profitability = false;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* If cost model check not done during versioning.  */
  if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
      check_profitability = true;

      /* Get profitability threshold for vectorized loop.  */
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

      th = conservative_cost_threshold (loop_vinfo,
                                        min_profitable_iters);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
                                   niters_of_prolog_loop, ni_name, true,
                                   th, check_profitability);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
        TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */

  free_original_copy_tables ();
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment must
   be checked at runtime.

   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */

vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   tree *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree int_ptrsize_type;
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
      tree new_stmt_list = NULL_TREE;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
                                        &new_stmt_list, NULL_TREE, loop);

      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
                        or_tmp_name, addr_tmp_name);
          or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;

        or_tmp_name = addr_tmp_name;

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask");
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
  and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
                                and_tmp_name, ptrsize_zero);
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);

    *cond_expr = part_cond_expr;
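
/* Illustrative sketch only (not part of the original source): the runtime
   test built above, written as plain C for two pointers and 16-byte vectors
   (mask == 0xf).  The vectorized version runs only when the OR of the
   addresses has none of the low mask bits set.  Names are hypothetical.  */
#if 0
static inline int
example_addrs_aligned_p (const void *a, const void *b)
{
  unsigned long bits = ((unsigned long) a | (unsigned long) b) & 0xf;
  return bits == 0;   /* Corresponds to part_cond_expr: and_tmp == 0.  */
}
#endif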
/* Function vect_vfa_segment_size.

   Create an expression that computes the size of segment
   that will be accessed for a data reference.  The function takes into
   account that realignment loads may access one more vector.

     DR: The data reference.
     VECT_FACTOR: vectorization factor.

   Return an expression whose value is the size of segment which will be
   accessed by DR.  */

vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
{
  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
                                     DR_STEP (dr), vect_factor);

  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
      tree vector_size = TYPE_SIZE_UNIT
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));

      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
                                    segment_length, vector_size);

  return fold_convert (sizetype, segment_length);
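
/* Worked example (illustration only, not part of the original source): for a
   unit-stride access to 4-byte elements DR_STEP is 4; with a vectorization
   factor of 4 the segment length is 4 * 4 = 16 bytes, and one extra vector
   (TYPE_SIZE_UNIT of the vectype, 16 bytes here) is added when the ref uses
   the optimized realignment scheme, giving a 32-byte segment for the runtime
   overlap test below.  */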
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.

   COND_EXPR - conditional expression.
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.

   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   tree *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC (ddr_p, heap) *may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
     ...
     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */

  if (VEC_empty (ddr_p, may_alias_ddrs))

  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
      struct data_reference *dr_a, *dr_b;
      tree dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      tree stmt_a, stmt_b;

      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
          stmt_a = dr_group_first_a;
          dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));

      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
          stmt_b = dr_group_first_b;
          dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));

        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,

        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,

      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);

      if (vect_print_dump_info (REPORT_DR_DETAILS))
                   "create runtime check for data references ");
          print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);

        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                     fold_build2 (LT_EXPR, boolean_type_node,
                       fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),

                     fold_build2 (LT_EXPR, boolean_type_node,
                       fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),

        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);

        *cond_expr = part_cond_expr;

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
             VEC_length (ddr_p, may_alias_ddrs));
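
/* Illustrative sketch only (not part of the original source): the per-DDR
   overlap test generated above, written as plain C.  Two accessed segments
   [a, a+len_a) and [b, b+len_b) are treated as independent when one ends
   before the other begins.  Names are hypothetical.  */
#if 0
static inline int
example_segments_do_not_overlap_p (const char *a, unsigned long len_a,
                                   const char *b, unsigned long len_b)
{
  return ((unsigned long) a + len_a < (unsigned long) b)
         || ((unsigned long) b + len_b < (unsigned long) a);
}
#endif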
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both alias and alignment tests are passed.

   The test generated to check which version of loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.  */

vect_loop_versioning (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree cond_expr = NULL_TREE;
  tree cond_expr_stmt_list = NULL_TREE;
  basic_block condition_bb;
  block_stmt_iterator cond_exp_bsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  tree orig_phi, new_phi, arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  tree gimplify_stmt_list;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  int min_profitable_iters = 0;

  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

  th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);

    build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
            build_int_cst (TREE_TYPE (scalar_loop_iters), th));

  cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

    fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);

    force_gimple_operand (cond_expr, &gimplify_stmt_list, true,

  append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);

  initialize_original_copy_tables ();
  nloop = loop_version (loop, cond_expr, &condition_bb,
                        prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  for (orig_phi = phi_nodes (merge_bb); orig_phi;
       orig_phi = PHI_CHAIN (orig_phi))
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),

      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e);
      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  if (cond_expr_stmt_list)
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

vect_remove_stores (tree first_stmt)
{
  tree next = first_stmt;
  block_stmt_iterator next_si;

      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = bsi_for_stmt (next);
      bsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
/* Vectorize SLP instance tree in postorder.  */

vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
{
  bool strided_store, is_store;
  block_stmt_iterator si;
  stmt_vec_info stmt_info;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);

  stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);
  SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_generic_expr (vect_dump, stmt, TDF_SLIM);

  si = bsi_for_stmt (stmt);
  is_store = vect_transform_stmt (stmt, &si, &strided_store, node);

    if (DR_GROUP_FIRST_DR (stmt_info))
      /* If IS_STORE is TRUE, the vectorization of the
         interleaving chain was completed - free all the stores in
         the chain.  */
      vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));

  /* FORNOW: SLP originates only from strided stores.  */

  /* FORNOW: SLP originates only from strided stores.  */
vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
{
  VEC (slp_instance, heap) *slp_instances =
    LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  unsigned int vec_stmts_size;
  unsigned int group_size, i;
  unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool is_store = false;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
      group_size = SLP_INSTANCE_GROUP_SIZE (instance);
      /* For each SLP instance calculate number of vector stmts to be created
         for the scalar stmts in each node of the SLP tree.  Number of vector
         elements in one vector iteration is the number of scalar elements in
         one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
         size.  */
      vec_stmts_size = vectorization_factor * group_size / nunits;

      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),

      if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
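
/* Worked example (illustration only, not part of the original source): an SLP
   instance with GROUP_SIZE = 2, a vectorization factor of 8 and vectors of
   nunits = 4 elements needs vec_stmts_size = 8 * 2 / 4 = 4 vector stmts per
   SLP node.  */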
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;
  bool slp_scheduled = false;
  unsigned int nunits;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    vect_loop_versioning (loop_vinfo);

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_memsyms_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF)
     iterations.  (VF is the vectorization factor.)  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                           LOOP_VINFO_INT_NITERS (loop_vinfo)
                           / vectorization_factor);
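
  /* Worked example (for illustration): with VF == 4 and a known iteration
     count n == 103, the condition above holds (103 % 4 != 0), so an epilog
     loop is created: the vectorized loop runs 103 / 4 == 25 iterations and
     the scalar epilog the remaining 103 % 4 == 3.  With n == 100 no epilog
     is needed and RATIO is built directly as the constant
     100 / 4 == 25.  */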

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists of one
     basic block (header + empty latch).  When the vectorizer supports more
     involved loop forms, the order in which the BBs are traversed will need
     to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      tree phi;

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
               != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL, NULL);
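              /* Example (for illustration): for an induction such as
                 "i = i + 1" with VF == 4, the vectorized phi starts from
                 the vector {i0, i0+1, i0+2, i0+3} and is advanced by
                 {4, 4, 4, 4} once per vector iteration.  */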
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si);)
        {
          tree stmt = bsi_stmt (si);
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* Vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              bsi_next (&si);
              continue;
            }

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              bsi_next (&si);
              continue;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits =
            (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
              && vect_print_dump_info (REPORT_DETAILS))
            /* For SLP, VF is set according to the unrolling factor and not
               to the vector size, hence for SLP this print is not valid.  */
            fprintf (vect_dump, "multiple-types.");

          /* SLP.  Schedule all the SLP instances when the first SLP stmt
             is reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "=== scheduling SLP instances ===");

                  is_store = vect_schedule_slp (loop_vinfo, nunits);

                  /* IS_STORE is true if STMT is a store.  Stores cannot be
                     of hybrid SLP type.  They are removed in
                     vect_schedule_slp_instance and their vinfo is
                     destroyed.  */
                  if (is_store)
                    {
                      bsi_next (&si);
                      continue;
                    }
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (PURE_SLP_STMT (stmt_info))
                {
                  bsi_next (&si);
                  continue;
                }
            }

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          strided_store = false;
          is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
          if (is_store)
            {
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization
                     of the interleaving chain was completed - free all the
                     stores in the chain.  */
                  vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
                  bsi_remove (&si, true);
                  continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  free_stmt_vec_info (stmt);
                  bsi_remove (&si, true);
                  continue;
                }
            }

          bsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  mark_set_for_renaming (vect_memsyms_to_rename);
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}