1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type
{
63 /* Return the vectorized type for the given statement. */
/* Accessor: return the vector type recorded for STMT_INFO
   (STMT_VINFO_VECTYPE).
   NOTE(review): lossy extraction — the return-type line, braces and
   original lines 65/67/69 are missing from this span.  Recover the
   pristine source before editing logic here.  */
66 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
68 return STMT_VINFO_VECTYPE (stmt_info
);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
/* Return TRUE iff the basic block containing STMT_INFO's statement lies
   in the inner loop of the (outer) loop being vectorized, i.e.
   bb->loop_father == LOOP_VINFO_LOOP (..)->inner.
   NOTE(review): lossy extraction — the original guard lines (around
   orig. 79-85, presumably the !loop_vinfo early return and the loop
   declaration) were dropped; confirm against the pristine file.  */
74 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
76 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
77 basic_block bb
= gimple_bb (stmt
);
78 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
84 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
86 return (bb
->loop_father
== loop
->inner
);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
/* Record the cost of COUNT copies of a statement of kind KIND, either by
   pushing a stmt_info_for_cost entry onto BODY_COST_VEC for later
   processing, or by informing the target cost model directly via
   add_stmt_cost.  Returns a preliminary cost estimate.
   NOTE(review): lossy extraction — the branch structure selecting
   between the two paths (orig. lines 97-108) is partly missing; the
   builtin_vectorization_cost expression below is the early-return
   estimate for the vector path.  Verify against the pristine file.  */
94 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
95 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
96 int misalign
, enum vect_cost_model_location where
)
100 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
101 stmt_info_for_cost si
= { count
, kind
,
102 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
104 body_cost_vec
->safe_push (si
);
/* Preliminary estimate: target's per-statement cost scaled by COUNT.  */
106 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
/* Otherwise hand the cost straight to the target model.  */
109 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
110 count
, kind
, stmt_info
, misalign
, where
);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
/* Return a new temporary variable of array type ELEM_TYPE[NELEMS].
   NOTE(review): lossy extraction — braces and the second argument of
   create_tmp_var (orig. line 119, presumably the variable name string)
   were dropped.  */
116 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
118 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
/* Read the vector at index N of ARRAY (an array built by
   create_vector_array) into a fresh SSA name derived from SCALAR_DEST,
   emitting the load via vect_finish_stmt_generation as part of the
   vectorization of STMT.  Returns the new SSA name.
   NOTE(review): lossy extraction — the new_stmt declaration and
   closing return (orig. lines 132/146-147) are missing here.  */
128 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
129 tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Build ARRAY[N]; the two trailing NULL_TREEs are ARRAY_REF's optional
   lower-bound and element-size operands.  */
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
/* Store SSA_NAME VECT into index N of ARRAY (built by
   create_vector_array), emitting the assignment as part of the
   vectorization of STMT.
   NOTE(review): lossy extraction — local declarations and braces
   (orig. lines 156-159) are missing from this span.  */
154 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
155 tree array
, unsigned HOST_WIDE_INT n
)
/* Build ARRAY[N] as the store destination.  */
160 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
161 build_int_cst (size_type_node
, n
),
162 NULL_TREE
, NULL_TREE
);
164 new_stmt
= gimple_build_assign (array_ref
, vect
);
165 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
/* Build a MEM_REF representing *PTR with type TYPE, taking the alias
   pointer type from FIRST_DR's reference so the new access aliases the
   same memory as the data reference it replaces.  Also records the
   array's natural alignment on PTR's SSA pointer info.
   NOTE(review): lossy extraction — braces and the return statement
   (orig. lines 181-182) are missing from this span.  */
173 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
175 tree mem_ref
, alias_ptr_type
;
177 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
186 /* Function vect_mark_relevant.
188 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* Mark STMT as "relevant for vectorization" (raising its
   STMT_VINFO_RELEVANT to at least RELEVANT and OR-ing in LIVE_P) and
   push it onto WORKLIST — unless nothing changed, in which case the
   stmt was already marked and is not re-queued.  For a stmt that is the
   original of a recognized pattern, the related pattern stmt is marked
   instead.
   NOTE(review): lossy extraction — braces and several structural lines
   (e.g. orig. 198, 211, 216, 227-229, 241-242) are missing.  */
191 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
192 enum vect_relevant relevant
, bool live_p
)
194 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
195 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
196 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
197 gimple
*pattern_stmt
;
199 if (dump_enabled_p ())
201 dump_printf_loc (MSG_NOTE
, vect_location
,
202 "mark relevant %d, live %d: ", relevant
, live_p
);
203 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
206 /* If this stmt is an original stmt in a pattern, we might need to mark its
207 related pattern stmt instead of the original stmt. However, such stmts
208 may have their own uses that are not in any pattern, in such cases the
209 stmt itself should be marked. */
210 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
212 /* This is the last stmt in a sequence that was detected as a
213 pattern that can potentially be vectorized. Don't mark the stmt
214 as relevant/live because it's not going to be vectorized.
215 Instead mark the pattern-stmt that replaces it. */
217 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
219 if (dump_enabled_p ())
220 dump_printf_loc (MSG_NOTE
, vect_location
,
221 "last stmt in pattern. don't mark"
222 " relevant/live.\n");
/* Switch to the pattern stmt's info and re-snapshot its old state so
   the no-change check below compares against the right stmt.  */
223 stmt_info
= vinfo_for_stmt (pattern_stmt
);
224 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
225 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
226 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Raise liveness and relevance monotonically.  */
230 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
231 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
232 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If neither flag changed, the stmt was already marked: skip the push.  */
234 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
235 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
237 if (dump_enabled_p ())
238 dump_printf_loc (MSG_NOTE
, vect_location
,
239 "already marked relevant/live.\n");
243 worklist
->safe_push (stmt
);
247 /* Function is_simple_and_all_uses_invariant
249 Return true if STMT is simple and all uses of it are invariant. */
/* Return true if STMT is a GIMPLE assignment all of whose SSA uses are
   loop-invariant (external or constant defs) with respect to
   LOOP_VINFO; false if any use is not simple or not invariant.
   NOTE(review): lossy extraction — local declarations, braces and the
   return statements (orig. 254-257, 259-260, 270-272, 274-276) are
   missing from this span.  */
252 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
258 if (!is_gimple_assign (stmt
))
261 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
263 enum vect_def_type dt
= vect_uninitialized_def
;
265 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
267 if (dump_enabled_p ())
268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
269 "use not simple.\n");
/* Any def that is neither external nor constant makes the stmt
   non-invariant.  */
273 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
279 /* Function vect_stmt_relevant_p.
281 Return true if STMT in loop that is represented by LOOP_VINFO is
282 "relevant for vectorization".
284 A stmt is considered "relevant for vectorization" if:
285 - it has uses outside the loop.
286 - it has vdefs (it alters memory).
287 - control stmts in the loop (except for the exit condition).
289 CHECKME: what other side effects would the vectorizer allow? */
/* Decide whether STMT (in the loop described by LOOP_VINFO) is relevant
   for vectorization, setting *RELEVANT and *LIVE_P as out-parameters
   and returning their disjunction.  A stmt is relevant if it is a
   non-exit control stmt, alters memory (has a vdef), or has uses
   outside the loop (which makes it "live").
   NOTE(review): lossy extraction — several declarations, braces and the
   assignments to *live_p (orig. 296-300, 302, 319-320, 334-335, 340)
   are missing from this span.  */
292 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
293 enum vect_relevant
*relevant
, bool *live_p
)
295 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
297 imm_use_iterator imm_iter
;
301 *relevant
= vect_unused_in_scope
;
304 /* cond stmt other than loop exit cond. */
305 if (is_ctrl_stmt (stmt
)
306 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
307 != loop_exit_ctrl_vec_info_type
)
308 *relevant
= vect_used_in_scope
;
310 /* changing memory. */
311 if (gimple_code (stmt
) != GIMPLE_PHI
)
312 if (gimple_vdef (stmt
)
313 && !gimple_clobber_p (stmt
))
315 if (dump_enabled_p ())
316 dump_printf_loc (MSG_NOTE
, vect_location
,
317 "vec_stmt_relevant_p: stmt has vdefs.\n");
318 *relevant
= vect_used_in_scope
;
321 /* uses outside the loop. */
322 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
324 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
326 basic_block bb
= gimple_bb (USE_STMT (use_p
));
327 if (!flow_bb_inside_loop_p (loop
, bb
))
329 if (dump_enabled_p ())
330 dump_printf_loc (MSG_NOTE
, vect_location
,
331 "vec_stmt_relevant_p: used out of loop.\n");
/* Debug uses outside the loop do not make the stmt live.  */
333 if (is_gimple_debug (USE_STMT (use_p
)))
336 /* We expect all such uses to be in the loop exit phis
337 (because of loop closed form) */
338 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
339 gcc_assert (bb
== single_exit (loop
)->dest
);
/* A stmt that is live (used after the loop) but otherwise unused inside
   the loop, and not trivially invariant, is marked used-only-live.  */
346 if (*live_p
&& *relevant
== vect_unused_in_scope
347 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_NOTE
, vect_location
,
351 "vec_stmt_relevant_p: stmt live but not relevant.\n");
352 *relevant
= vect_used_only_live
;
355 return (*live_p
|| *relevant
);
359 /* Function exist_non_indexing_operands_for_use_p
361 USE is one of the uses attached to STMT. Check if USE is
362 used in STMT for anything other than indexing an array. */
/* Return whether USE is used in STMT for anything other than indexing a
   data reference — i.e. whether the use must itself be vectorized
   rather than being folded into address computation.
   NOTE(review): lossy extraction — the return statements, braces, the
   list of recognized stmt forms (orig. 377-379, 384-385) and the
   internal-fn case labels (orig. 394-410, presumably the MASK_STORE /
   MASK_LOAD cases selecting arg 3 vs arg 2) are missing; confirm the
   internal-fn semantics against the pristine file.  */
365 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
368 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info
))
376 /* STMT has a data_ref. FORNOW this means that its of one of
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 if (!gimple_assign_copy_p (stmt
))
391 if (is_gimple_call (stmt
)
392 && gimple_call_internal_p (stmt
))
393 switch (gimple_call_internal_fn (stmt
))
396 operand
= gimple_call_arg (stmt
, 3);
401 operand
= gimple_call_arg (stmt
, 2);
411 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
413 operand
= gimple_assign_rhs1 (stmt
);
414 if (TREE_CODE (operand
) != SSA_NAME
)
425 Function process_use.
428 - a USE in STMT in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445 skip DEF_STMT cause it had already been processed.
446 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
447 be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
/* Process one USE of STMT: find its defining stmt and, when that def is
   inside the loop, mark it relevant (vect_mark_relevant) with a
   relevance possibly adjusted for cross-nest (outer<->inner loop)
   definitions; reduction-phi/reduction-stmt pairs in the same nest are
   only sanity-checked, not re-queued.  Returns true on success, false
   when the use is not simple.
   NOTE(review): lossy extraction — braces, early returns, the `force'
   parameter declaration, the switch heads for the case labels below and
   several gcc_unreachable/default arms are missing (the embedded
   original line numbers skip throughout).  The bare `case' labels below
   belong to two switches on the incoming RELEVANT value.  */
452 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
453 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
456 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
457 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
458 stmt_vec_info dstmt_vinfo
;
459 basic_block bb
, def_bb
;
461 enum vect_def_type dt
;
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
468 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
472 "not vectorized: unsupported use in stmt.\n");
476 if (!def_stmt
|| gimple_nop_p (def_stmt
))
479 def_bb
= gimple_bb (def_stmt
);
480 if (!flow_bb_inside_loop_p (loop
, def_bb
))
482 if (dump_enabled_p ())
483 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
487 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488 DEF_STMT must have already been processed, because this should be the
489 only way that STMT, which is a reduction-phi, was put in the worklist,
490 as there should be no other uses for DEF_STMT in the loop. So we just
491 check that everything is as expected, and we are done. */
492 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
493 bb
= gimple_bb (stmt
);
494 if (gimple_code (stmt
) == GIMPLE_PHI
495 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
496 && gimple_code (def_stmt
) != GIMPLE_PHI
497 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
498 && bb
->loop_father
== def_bb
->loop_father
)
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE
, vect_location
,
502 "reduc-stmt defining reduc-phi in the same nest.\n");
503 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
504 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
505 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
506 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
507 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
511 /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 outer-loop-header-bb:
518 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
520 if (dump_enabled_p ())
521 dump_printf_loc (MSG_NOTE
, vect_location
,
522 "outer-loop def-stmt defining inner-loop stmt.\n");
/* Translate RELEVANT from inner-loop to outer-loop terms (case 3a).  */
526 case vect_unused_in_scope
:
527 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
528 vect_used_in_scope
: vect_unused_in_scope
;
531 case vect_used_in_outer_by_reduction
:
532 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
533 relevant
= vect_used_by_reduction
;
536 case vect_used_in_outer
:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
538 relevant
= vect_used_in_scope
;
541 case vect_used_in_scope
:
549 /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 outer-loop-header-bb:
554 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
556 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
558 if (dump_enabled_p ())
559 dump_printf_loc (MSG_NOTE
, vect_location
,
560 "inner-loop def-stmt defining outer-loop stmt.\n");
/* Translate RELEVANT from outer-loop to inner-loop terms (case 3b).  */
564 case vect_unused_in_scope
:
565 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
566 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
567 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
570 case vect_used_by_reduction
:
571 case vect_used_only_live
:
572 relevant
= vect_used_in_outer_by_reduction
;
575 case vect_used_in_scope
:
576 relevant
= vect_used_in_outer
;
/* Finally queue the def stmt with the (possibly adjusted) relevance;
   defs are never marked live here.  */
584 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
589 /* Function vect_mark_stmts_to_be_vectorized.
591 Not all stmts in the loop need to be vectorized. For example:
600 Stmt 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
603 This pass detects such stmts. */
/* Top-level relevance analysis: seed a worklist with all phis and stmts
   of the loop that vect_stmt_relevant_p deems relevant/live, then
   propagate relevance backwards through each stmt's uses via
   process_use, with special handling for reductions, nested cycles,
   double reductions, pattern stmts and gather/scatter offsets.
   NOTE(review): lossy extraction — braces, several declarations
   (i, bb, phi, stmt, live_p, use_p/iter), `return false'/`return true'
   lines, the i initialization before the gimple_num_ops loop (orig.
   749-751, which skips to the first RHS operand), and the trailing
   process_use argument lists (orig. 757-760, 767-768, 777-778, 787-788)
   were dropped; the control structure below is incomplete as shown.  */
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
608 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
609 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
610 unsigned int nbbs
= loop
->num_nodes
;
611 gimple_stmt_iterator si
;
614 stmt_vec_info stmt_vinfo
;
618 enum vect_relevant relevant
;
620 if (dump_enabled_p ())
621 dump_printf_loc (MSG_NOTE
, vect_location
,
622 "=== vect_mark_stmts_to_be_vectorized ===\n");
624 auto_vec
<gimple
*, 64> worklist
;
626 /* 1. Init worklist. */
627 for (i
= 0; i
< nbbs
; i
++)
630 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
633 if (dump_enabled_p ())
635 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
636 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
639 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
640 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
642 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
644 stmt
= gsi_stmt (si
);
645 if (dump_enabled_p ())
647 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
648 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
651 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
652 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
656 /* 2. Process_worklist */
657 while (worklist
.length () > 0)
662 stmt
= worklist
.pop ();
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
669 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
670 (DEF_STMT) as relevant/irrelevant according to the relevance property
672 stmt_vinfo
= vinfo_for_stmt (stmt
);
673 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
675 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
676 propagated as is to the DEF_STMTs of its USEs.
678 One exception is when STMT has been identified as defining a reduction
679 variable; in this case we set the relevance to vect_used_by_reduction.
680 This is because we distinguish between two kinds of relevant stmts -
681 those that are used by a reduction computation, and those that are
682 (also) used by a regular computation. This allows us later on to
683 identify stmts that are used solely by a reduction, and therefore the
684 order of the results that they produce does not have to be kept. */
686 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
688 case vect_reduction_def
:
689 gcc_assert (relevant
!= vect_unused_in_scope
);
690 if (relevant
!= vect_unused_in_scope
691 && relevant
!= vect_used_in_scope
692 && relevant
!= vect_used_by_reduction
693 && relevant
!= vect_used_only_live
)
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
697 "unsupported use of reduction.\n");
702 case vect_nested_cycle
:
703 if (relevant
!= vect_unused_in_scope
704 && relevant
!= vect_used_in_outer_by_reduction
705 && relevant
!= vect_used_in_outer
)
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
709 "unsupported use of nested cycle.\n");
715 case vect_double_reduction_def
:
716 if (relevant
!= vect_unused_in_scope
717 && relevant
!= vect_used_by_reduction
718 && relevant
!= vect_used_only_live
)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
722 "unsupported use of double reduction.\n");
732 if (is_pattern_stmt_p (stmt_vinfo
))
734 /* Pattern statements are not inserted into the code, so
735 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
736 have to scan the RHS or function arguments instead. */
737 if (is_gimple_assign (stmt
))
739 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
740 tree op
= gimple_assign_rhs1 (stmt
);
743 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
745 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
746 relevant
, &worklist
, false)
747 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
748 relevant
, &worklist
, false))
752 for (; i
< gimple_num_ops (stmt
); i
++)
754 op
= gimple_op (stmt
, i
);
755 if (TREE_CODE (op
) == SSA_NAME
756 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
761 else if (is_gimple_call (stmt
))
763 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
765 tree arg
= gimple_call_arg (stmt
, i
);
766 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
773 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
775 tree op
= USE_FROM_PTR (use_p
);
776 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
781 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
783 gather_scatter_info gs_info
;
784 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
786 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
790 } /* while worklist */
796 /* Function vect_model_simple_cost.
798 Models cost for simple operations, i.e. those that only emit ncopies of a
799 single op. Right now, this does not account for multiple insns that could
800 be generated for the single vector op. We will handle that shortly. */
/* Cost model for simple operations emitting NCOPIES of one vector op:
   one prologue cost per constant/external operand (invariant needs a
   broadcast) plus NCOPIES vector_stmt costs in the loop body.
   NOTE(review): lossy extraction — braces, the declaration of i and the
   early return after the PURE_SLP_STMT check (orig. 807-814) are
   missing from this span.  */
803 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
804 enum vect_def_type
*dt
,
805 stmt_vector_for_cost
*prologue_cost_vec
,
806 stmt_vector_for_cost
*body_cost_vec
)
809 int inside_cost
= 0, prologue_cost
= 0;
811 /* The SLP costs were already calculated during SLP tree build. */
812 if (PURE_SLP_STMT (stmt_info
))
815 /* FORNOW: Assuming maximum 2 args per stmts. */
816 for (i
= 0; i
< 2; i
++)
817 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
818 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
819 stmt_info
, 0, vect_prologue
);
821 /* Pass the inside-of-loop statements to the target-specific cost model. */
822 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
823 stmt_info
, 0, vect_body
);
825 if (dump_enabled_p ())
826 dump_printf_loc (MSG_NOTE
, vect_location
,
827 "vect_model_simple_cost: inside_cost = %d, "
828 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
832 /* Model cost for type demotion and promotion operations. PWR is normally
833 zero for single-step promotions and demotions. It will be one if
834 two-step promotion/demotion is required, and so on. Each additional
835 step doubles the number of instructions required. */
/* Cost model for type promotion/demotion chains.  PWR is the number of
   extra widening/narrowing steps beyond the first; each step i
   contributes vect_pow2-scaled vec_promote_demote costs, and each
   constant/external operand adds one prologue vector_stmt cost.
   NOTE(review): lossy extraction — braces, the declarations of i/tmp,
   the early return for PURE_SLP_STMT, the loop_vinfo/bb_vinfo branch
   heads (orig. 849-855) and the '?:' alternative for tmp plus the
   vect_body location argument (orig. 859, 862) are missing.  */
838 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
839 enum vect_def_type
*dt
, int pwr
)
842 int inside_cost
= 0, prologue_cost
= 0;
843 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
844 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
845 void *target_cost_data
;
847 /* The SLP costs were already calculated during SLP tree build. */
848 if (PURE_SLP_STMT (stmt_info
))
/* Pick the cost-data blob from whichever vec_info kind owns the stmt.  */
852 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
854 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
856 for (i
= 0; i
< pwr
+ 1; i
++)
858 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
860 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
861 vec_promote_demote
, stmt_info
, 0,
865 /* FORNOW: Assuming maximum 2 args per stmts. */
866 for (i
= 0; i
< 2; i
++)
867 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
868 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
869 stmt_info
, 0, vect_prologue
);
871 if (dump_enabled_p ())
872 dump_printf_loc (MSG_NOTE
, vect_location
,
873 "vect_model_promotion_demotion_cost: inside_cost = %d, "
874 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
877 /* Function vect_model_store_cost
879 Models cost for stores. In the case of grouped accesses, one access
880 has the overhead of the grouped access attributed to it. */
/* Cost model for (possibly grouped) vector stores: prologue broadcast
   cost for invariant RHS, once-per-group permute costs for
   permute-and-store groups, the stores themselves via
   vect_get_store_cost, and extract costs for elementwise/strided-SLP
   accesses.
   NOTE(review): lossy extraction — braces, the first_stmt_p conjunct of
   the permute condition (orig. line 915), the group_size argument of
   the strided dump (orig. 928-930) and the `else' around the
   vect_get_store_cost call (orig. line 938) are missing from this
   span.  */
883 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
884 vect_memory_access_type memory_access_type
,
885 enum vect_def_type dt
, slp_tree slp_node
,
886 stmt_vector_for_cost
*prologue_cost_vec
,
887 stmt_vector_for_cost
*body_cost_vec
)
889 unsigned int inside_cost
= 0, prologue_cost
= 0;
890 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
891 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
892 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
/* An invariant stored value must be broadcast once in the prologue.  */
894 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
895 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
896 stmt_info
, 0, vect_prologue
);
898 /* Grouped stores update all elements in the group at once,
899 so we want the DR for the first statement. */
900 if (!slp_node
&& grouped_access_p
)
902 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
903 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
906 /* True if we should include any once-per-group costs as well as
907 the cost of the statement itself. For SLP we only get called
908 once per group anyhow. */
909 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
911 /* We assume that the cost of a single store-lanes instruction is
912 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
913 access is instead being provided by a permute-and-store operation,
914 include the cost of the permutes. */
916 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
918 /* Uses a high and low interleave or shuffle operations for each
920 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
921 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
922 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
923 stmt_info
, 0, vect_body
);
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_NOTE
, vect_location
,
927 "vect_model_store_cost: strided group_size = %d .\n",
931 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
932 /* Costs of the stores. */
933 if (memory_access_type
== VMAT_ELEMENTWISE
)
934 /* N scalar stores plus extracting the elements. */
935 inside_cost
+= record_stmt_cost (body_cost_vec
,
936 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
937 scalar_store
, stmt_info
, 0, vect_body
);
939 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
/* Elementwise / strided-SLP stores additionally pay to extract each
   scalar element out of the vector.  */
941 if (memory_access_type
== VMAT_ELEMENTWISE
942 || memory_access_type
== VMAT_STRIDED_SLP
)
943 inside_cost
+= record_stmt_cost (body_cost_vec
,
944 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
945 vec_to_scalar
, stmt_info
, 0, vect_body
);
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_NOTE
, vect_location
,
949 "vect_model_store_cost: inside_cost = %d, "
950 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
954 /* Calculate cost of DR's memory access. */
/* Add the per-access cost of DR's stores to *INSIDE_COST, dispatching
   on the supported alignment scheme: plain vector_store when aligned,
   unaligned_store with the misalignment when supported, or VECT_MAX_COST
   to veto vectorization when unsupported.
   NOTE(review): lossy extraction — braces, `case dr_aligned:', the
   vect_body argument / break statements, the tail of the "unaligned
   supported by ..." dump string and the default arm (orig. 965-967,
   970-971, 975-977, 987-990, 998-1004) are missing from this span.  */
956 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
957 unsigned int *inside_cost
,
958 stmt_vector_for_cost
*body_cost_vec
)
960 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
961 gimple
*stmt
= DR_STMT (dr
);
962 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
964 switch (alignment_support_scheme
)
968 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
969 vector_store
, stmt_info
, 0,
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE
, vect_location
,
974 "vect_model_store_cost: aligned.\n");
978 case dr_unaligned_supported
:
980 /* Here, we assign an additional cost for the unaligned store. */
981 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
982 unaligned_store
, stmt_info
,
983 DR_MISALIGNMENT (dr
), vect_body
);
984 if (dump_enabled_p ())
985 dump_printf_loc (MSG_NOTE
, vect_location
,
986 "vect_model_store_cost: unaligned supported by "
991 case dr_unaligned_unsupported
:
/* Prohibitive cost: effectively rejects this vectorization.  */
993 *inside_cost
= VECT_MAX_COST
;
995 if (dump_enabled_p ())
996 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
997 "vect_model_store_cost: unsupported access.\n");
1007 /* Function vect_model_load_cost
1009 Models cost for loads. In the case of grouped accesses, one access has
1010 the overhead of the grouped access attributed to it. Since unaligned
1011 accesses are supported for loads, we also account for the costs of the
1012 access scheme chosen. */
/* Cost model for (possibly grouped) vector loads: once-per-group
   permute costs for load-and-permute groups, the loads themselves
   (N scalar loads for elementwise, otherwise via vect_get_load_cost),
   and a vec_construct cost to assemble scalars for elementwise /
   strided-SLP accesses.
   NOTE(review): lossy extraction — braces, the slp_node parameter
   (orig. line 1017), the first_stmt_p conjunct of the permute condition
   (orig. 1043), the group_size dump argument (orig. 1056-1058) and the
   `else' before the vect_get_load_cost call (orig. 1068) are missing
   from this span.  */
1015 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1016 vect_memory_access_type memory_access_type
,
1018 stmt_vector_for_cost
*prologue_cost_vec
,
1019 stmt_vector_for_cost
*body_cost_vec
)
1021 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1022 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1023 unsigned int inside_cost
= 0, prologue_cost
= 0;
1024 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1026 /* Grouped loads read all elements in the group at once,
1027 so we want the DR for the first statement. */
1028 if (!slp_node
&& grouped_access_p
)
1030 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1031 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1034 /* True if we should include any once-per-group costs as well as
1035 the cost of the statement itself. For SLP we only get called
1036 once per group anyhow. */
1037 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1039 /* We assume that the cost of a single load-lanes instruction is
1040 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1041 access is instead being provided by a load-and-permute operation,
1042 include the cost of the permutes. */
1044 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1046 /* Uses an even and odd extract operations or shuffle operations
1047 for each needed permute. */
1048 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1049 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1050 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1051 stmt_info
, 0, vect_body
);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE
, vect_location
,
1055 "vect_model_load_cost: strided group_size = %d .\n",
1059 /* The loads themselves. */
1060 if (memory_access_type
== VMAT_ELEMENTWISE
)
1062 /* N scalar loads plus gathering them into a vector. */
1063 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1064 inside_cost
+= record_stmt_cost (body_cost_vec
,
1065 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1066 scalar_load
, stmt_info
, 0, vect_body
);
1069 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1070 &inside_cost
, &prologue_cost
,
1071 prologue_cost_vec
, body_cost_vec
, true);
/* Elementwise / strided-SLP loads additionally pay to build the vector
   from the loaded scalars.  */
1072 if (memory_access_type
== VMAT_ELEMENTWISE
1073 || memory_access_type
== VMAT_STRIDED_SLP
)
1074 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1075 stmt_info
, 0, vect_body
);
1077 if (dump_enabled_p ())
1078 dump_printf_loc (MSG_NOTE
, vect_location
,
1079 "vect_model_load_cost: inside_cost = %d, "
1080 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1084 /* Calculate cost of DR's memory access. */
1086 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1087 bool add_realign_cost
, unsigned int *inside_cost
,
1088 unsigned int *prologue_cost
,
1089 stmt_vector_for_cost
*prologue_cost_vec
,
1090 stmt_vector_for_cost
*body_cost_vec
,
1091 bool record_prologue_costs
)
1093 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1094 gimple
*stmt
= DR_STMT (dr
);
1095 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1097 switch (alignment_support_scheme
)
1101 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1102 stmt_info
, 0, vect_body
);
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE
, vect_location
,
1106 "vect_model_load_cost: aligned.\n");
1110 case dr_unaligned_supported
:
1112 /* Here, we assign an additional cost for the unaligned load. */
1113 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1114 unaligned_load
, stmt_info
,
1115 DR_MISALIGNMENT (dr
), vect_body
);
1117 if (dump_enabled_p ())
1118 dump_printf_loc (MSG_NOTE
, vect_location
,
1119 "vect_model_load_cost: unaligned supported by "
1124 case dr_explicit_realign
:
1126 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1127 vector_load
, stmt_info
, 0, vect_body
);
1128 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1129 vec_perm
, stmt_info
, 0, vect_body
);
1131 /* FIXME: If the misalignment remains fixed across the iterations of
1132 the containing loop, the following cost should be added to the
1134 if (targetm
.vectorize
.builtin_mask_for_load
)
1135 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1136 stmt_info
, 0, vect_body
);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE
, vect_location
,
1140 "vect_model_load_cost: explicit realign\n");
1144 case dr_explicit_realign_optimized
:
1146 if (dump_enabled_p ())
1147 dump_printf_loc (MSG_NOTE
, vect_location
,
1148 "vect_model_load_cost: unaligned software "
1151 /* Unaligned software pipeline has a load of an address, an initial
1152 load, and possibly a mask operation to "prime" the loop. However,
1153 if this is an access in a group of loads, which provide grouped
1154 access, then the above cost should only be considered for one
1155 access in the group. Inside the loop, there is a load op
1156 and a realignment op. */
1158 if (add_realign_cost
&& record_prologue_costs
)
1160 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1161 vector_stmt
, stmt_info
,
1163 if (targetm
.vectorize
.builtin_mask_for_load
)
1164 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1165 vector_stmt
, stmt_info
,
1169 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1170 stmt_info
, 0, vect_body
);
1171 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1172 stmt_info
, 0, vect_body
);
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE
, vect_location
,
1176 "vect_model_load_cost: explicit realign optimized"
1182 case dr_unaligned_unsupported
:
1184 *inside_cost
= VECT_MAX_COST
;
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1188 "vect_model_load_cost: unsupported access.\n");
1197 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1198 the loop preheader for the vectorized stmt STMT. */
1201 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1204 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1207 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1212 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1216 if (nested_in_vect_loop_p (loop
, stmt
))
1219 pe
= loop_preheader_edge (loop
);
1220 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1221 gcc_assert (!new_bb
);
1225 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1227 gimple_stmt_iterator gsi_bb_start
;
1229 gcc_assert (bb_vinfo
);
1230 bb
= BB_VINFO_BB (bb_vinfo
);
1231 gsi_bb_start
= gsi_after_labels (bb
);
1232 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1236 if (dump_enabled_p ())
1238 dump_printf_loc (MSG_NOTE
, vect_location
,
1239 "created new init_stmt: ");
1240 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1244 /* Function vect_init_vector.
1246 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1247 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1248 vector type a vector with all elements equal to VAL is created first.
1249 Place the initialization at BSI if it is not NULL. Otherwise, place the
1250 initialization at the loop preheader.
1251 Return the DEF of INIT_STMT.
1252 It will be used in the vectorization of STMT. */
1255 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1260 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1261 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1263 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1264 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1266 /* Scalar boolean value should be transformed into
1267 all zeros or all ones value before building a vector. */
1268 if (VECTOR_BOOLEAN_TYPE_P (type
))
1270 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1271 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1273 if (CONSTANT_CLASS_P (val
))
1274 val
= integer_zerop (val
) ? false_val
: true_val
;
1277 new_temp
= make_ssa_name (TREE_TYPE (type
));
1278 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1279 val
, true_val
, false_val
);
1280 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1284 else if (CONSTANT_CLASS_P (val
))
1285 val
= fold_convert (TREE_TYPE (type
), val
);
1288 new_temp
= make_ssa_name (TREE_TYPE (type
));
1289 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1290 init_stmt
= gimple_build_assign (new_temp
,
1291 fold_build1 (VIEW_CONVERT_EXPR
,
1295 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1296 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1300 val
= build_vector_from_val (type
, val
);
1303 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1304 init_stmt
= gimple_build_assign (new_temp
, val
);
1305 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1309 /* Function vect_get_vec_def_for_operand_1.
1311 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1312 DT that will be used in the vectorized stmt. */
1315 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1319 stmt_vec_info def_stmt_info
= NULL
;
1323 /* operand is a constant or a loop invariant. */
1324 case vect_constant_def
:
1325 case vect_external_def
:
1326 /* Code should use vect_get_vec_def_for_operand. */
1329 /* operand is defined inside the loop. */
1330 case vect_internal_def
:
1332 /* Get the def from the vectorized stmt. */
1333 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1335 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1336 /* Get vectorized pattern statement. */
1338 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1339 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1340 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1341 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1342 gcc_assert (vec_stmt
);
1343 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1344 vec_oprnd
= PHI_RESULT (vec_stmt
);
1345 else if (is_gimple_call (vec_stmt
))
1346 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1348 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1352 /* operand is defined by a loop header phi - reduction */
1353 case vect_reduction_def
:
1354 case vect_double_reduction_def
:
1355 case vect_nested_cycle
:
1356 /* Code should use get_initial_def_for_reduction. */
1359 /* operand is defined by loop-header phi - induction. */
1360 case vect_induction_def
:
1362 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1364 /* Get the def from the vectorized stmt. */
1365 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1366 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1367 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1368 vec_oprnd
= PHI_RESULT (vec_stmt
);
1370 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1380 /* Function vect_get_vec_def_for_operand.
1382 OP is an operand in STMT. This function returns a (vector) def that will be
1383 used in the vectorized stmt for STMT.
1385 In the case that OP is an SSA_NAME which is defined in the loop, then
1386 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1388 In case OP is an invariant or constant, a new stmt that creates a vector def
1389 needs to be introduced. VECTYPE may be used to specify a required type for
1390 vector invariant. */
1393 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1396 enum vect_def_type dt
;
1398 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1399 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1401 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_NOTE
, vect_location
,
1404 "vect_get_vec_def_for_operand: ");
1405 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1406 dump_printf (MSG_NOTE
, "\n");
1409 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1410 gcc_assert (is_simple_use
);
1411 if (def_stmt
&& dump_enabled_p ())
1413 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1414 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1417 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1419 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1423 vector_type
= vectype
;
1424 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1425 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1426 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1428 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1430 gcc_assert (vector_type
);
1431 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1434 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1438 /* Function vect_get_vec_def_for_stmt_copy
1440 Return a vector-def for an operand. This function is used when the
1441 vectorized stmt to be created (by the caller to this function) is a "copy"
1442 created in case the vectorized result cannot fit in one vector, and several
1443 copies of the vector-stmt are required. In this case the vector-def is
1444 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1445 of the stmt that defines VEC_OPRND.
1446 DT is the type of the vector def VEC_OPRND.
1449 In case the vectorization factor (VF) is bigger than the number
1450 of elements that can fit in a vectype (nunits), we have to generate
1451 more than one vector stmt to vectorize the scalar stmt. This situation
1452 arises when there are multiple data-types operated upon in the loop; the
1453 smallest data-type determines the VF, and as a result, when vectorizing
1454 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1455 vector stmt (each computing a vector of 'nunits' results, and together
1456 computing 'VF' results in each iteration). This function is called when
1457 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1458 which VF=16 and nunits=4, so the number of copies required is 4):
1460 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1462 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1463 VS1.1: vx.1 = memref1 VS1.2
1464 VS1.2: vx.2 = memref2 VS1.3
1465 VS1.3: vx.3 = memref3
1467 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1468 VSnew.1: vz1 = vx.1 + ... VSnew.2
1469 VSnew.2: vz2 = vx.2 + ... VSnew.3
1470 VSnew.3: vz3 = vx.3 + ...
1472 The vectorization of S1 is explained in vectorizable_load.
1473 The vectorization of S2:
1474 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1475 the function 'vect_get_vec_def_for_operand' is called to
1476 get the relevant vector-def for each operand of S2. For operand x it
1477 returns the vector-def 'vx.0'.
1479 To create the remaining copies of the vector-stmt (VSnew.j), this
1480 function is called to get the relevant vector-def for each operand. It is
1481 obtained from the respective VS1.j stmt, which is recorded in the
1482 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1484 For example, to obtain the vector-def 'vx.1' in order to create the
1485 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1486 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1487 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1488 and return its def ('vx.1').
1489 Overall, to create the above sequence this function will be called 3 times:
1490 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1491 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1492 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1495 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1497 gimple
*vec_stmt_for_operand
;
1498 stmt_vec_info def_stmt_info
;
1500 /* Do nothing; can reuse same def. */
1501 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1504 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1505 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1506 gcc_assert (def_stmt_info
);
1507 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1508 gcc_assert (vec_stmt_for_operand
);
1509 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1510 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1512 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1517 /* Get vectorized definitions for the operands to create a copy of an original
1518 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1521 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1522 vec
<tree
> *vec_oprnds0
,
1523 vec
<tree
> *vec_oprnds1
)
1525 tree vec_oprnd
= vec_oprnds0
->pop ();
1527 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1528 vec_oprnds0
->quick_push (vec_oprnd
);
1530 if (vec_oprnds1
&& vec_oprnds1
->length ())
1532 vec_oprnd
= vec_oprnds1
->pop ();
1533 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1534 vec_oprnds1
->quick_push (vec_oprnd
);
1539 /* Get vectorized definitions for OP0 and OP1.
1540 REDUC_INDEX is the index of reduction operand in case of reduction,
1541 and -1 otherwise. */
1544 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1545 vec
<tree
> *vec_oprnds0
,
1546 vec
<tree
> *vec_oprnds1
,
1547 slp_tree slp_node
, int reduc_index
)
1551 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1552 auto_vec
<tree
> ops (nops
);
1553 auto_vec
<vec
<tree
> > vec_defs (nops
);
1555 ops
.quick_push (op0
);
1557 ops
.quick_push (op1
);
1559 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1561 *vec_oprnds0
= vec_defs
[0];
1563 *vec_oprnds1
= vec_defs
[1];
1569 vec_oprnds0
->create (1);
1570 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1571 vec_oprnds0
->quick_push (vec_oprnd
);
1575 vec_oprnds1
->create (1);
1576 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1577 vec_oprnds1
->quick_push (vec_oprnd
);
1583 /* Function vect_finish_stmt_generation.
1585 Insert a new stmt. */
1588 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1589 gimple_stmt_iterator
*gsi
)
1591 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1592 vec_info
*vinfo
= stmt_info
->vinfo
;
1594 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1596 if (!gsi_end_p (*gsi
)
1597 && gimple_has_mem_ops (vec_stmt
))
1599 gimple
*at_stmt
= gsi_stmt (*gsi
);
1600 tree vuse
= gimple_vuse (at_stmt
);
1601 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1603 tree vdef
= gimple_vdef (at_stmt
);
1604 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1605 /* If we have an SSA vuse and insert a store, update virtual
1606 SSA form to avoid triggering the renamer. Do so only
1607 if we can easily see all uses - which is what almost always
1608 happens with the way vectorized stmts are inserted. */
1609 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1610 && ((is_gimple_assign (vec_stmt
)
1611 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1612 || (is_gimple_call (vec_stmt
)
1613 && !(gimple_call_flags (vec_stmt
)
1614 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1616 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1617 gimple_set_vdef (vec_stmt
, new_vdef
);
1618 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1622 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1624 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1626 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1629 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1632 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1634 /* While EH edges will generally prevent vectorization, stmt might
1635 e.g. be in a must-not-throw region. Ensure newly created stmts
1636 that could throw are part of the same region. */
1637 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1638 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1639 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1642 /* We want to vectorize a call to combined function CFN with function
1643 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1644 as the types of all inputs. Check whether this is possible using
1645 an internal function, returning its code if so or IFN_LAST if not. */
1648 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1649 tree vectype_out
, tree vectype_in
)
1652 if (internal_fn_p (cfn
))
1653 ifn
= as_internal_fn (cfn
);
1655 ifn
= associated_internal_fn (fndecl
);
1656 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1658 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1659 if (info
.vectorizable
)
1661 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1662 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1663 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1664 OPTIMIZE_FOR_SPEED
))
1672 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1673 gimple_stmt_iterator
*);
1675 /* STMT is a non-strided load or store, meaning that it accesses
1676 elements with a known constant step. Return -1 if that step
1677 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1680 compare_step_with_zero (gimple
*stmt
)
1682 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1683 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1685 if (loop_vinfo
&& nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), stmt
))
1686 step
= STMT_VINFO_DR_STEP (stmt_info
);
1688 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
1689 return tree_int_cst_compare (step
, size_zero_node
);
1692 /* If the target supports a permute mask that reverses the elements in
1693 a vector of type VECTYPE, return that mask, otherwise return null. */
1696 perm_mask_for_reverse (tree vectype
)
1701 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1702 sel
= XALLOCAVEC (unsigned char, nunits
);
1704 for (i
= 0; i
< nunits
; ++i
)
1705 sel
[i
] = nunits
- 1 - i
;
1707 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
1709 return vect_gen_perm_mask_checked (vectype
, sel
);
1712 /* A subroutine of get_load_store_type, with a subset of the same
1713 arguments. Handle the case where STMT is part of a grouped load
1716 For stores, the statements in the group are all consecutive
1717 and there is no gap at the end. For loads, the statements in the
1718 group might not be consecutive; there can be gaps between statements
1719 as well as at the end. */
1722 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1723 vec_load_store_type vls_type
,
1724 vect_memory_access_type
*memory_access_type
)
1726 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1727 vec_info
*vinfo
= stmt_info
->vinfo
;
1728 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1729 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1730 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1731 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1732 bool single_element_p
= (stmt
== first_stmt
1733 && !GROUP_NEXT_ELEMENT (stmt_info
));
1734 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1735 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1737 /* True if the vectorized statements would access beyond the last
1738 statement in the group. */
1739 bool overrun_p
= false;
1741 /* True if we can cope with such overrun by peeling for gaps, so that
1742 there is at least one final scalar iteration after the vector loop. */
1743 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1745 /* There can only be a gap at the end of the group if the stride is
1746 known at compile time. */
1747 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1749 /* Stores can't yet have gaps. */
1750 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1754 if (STMT_VINFO_STRIDED_P (stmt_info
))
1756 /* Try to use consecutive accesses of GROUP_SIZE elements,
1757 separated by the stride, until we have a complete vector.
1758 Fall back to scalar accesses if that isn't possible. */
1759 if (nunits
% group_size
== 0)
1760 *memory_access_type
= VMAT_STRIDED_SLP
;
1762 *memory_access_type
= VMAT_ELEMENTWISE
;
1766 overrun_p
= loop_vinfo
&& gap
!= 0;
1767 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1770 "Grouped store with gaps requires"
1771 " non-consecutive accesses\n");
1774 if (overrun_p
&& !can_overrun_p
)
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1778 "Peeling for outer loop is not supported\n");
1781 *memory_access_type
= VMAT_CONTIGUOUS
;
1786 /* We can always handle this case using elementwise accesses,
1787 but see if something more efficient is available. */
1788 *memory_access_type
= VMAT_ELEMENTWISE
;
1790 /* If there is a gap at the end of the group then these optimizations
1791 would access excess elements in the last iteration. */
1792 bool would_overrun_p
= (gap
!= 0);
1793 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1794 && (can_overrun_p
|| !would_overrun_p
)
1795 && compare_step_with_zero (stmt
) > 0)
1797 /* First try using LOAD/STORE_LANES. */
1798 if (vls_type
== VLS_LOAD
1799 ? vect_load_lanes_supported (vectype
, group_size
)
1800 : vect_store_lanes_supported (vectype
, group_size
))
1802 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1803 overrun_p
= would_overrun_p
;
1806 /* If that fails, try using permuting loads. */
1807 if (*memory_access_type
== VMAT_ELEMENTWISE
1808 && (vls_type
== VLS_LOAD
1809 ? vect_grouped_load_supported (vectype
, single_element_p
,
1811 : vect_grouped_store_supported (vectype
, group_size
)))
1813 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1814 overrun_p
= would_overrun_p
;
1819 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1821 /* STMT is the leader of the group. Check the operands of all the
1822 stmts of the group. */
1823 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1826 gcc_assert (gimple_assign_single_p (next_stmt
));
1827 tree op
= gimple_assign_rhs1 (next_stmt
);
1829 enum vect_def_type dt
;
1830 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1832 if (dump_enabled_p ())
1833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1834 "use not simple.\n");
1837 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1843 gcc_assert (can_overrun_p
);
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1846 "Data access with gaps requires scalar "
1848 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
1854 /* A subroutine of get_load_store_type, with a subset of the same
1855 arguments. Handle the case where STMT is a load or store that
1856 accesses consecutive elements with a negative step. */
1858 static vect_memory_access_type
1859 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1860 vec_load_store_type vls_type
,
1861 unsigned int ncopies
)
1863 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1864 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1865 dr_alignment_support alignment_support_scheme
;
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1871 "multiple types with negative step.\n");
1872 return VMAT_ELEMENTWISE
;
1875 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1876 if (alignment_support_scheme
!= dr_aligned
1877 && alignment_support_scheme
!= dr_unaligned_supported
)
1879 if (dump_enabled_p ())
1880 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1881 "negative step but alignment required.\n");
1882 return VMAT_ELEMENTWISE
;
1885 if (vls_type
== VLS_STORE_INVARIANT
)
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE
, vect_location
,
1889 "negative step with invariant source;"
1890 " no permute needed.\n");
1891 return VMAT_CONTIGUOUS_DOWN
;
1894 if (!perm_mask_for_reverse (vectype
))
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1898 "negative step and reversing not supported.\n");
1899 return VMAT_ELEMENTWISE
;
1902 return VMAT_CONTIGUOUS_REVERSE
;
1905 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1906 if there is a memory access type that the vectorized form can use,
1907 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1908 or scatters, fill in GS_INFO accordingly.
1910 SLP says whether we're performing SLP rather than loop vectorization.
1911 VECTYPE is the vector type that the vectorized statements will use.
1912 NCOPIES is the number of vector statements that will be needed. */
1915 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1916 vec_load_store_type vls_type
, unsigned int ncopies
,
1917 vect_memory_access_type
*memory_access_type
,
1918 gather_scatter_info
*gs_info
)
1920 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1921 vec_info
*vinfo
= stmt_info
->vinfo
;
1922 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1923 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1925 *memory_access_type
= VMAT_GATHER_SCATTER
;
1927 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1929 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1930 &gs_info
->offset_dt
,
1931 &gs_info
->offset_vectype
))
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1935 "%s index use not simple.\n",
1936 vls_type
== VLS_LOAD
? "gather" : "scatter");
1940 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1942 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1943 memory_access_type
))
1946 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1949 *memory_access_type
= VMAT_ELEMENTWISE
;
1953 int cmp
= compare_step_with_zero (stmt
);
1955 *memory_access_type
= get_negative_load_store_type
1956 (stmt
, vectype
, vls_type
, ncopies
);
1959 gcc_assert (vls_type
== VLS_LOAD
);
1960 *memory_access_type
= VMAT_INVARIANT
;
1963 *memory_access_type
= VMAT_CONTIGUOUS
;
1966 /* FIXME: At the moment the cost model seems to underestimate the
1967 cost of using elementwise accesses. This check preserves the
1968 traditional behavior until that can be fixed. */
1969 if (*memory_access_type
== VMAT_ELEMENTWISE
1970 && !STMT_VINFO_STRIDED_P (stmt_info
))
1972 if (dump_enabled_p ())
1973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1974 "not falling back to elementwise accesses\n");
1980 /* Function vectorizable_mask_load_store.
1982 Check if STMT performs a conditional load or store that can be vectorized.
1983 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1984 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1985 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1988 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1989 gimple
**vec_stmt
, slp_tree slp_node
)
1991 tree vec_dest
= NULL
;
1992 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1993 stmt_vec_info prev_stmt_info
;
1994 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1995 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1996 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1997 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1998 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1999 tree rhs_vectype
= NULL_TREE
;
2004 tree dataref_ptr
= NULL_TREE
;
2006 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2010 gather_scatter_info gs_info
;
2011 vec_load_store_type vls_type
;
2014 enum vect_def_type dt
;
2016 if (slp_node
!= NULL
)
2019 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2020 gcc_assert (ncopies
>= 1);
2022 mask
= gimple_call_arg (stmt
, 2);
2024 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
2027 /* FORNOW. This restriction should be relaxed. */
2028 if (nested_in_vect_loop
&& ncopies
> 1)
2030 if (dump_enabled_p ())
2031 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2032 "multiple types in nested loop.");
2036 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2039 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2043 if (!STMT_VINFO_DATA_REF (stmt_info
))
2046 elem_type
= TREE_TYPE (vectype
);
2048 if (TREE_CODE (mask
) != SSA_NAME
)
2051 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2055 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2057 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2058 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2061 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2063 tree rhs
= gimple_call_arg (stmt
, 3);
2064 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2066 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2067 vls_type
= VLS_STORE_INVARIANT
;
2069 vls_type
= VLS_STORE
;
2072 vls_type
= VLS_LOAD
;
2074 vect_memory_access_type memory_access_type
;
2075 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2076 &memory_access_type
, &gs_info
))
2079 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2081 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2083 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2084 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2086 if (dump_enabled_p ())
2087 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2088 "masked gather with integer mask not supported.");
2092 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2096 "unsupported access type for masked %s.\n",
2097 vls_type
== VLS_LOAD
? "load" : "store");
2100 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2101 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2102 TYPE_MODE (mask_vectype
),
2103 vls_type
== VLS_LOAD
)
2105 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2108 if (!vec_stmt
) /* transformation not required. */
2110 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2111 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2112 if (vls_type
== VLS_LOAD
)
2113 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2116 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2117 dt
, NULL
, NULL
, NULL
);
2120 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2124 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2126 tree vec_oprnd0
= NULL_TREE
, op
;
2127 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2128 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2129 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2130 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2131 tree mask_perm_mask
= NULL_TREE
;
2132 edge pe
= loop_preheader_edge (loop
);
2135 enum { NARROW
, NONE
, WIDEN
} modifier
;
2136 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2138 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2139 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2140 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2141 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2142 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2143 scaletype
= TREE_VALUE (arglist
);
2144 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2145 && types_compatible_p (srctype
, masktype
));
2147 if (nunits
== gather_off_nunits
)
2149 else if (nunits
== gather_off_nunits
/ 2)
2151 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
2154 for (i
= 0; i
< gather_off_nunits
; ++i
)
2155 sel
[i
] = i
| nunits
;
2157 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2159 else if (nunits
== gather_off_nunits
* 2)
2161 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
2164 for (i
= 0; i
< nunits
; ++i
)
2165 sel
[i
] = i
< gather_off_nunits
2166 ? i
: i
+ nunits
- gather_off_nunits
;
2168 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2170 for (i
= 0; i
< nunits
; ++i
)
2171 sel
[i
] = i
| gather_off_nunits
;
2172 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2177 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2179 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2180 if (!is_gimple_min_invariant (ptr
))
2182 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2183 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2184 gcc_assert (!new_bb
);
2187 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2189 prev_stmt_info
= NULL
;
2190 for (j
= 0; j
< ncopies
; ++j
)
2192 if (modifier
== WIDEN
&& (j
& 1))
2193 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2194 perm_mask
, stmt
, gsi
);
2197 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2200 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2202 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2204 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2205 == TYPE_VECTOR_SUBPARTS (idxtype
));
2206 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2207 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2209 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2210 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2214 if (mask_perm_mask
&& (j
& 1))
2215 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2216 mask_perm_mask
, stmt
, gsi
);
2220 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2223 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2224 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2228 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2230 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2231 == TYPE_VECTOR_SUBPARTS (masktype
));
2232 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2233 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2235 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2236 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2242 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2245 if (!useless_type_conversion_p (vectype
, rettype
))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2248 == TYPE_VECTOR_SUBPARTS (rettype
));
2249 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2250 gimple_call_set_lhs (new_stmt
, op
);
2251 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2252 var
= make_ssa_name (vec_dest
);
2253 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2254 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2258 var
= make_ssa_name (vec_dest
, new_stmt
);
2259 gimple_call_set_lhs (new_stmt
, var
);
2262 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2264 if (modifier
== NARROW
)
2271 var
= permute_vec_elements (prev_res
, var
,
2272 perm_mask
, stmt
, gsi
);
2273 new_stmt
= SSA_NAME_DEF_STMT (var
);
2276 if (prev_stmt_info
== NULL
)
2277 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2279 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2280 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2283 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2285 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2287 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2288 stmt_info
= vinfo_for_stmt (stmt
);
2290 tree lhs
= gimple_call_lhs (stmt
);
2291 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2292 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2293 set_vinfo_for_stmt (stmt
, NULL
);
2294 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2295 gsi_replace (gsi
, new_stmt
, true);
2298 else if (vls_type
!= VLS_LOAD
)
2300 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2301 prev_stmt_info
= NULL
;
2302 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2303 for (i
= 0; i
< ncopies
; i
++)
2305 unsigned align
, misalign
;
2309 tree rhs
= gimple_call_arg (stmt
, 3);
2310 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2311 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2312 /* We should have catched mismatched types earlier. */
2313 gcc_assert (useless_type_conversion_p (vectype
,
2314 TREE_TYPE (vec_rhs
)));
2315 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2316 NULL_TREE
, &dummy
, gsi
,
2317 &ptr_incr
, false, &inv_p
);
2318 gcc_assert (!inv_p
);
2322 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2323 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2324 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2325 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2326 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2327 TYPE_SIZE_UNIT (vectype
));
2330 align
= TYPE_ALIGN_UNIT (vectype
);
2331 if (aligned_access_p (dr
))
2333 else if (DR_MISALIGNMENT (dr
) == -1)
2335 align
= TYPE_ALIGN_UNIT (elem_type
);
2339 misalign
= DR_MISALIGNMENT (dr
);
2340 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2342 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2343 misalign
? misalign
& -misalign
: align
);
2345 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2346 ptr
, vec_mask
, vec_rhs
);
2347 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2349 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2351 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2352 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2357 tree vec_mask
= NULL_TREE
;
2358 prev_stmt_info
= NULL
;
2359 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2360 for (i
= 0; i
< ncopies
; i
++)
2362 unsigned align
, misalign
;
2366 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2367 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2368 NULL_TREE
, &dummy
, gsi
,
2369 &ptr_incr
, false, &inv_p
);
2370 gcc_assert (!inv_p
);
2374 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2375 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2376 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2377 TYPE_SIZE_UNIT (vectype
));
2380 align
= TYPE_ALIGN_UNIT (vectype
);
2381 if (aligned_access_p (dr
))
2383 else if (DR_MISALIGNMENT (dr
) == -1)
2385 align
= TYPE_ALIGN_UNIT (elem_type
);
2389 misalign
= DR_MISALIGNMENT (dr
);
2390 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2392 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2393 misalign
? misalign
& -misalign
: align
);
2395 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2397 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2398 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2400 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2402 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2403 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2407 if (vls_type
== VLS_LOAD
)
2409 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2411 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2413 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2414 stmt_info
= vinfo_for_stmt (stmt
);
2416 tree lhs
= gimple_call_lhs (stmt
);
2417 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2418 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2419 set_vinfo_for_stmt (stmt
, NULL
);
2420 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2421 gsi_replace (gsi
, new_stmt
, true);
2427 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2428 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2429 in a single step. On success, store the binary pack code in
/* NOTE(review): this chunk is a lossy extraction -- the embedded original
   line numbers jump (2430-2432, 2435, 2438-2440, 2445-2448, 2450+ are
   missing), so the function's closing "return" paths and braces are not
   visible here.  Comments only; no missing code is reconstructed.  */
2433 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2434 tree_code
*convert_code
)
/* Both element types must be integral for a pack-style narrowing.  */
2436 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2437 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
/* Only a single-step narrowing qualifies; multi_step_cvt stays 0 when
   supportable_narrowing_operation needs no intermediate types.  */
2441 int multi_step_cvt
= 0;
2442 auto_vec
<tree
, 8> interm_types
;
2443 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2444 &code
, &multi_step_cvt
,
/* On success report the pack code to the caller through *CONVERT_CODE.  */
2449 *convert_code
= code
;
2453 /* Function vectorizable_call.
2455 Check if GS performs a function call that can be vectorized.
2456 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2457 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2458 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): lossy extraction -- the embedded original line numbers jump
   throughout (e.g. 2459-2460, 2462-2467 missing), so many statements,
   returns and braces of the real function are absent.  Only comments are
   added below; missing code is not reconstructed.  */
2461 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
/* Local state: per-copy vector operands, stmt_info bookkeeping, and the
   def-type of up to three call arguments (nargs <= 3 is enforced below).  */
2468 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2469 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2470 tree vectype_out
, vectype_in
;
2473 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2474 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2475 vec_info
*vinfo
= stmt_info
->vinfo
;
2476 tree fndecl
, new_temp
, rhs_type
;
2478 enum vect_def_type dt
[3]
2479 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2480 gimple
*new_stmt
= NULL
;
2482 vec
<tree
> vargs
= vNULL
;
/* Relation between input and output vector lane counts; set from
   nunits_in/nunits_out further down.  */
2483 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* Early-out when the statement is irrelevant to the (loop or BB)
   vectorization being performed.  */
2487 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2490 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2494 /* Is GS a vectorizable call? */
2495 stmt
= dyn_cast
<gcall
*> (gs
);
/* Masked loads/stores are handled by a dedicated routine.  */
2499 if (gimple_call_internal_p (stmt
)
2500 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2501 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2502 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
/* Only calls whose result is a plain SSA lhs are supported.  */
2505 if (gimple_call_lhs (stmt
) == NULL_TREE
2506 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2509 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2511 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2513 /* Process function arguments. */
2514 rhs_type
= NULL_TREE
;
2515 vectype_in
= NULL_TREE
;
2516 nargs
= gimple_call_num_args (stmt
);
2518 /* Bail out if the function has more than three arguments, we do not have
2519 interesting builtin functions to vectorize with more than two arguments
2520 except for fma. No arguments is also not good. */
2521 if (nargs
== 0 || nargs
> 3)
2524 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2525 if (gimple_call_internal_p (stmt
)
2526 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2529 rhs_type
= unsigned_type_node
;
/* Validate each argument: all must share one scalar type and one vector
   type, and each must be a "simple use" whose def-type is recorded in
   dt[i] for later def-creation.  */
2532 for (i
= 0; i
< nargs
; i
++)
2536 op
= gimple_call_arg (stmt
, i
);
2538 /* We can only handle calls with arguments of the same type. */
2540 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2544 "argument types differ.\n");
2548 rhs_type
= TREE_TYPE (op
);
2550 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2554 "use not simple.\n");
2559 vectype_in
= opvectype
;
2561 && opvectype
!= vectype_in
)
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2565 "argument vector types differ.\n");
2569 /* If all arguments are external or constant defs use a vector type with
2570 the same size as the output vector type. */
2572 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2574 gcc_assert (vectype_in
);
2577 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2580 "no vectype for scalar type ");
2581 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2582 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify the call as widening, plain, or narrowing by comparing the
   lane counts of the input and output vector types (assignments to
   `modifier' sit on extraction-dropped lines 2592/2594/2596).  */
2589 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2590 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2591 if (nunits_in
== nunits_out
/ 2)
2593 else if (nunits_out
== nunits_in
)
2595 else if (nunits_out
== nunits_in
/ 2)
2600 /* We only handle functions that do not read or clobber memory. */
2601 if (gimple_vuse (stmt
))
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2605 "function reads from or writes to memory.\n");
2609 /* For now, we only vectorize functions if a target specific builtin
2610 is available. TODO -- in some cases, it might be profitable to
2611 insert the calls for pieces of the vector, in order to be able
2612 to vectorize other operations in the loop. */
2614 internal_fn ifn
= IFN_LAST
;
2615 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2616 tree callee
= gimple_call_fndecl (stmt
);
2618 /* First try using an internal function. */
2619 tree_code convert_code
= ERROR_MARK
;
2621 && (modifier
== NONE
2622 || (modifier
== NARROW
2623 && simple_integer_narrowing (vectype_out
, vectype_in
,
2625 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2628 /* If that fails, try asking for a target-specific built-in function. */
2629 if (ifn
== IFN_LAST
)
2631 if (cfn
!= CFN_LAST
)
2632 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2633 (cfn
, vectype_out
, vectype_in
);
2635 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2636 (callee
, vectype_out
, vectype_in
);
/* Neither an internal function nor a target builtin exists; the only
   remaining vectorizable case is GOMP_SIMD_LANE on the loop's simduid.  */
2639 if (ifn
== IFN_LAST
&& !fndecl
)
2641 if (cfn
== CFN_GOMP_SIMD_LANE
2644 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2645 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2646 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2647 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2649 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2650 { 0, 1, 2, ... vf - 1 } vector. */
2651 gcc_assert (nargs
== 0);
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2657 "function is not vectorizable.\n");
/* ncopies = how many vector statements realize one scalar statement;
   for a NARROW call lowered via a target builtin it is based on the
   (wider-laned) output type, otherwise on the input type.  */
2664 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2665 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2667 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2669 /* Sanity check: make sure that at least one copy of the vectorized stmt
2670 needs to be generated. */
2671 gcc_assert (ncopies
>= 1);
/* Analysis-only invocation: record the stmt kind and its cost model
   contribution, then return without emitting code.  */
2673 if (!vec_stmt
) /* transformation not required. */
2675 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2676 if (dump_enabled_p ())
2677 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2679 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2680 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
/* A narrowing internal-fn call additionally pays for ncopies/2
   pack (promote/demote) operations.  */
2681 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2682 vec_promote_demote
, stmt_info
, 0, vect_body
);
/* Transformation phase starts here.  */
2689 if (dump_enabled_p ())
2690 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2693 scalar_dest
= gimple_call_lhs (stmt
);
2694 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2696 prev_stmt_info
= NULL
;
/* Case 1: no modifier, or NARROW handled through an internal function
   plus an explicit pack (convert_code) step.  */
2697 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2699 tree prev_res
= NULL_TREE
;
2700 for (j
= 0; j
< ncopies
; ++j
)
2702 /* Build argument list for the vectorized call. */
2704 vargs
.create (nargs
);
/* SLP path: fetch all defs per argument up front.  */
2710 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2711 vec
<tree
> vec_oprnds0
;
2713 for (i
= 0; i
< nargs
; i
++)
2714 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2715 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2716 vec_oprnds0
= vec_defs
[0];
2718 /* Arguments are ready. Create the new vector stmt. */
2719 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2722 for (k
= 0; k
< nargs
; k
++)
2724 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2725 vargs
[k
] = vec_oprndsk
[i
];
2727 if (modifier
== NARROW
)
/* Emit the half-width internal call; every second result is
   combined with the previous one via convert_code.  */
2729 tree half_res
= make_ssa_name (vectype_in
);
2730 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2731 gimple_call_set_lhs (new_stmt
, half_res
);
2732 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2735 prev_res
= half_res
;
2738 new_temp
= make_ssa_name (vec_dest
);
2739 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2740 prev_res
, half_res
);
2744 if (ifn
!= IFN_LAST
)
2745 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2747 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2748 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2749 gimple_call_set_lhs (new_stmt
, new_temp
);
2751 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2752 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Release per-argument SLP def vectors.  */
2755 for (i
= 0; i
< nargs
; i
++)
2757 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2758 vec_oprndsi
.release ();
/* Non-SLP path: first copy (j==0) uses operand defs, later copies
   chain via vect_get_vec_def_for_stmt_copy.  */
2763 for (i
= 0; i
< nargs
; i
++)
2765 op
= gimple_call_arg (stmt
, i
);
2768 = vect_get_vec_def_for_operand (op
, stmt
);
2771 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2773 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2776 vargs
.quick_push (vec_oprnd0
);
/* GOMP_SIMD_LANE: materialize the lane-index constant vector
   { j*nunits_out, ..., j*nunits_out + nunits_out-1 }.  */
2779 if (gimple_call_internal_p (stmt
)
2780 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2782 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2784 for (k
= 0; k
< nunits_out
; ++k
)
2785 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2786 tree cst
= build_vector (vectype_out
, v
);
2788 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2789 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2790 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2791 new_temp
= make_ssa_name (vec_dest
);
2792 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2794 else if (modifier
== NARROW
)
2796 tree half_res
= make_ssa_name (vectype_in
);
2797 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2798 gimple_call_set_lhs (new_stmt
, half_res
);
2799 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2802 prev_res
= half_res
;
2805 new_temp
= make_ssa_name (vec_dest
);
2806 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2807 prev_res
, half_res
);
2811 if (ifn
!= IFN_LAST
)
2812 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2814 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2815 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2816 gimple_call_set_lhs (new_stmt
, new_temp
);
2818 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Link the copies into the STMT_VINFO_RELATED_STMT chain; for the
   NARROW-with-ifn case the first full result appears at j==1.  */
2820 if (j
== (modifier
== NARROW
? 1 : 0))
2821 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2823 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2825 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Case 2: NARROW via a target builtin -- each vector result consumes
   two input vectors, so arguments are pushed pairwise.  */
2828 else if (modifier
== NARROW
)
2830 for (j
= 0; j
< ncopies
; ++j
)
2832 /* Build argument list for the vectorized call. */
2834 vargs
.create (nargs
* 2);
2840 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2841 vec
<tree
> vec_oprnds0
;
2843 for (i
= 0; i
< nargs
; i
++)
2844 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2845 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2846 vec_oprnds0
= vec_defs
[0];
2848 /* Arguments are ready. Create the new vector stmt. */
2849 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2853 for (k
= 0; k
< nargs
; k
++)
2855 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2856 vargs
.quick_push (vec_oprndsk
[i
]);
2857 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2859 if (ifn
!= IFN_LAST
)
2860 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2862 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2863 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2864 gimple_call_set_lhs (new_stmt
, new_temp
);
2865 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2866 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2869 for (i
= 0; i
< nargs
; i
++)
2871 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2872 vec_oprndsi
.release ();
/* Non-SLP narrowing: fetch two chained defs per argument per copy.  */
2877 for (i
= 0; i
< nargs
; i
++)
2879 op
= gimple_call_arg (stmt
, i
);
2883 = vect_get_vec_def_for_operand (op
, stmt
);
2885 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2889 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2891 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2893 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2896 vargs
.quick_push (vec_oprnd0
);
2897 vargs
.quick_push (vec_oprnd1
);
2900 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2901 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2902 gimple_call_set_lhs (new_stmt
, new_temp
);
2903 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2906 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2908 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2910 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2913 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2916 /* No current target implements this case. */
2921 /* The call in STMT might prevent it from being removed in dce.
2922 We however cannot remove it here, due to the way the ssa name
2923 it defines is mapped to the new definition. So just replace
2924 rhs of the statement with something harmless. */
2929 type
= TREE_TYPE (scalar_dest
);
2930 if (is_pattern_stmt_p (stmt_info
))
2931 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2933 lhs
= gimple_call_lhs (stmt
);
/* Replace the scalar call with `lhs = 0' so DCE can remove it, and
   re-point the stmt_vec_info at the replacement.  */
2935 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2936 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2937 set_vinfo_for_stmt (stmt
, NULL
);
2938 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2939 gsi_replace (gsi
, new_stmt
, false);
/* Per-argument analysis record used by vectorizable_simd_clone_call.
   NOTE(review): lossy extraction -- original lines 2946-2948 and 2951
   (further fields, e.g. the ones read elsewhere as .vectype/.op/.align)
   are missing here and are not reconstructed.  */
2945 struct simd_call_arg_info
/* Def kind of the argument (constant/external/induction/...), as
   classified by vect_is_simple_use.  */
2949 enum vect_def_type dt
;
/* Step of a linear argument; 0 when the argument is not linear.  */
2950 HOST_WIDE_INT linear_step
;
/* True if the argument is linear within the simd lane only, not across
   the whole loop (see vect_simd_lane_linear).  */
2952 bool simd_lane_linear
;
2955 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2956 is linear within simd lane (but not within whole loop), note it in
/* NOTE(review): lossy extraction -- the embedded original line numbers jump
   (2957-2959, 2962, 2964, 2968-2969, and several case labels / returns are
   missing), so the switch skeleton below is incomplete.  Comments only.  */
2960 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2961 struct simd_call_arg_info
*arginfo
)
2963 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* OP must be defined as invariant_base p+ offset; otherwise give up.  */
2965 if (!is_gimple_assign (def_stmt
)
2966 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2967 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2970 tree base
= gimple_assign_rhs1 (def_stmt
);
2971 HOST_WIDE_INT linear_step
= 0;
2972 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the offset's SSA def chain, peeling constant additions and one
   constant multiplication, until the GOMP_SIMD_LANE call is reached.  */
2973 while (TREE_CODE (v
) == SSA_NAME
)
2976 def_stmt
= SSA_NAME_DEF_STMT (v
);
2977 if (is_gimple_assign (def_stmt
))
2978 switch (gimple_assign_rhs_code (def_stmt
))
/* (case PLUS_EXPR, per the dropped label): fold a constant addend
   into BASE; only allowed before a step has been seen.  */
2981 t
= gimple_assign_rhs2 (def_stmt
);
2982 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2984 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2985 v
= gimple_assign_rhs1 (def_stmt
);
/* (case MULT_EXPR, per the dropped label): at most one nonzero
   constant step is accepted.  */
2988 t
= gimple_assign_rhs2 (def_stmt
);
2989 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2991 linear_step
= tree_to_shwi (t
);
2992 v
= gimple_assign_rhs1 (def_stmt
);
/* (conversion case, per the dropped label): only look through
   non-narrowing integer conversions.  */
2995 t
= gimple_assign_rhs1 (def_stmt
);
2996 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2997 || (TYPE_PRECISION (TREE_TYPE (v
))
2998 < TYPE_PRECISION (TREE_TYPE (t
))))
/* Chain must terminate in GOMP_SIMD_LANE of the loop's simduid.  */
3007 else if (is_gimple_call (def_stmt
)
3008 && gimple_call_internal_p (def_stmt
)
3009 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
3011 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3012 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Success: record the discovered step and mark the argument as
   linear within the simd lane.  */
3017 arginfo
->linear_step
= linear_step
;
3019 arginfo
->simd_lane_linear
= true;
3025 /* Function vectorizable_simd_clone_call.
3027 Check if STMT performs a function call that can be vectorized
3028 by calling a simd clone of the function.
3029 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3030 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3031 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3034 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3035 gimple
**vec_stmt
, slp_tree slp_node
)
3040 tree vec_oprnd0
= NULL_TREE
;
3041 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3043 unsigned int nunits
;
3044 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3045 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3046 vec_info
*vinfo
= stmt_info
->vinfo
;
3047 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3048 tree fndecl
, new_temp
;
3050 gimple
*new_stmt
= NULL
;
3052 auto_vec
<simd_call_arg_info
> arginfo
;
3053 vec
<tree
> vargs
= vNULL
;
3055 tree lhs
, rtype
, ratype
;
3056 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3058 /* Is STMT a vectorizable call? */
3059 if (!is_gimple_call (stmt
))
3062 fndecl
= gimple_call_fndecl (stmt
);
3063 if (fndecl
== NULL_TREE
)
3066 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3067 if (node
== NULL
|| node
->simd_clones
== NULL
)
3070 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3073 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3077 if (gimple_call_lhs (stmt
)
3078 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3081 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3083 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3085 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3092 /* Process function arguments. */
3093 nargs
= gimple_call_num_args (stmt
);
3095 /* Bail out if the function has zero arguments. */
3099 arginfo
.reserve (nargs
, true);
3101 for (i
= 0; i
< nargs
; i
++)
3103 simd_call_arg_info thisarginfo
;
3106 thisarginfo
.linear_step
= 0;
3107 thisarginfo
.align
= 0;
3108 thisarginfo
.op
= NULL_TREE
;
3109 thisarginfo
.simd_lane_linear
= false;
3111 op
= gimple_call_arg (stmt
, i
);
3112 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3113 &thisarginfo
.vectype
)
3114 || thisarginfo
.dt
== vect_uninitialized_def
)
3116 if (dump_enabled_p ())
3117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3118 "use not simple.\n");
3122 if (thisarginfo
.dt
== vect_constant_def
3123 || thisarginfo
.dt
== vect_external_def
)
3124 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3126 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3128 /* For linear arguments, the analyze phase should have saved
3129 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3130 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3131 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3133 gcc_assert (vec_stmt
);
3134 thisarginfo
.linear_step
3135 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3137 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3138 thisarginfo
.simd_lane_linear
3139 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3140 == boolean_true_node
);
3141 /* If loop has been peeled for alignment, we need to adjust it. */
3142 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3143 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3144 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3146 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3147 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3148 tree opt
= TREE_TYPE (thisarginfo
.op
);
3149 bias
= fold_convert (TREE_TYPE (step
), bias
);
3150 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3152 = fold_build2 (POINTER_TYPE_P (opt
)
3153 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3154 thisarginfo
.op
, bias
);
3158 && thisarginfo
.dt
!= vect_constant_def
3159 && thisarginfo
.dt
!= vect_external_def
3161 && TREE_CODE (op
) == SSA_NAME
3162 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3164 && tree_fits_shwi_p (iv
.step
))
3166 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3167 thisarginfo
.op
= iv
.base
;
3169 else if ((thisarginfo
.dt
== vect_constant_def
3170 || thisarginfo
.dt
== vect_external_def
)
3171 && POINTER_TYPE_P (TREE_TYPE (op
)))
3172 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3173 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3175 if (POINTER_TYPE_P (TREE_TYPE (op
))
3176 && !thisarginfo
.linear_step
3178 && thisarginfo
.dt
!= vect_constant_def
3179 && thisarginfo
.dt
!= vect_external_def
3182 && TREE_CODE (op
) == SSA_NAME
)
3183 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3185 arginfo
.quick_push (thisarginfo
);
3188 unsigned int badness
= 0;
3189 struct cgraph_node
*bestn
= NULL
;
3190 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3191 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3193 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3194 n
= n
->simdclone
->next_clone
)
3196 unsigned int this_badness
= 0;
3197 if (n
->simdclone
->simdlen
3198 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3199 || n
->simdclone
->nargs
!= nargs
)
3201 if (n
->simdclone
->simdlen
3202 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3203 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3204 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3205 if (n
->simdclone
->inbranch
)
3206 this_badness
+= 2048;
3207 int target_badness
= targetm
.simd_clone
.usable (n
);
3208 if (target_badness
< 0)
3210 this_badness
+= target_badness
* 512;
3211 /* FORNOW: Have to add code to add the mask argument. */
3212 if (n
->simdclone
->inbranch
)
3214 for (i
= 0; i
< nargs
; i
++)
3216 switch (n
->simdclone
->args
[i
].arg_type
)
3218 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3219 if (!useless_type_conversion_p
3220 (n
->simdclone
->args
[i
].orig_type
,
3221 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3223 else if (arginfo
[i
].dt
== vect_constant_def
3224 || arginfo
[i
].dt
== vect_external_def
3225 || arginfo
[i
].linear_step
)
3228 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3229 if (arginfo
[i
].dt
!= vect_constant_def
3230 && arginfo
[i
].dt
!= vect_external_def
)
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3235 if (arginfo
[i
].dt
== vect_constant_def
3236 || arginfo
[i
].dt
== vect_external_def
3237 || (arginfo
[i
].linear_step
3238 != n
->simdclone
->args
[i
].linear_step
))
3241 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3242 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3243 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3244 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3245 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3246 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3250 case SIMD_CLONE_ARG_TYPE_MASK
:
3253 if (i
== (size_t) -1)
3255 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3260 if (arginfo
[i
].align
)
3261 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3262 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3264 if (i
== (size_t) -1)
3266 if (bestn
== NULL
|| this_badness
< badness
)
3269 badness
= this_badness
;
3276 for (i
= 0; i
< nargs
; i
++)
3277 if ((arginfo
[i
].dt
== vect_constant_def
3278 || arginfo
[i
].dt
== vect_external_def
)
3279 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3282 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3284 if (arginfo
[i
].vectype
== NULL
3285 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3286 > bestn
->simdclone
->simdlen
))
3290 fndecl
= bestn
->decl
;
3291 nunits
= bestn
->simdclone
->simdlen
;
3292 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3294 /* If the function isn't const, only allow it in simd loops where user
3295 has asserted that at least nunits consecutive iterations can be
3296 performed using SIMD instructions. */
3297 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3298 && gimple_vuse (stmt
))
3301 /* Sanity check: make sure that at least one copy of the vectorized stmt
3302 needs to be generated. */
3303 gcc_assert (ncopies
>= 1);
3305 if (!vec_stmt
) /* transformation not required. */
3307 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3308 for (i
= 0; i
< nargs
; i
++)
3309 if ((bestn
->simdclone
->args
[i
].arg_type
3310 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3311 || (bestn
->simdclone
->args
[i
].arg_type
3312 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3314 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3316 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3317 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3318 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3319 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3320 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3321 tree sll
= arginfo
[i
].simd_lane_linear
3322 ? boolean_true_node
: boolean_false_node
;
3323 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3325 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_NOTE
, vect_location
,
3328 "=== vectorizable_simd_clone_call ===\n");
3329 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3339 scalar_dest
= gimple_call_lhs (stmt
);
3340 vec_dest
= NULL_TREE
;
3345 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3346 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3347 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3350 rtype
= TREE_TYPE (ratype
);
3354 prev_stmt_info
= NULL
;
3355 for (j
= 0; j
< ncopies
; ++j
)
3357 /* Build argument list for the vectorized call. */
3359 vargs
.create (nargs
);
3363 for (i
= 0; i
< nargs
; i
++)
3365 unsigned int k
, l
, m
, o
;
3367 op
= gimple_call_arg (stmt
, i
);
3368 switch (bestn
->simdclone
->args
[i
].arg_type
)
3370 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3371 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3372 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3373 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3375 if (TYPE_VECTOR_SUBPARTS (atype
)
3376 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3378 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3379 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3380 / TYPE_VECTOR_SUBPARTS (atype
));
3381 gcc_assert ((k
& (k
- 1)) == 0);
3384 = vect_get_vec_def_for_operand (op
, stmt
);
3387 vec_oprnd0
= arginfo
[i
].op
;
3388 if ((m
& (k
- 1)) == 0)
3390 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3393 arginfo
[i
].op
= vec_oprnd0
;
3395 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3397 bitsize_int ((m
& (k
- 1)) * prec
));
3399 = gimple_build_assign (make_ssa_name (atype
),
3401 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3402 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3406 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3407 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3408 gcc_assert ((k
& (k
- 1)) == 0);
3409 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3411 vec_alloc (ctor_elts
, k
);
3414 for (l
= 0; l
< k
; l
++)
3416 if (m
== 0 && l
== 0)
3418 = vect_get_vec_def_for_operand (op
, stmt
);
3421 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3423 arginfo
[i
].op
= vec_oprnd0
;
3426 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3430 vargs
.safe_push (vec_oprnd0
);
3433 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3435 = gimple_build_assign (make_ssa_name (atype
),
3437 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3438 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3443 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3444 vargs
.safe_push (op
);
3446 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3447 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3452 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3457 edge pe
= loop_preheader_edge (loop
);
3458 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3459 gcc_assert (!new_bb
);
3461 if (arginfo
[i
].simd_lane_linear
)
3463 vargs
.safe_push (arginfo
[i
].op
);
3466 tree phi_res
= copy_ssa_name (op
);
3467 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3468 set_vinfo_for_stmt (new_phi
,
3469 new_stmt_vec_info (new_phi
, loop_vinfo
));
3470 add_phi_arg (new_phi
, arginfo
[i
].op
,
3471 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3473 = POINTER_TYPE_P (TREE_TYPE (op
))
3474 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3475 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3476 ? sizetype
: TREE_TYPE (op
);
3478 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3480 tree tcst
= wide_int_to_tree (type
, cst
);
3481 tree phi_arg
= copy_ssa_name (op
);
3483 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3484 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3485 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3486 set_vinfo_for_stmt (new_stmt
,
3487 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3488 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3490 arginfo
[i
].op
= phi_res
;
3491 vargs
.safe_push (phi_res
);
3496 = POINTER_TYPE_P (TREE_TYPE (op
))
3497 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3498 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3499 ? sizetype
: TREE_TYPE (op
);
3501 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3503 tree tcst
= wide_int_to_tree (type
, cst
);
3504 new_temp
= make_ssa_name (TREE_TYPE (op
));
3505 new_stmt
= gimple_build_assign (new_temp
, code
,
3506 arginfo
[i
].op
, tcst
);
3507 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3508 vargs
.safe_push (new_temp
);
3511 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3512 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3513 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3514 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3515 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3516 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3522 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3525 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3527 new_temp
= create_tmp_var (ratype
);
3528 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3529 == TYPE_VECTOR_SUBPARTS (rtype
))
3530 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3532 new_temp
= make_ssa_name (rtype
, new_stmt
);
3533 gimple_call_set_lhs (new_stmt
, new_temp
);
3535 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3539 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3542 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3543 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3544 gcc_assert ((k
& (k
- 1)) == 0);
3545 for (l
= 0; l
< k
; l
++)
3550 t
= build_fold_addr_expr (new_temp
);
3551 t
= build2 (MEM_REF
, vectype
, t
,
3552 build_int_cst (TREE_TYPE (t
),
3553 l
* prec
/ BITS_PER_UNIT
));
3556 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3557 size_int (prec
), bitsize_int (l
* prec
));
3559 = gimple_build_assign (make_ssa_name (vectype
), t
);
3560 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3561 if (j
== 0 && l
== 0)
3562 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3564 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3566 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3571 tree clobber
= build_constructor (ratype
, NULL
);
3572 TREE_THIS_VOLATILE (clobber
) = 1;
3573 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3574 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3578 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3580 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3581 / TYPE_VECTOR_SUBPARTS (rtype
));
3582 gcc_assert ((k
& (k
- 1)) == 0);
3583 if ((j
& (k
- 1)) == 0)
3584 vec_alloc (ret_ctor_elts
, k
);
3587 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3588 for (m
= 0; m
< o
; m
++)
3590 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3591 size_int (m
), NULL_TREE
, NULL_TREE
);
3593 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3594 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3595 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3596 gimple_assign_lhs (new_stmt
));
3598 tree clobber
= build_constructor (ratype
, NULL
);
3599 TREE_THIS_VOLATILE (clobber
) = 1;
3600 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3601 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3604 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3605 if ((j
& (k
- 1)) != k
- 1)
3607 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3609 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3610 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3612 if ((unsigned) j
== k
- 1)
3613 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3615 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3617 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3622 tree t
= build_fold_addr_expr (new_temp
);
3623 t
= build2 (MEM_REF
, vectype
, t
,
3624 build_int_cst (TREE_TYPE (t
), 0));
3626 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3627 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3628 tree clobber
= build_constructor (ratype
, NULL
);
3629 TREE_THIS_VOLATILE (clobber
) = 1;
3630 vect_finish_stmt_generation (stmt
,
3631 gimple_build_assign (new_temp
,
3637 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3639 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3641 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3646 /* The call in STMT might prevent it from being removed in dce.
3647 We however cannot remove it here, due to the way the ssa name
3648 it defines is mapped to the new definition. So just replace
3649 rhs of the statement with something harmless. */
3656 type
= TREE_TYPE (scalar_dest
);
3657 if (is_pattern_stmt_p (stmt_info
))
3658 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3660 lhs
= gimple_call_lhs (stmt
);
3661 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3664 new_stmt
= gimple_build_nop ();
3665 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3666 set_vinfo_for_stmt (stmt
, NULL
);
3667 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3668 gsi_replace (gsi
, new_stmt
, true);
3669 unlink_stmt_vdef (stmt
);
3675 /* Function vect_gen_widened_results_half
3677 Create a vector stmt whose code, type, number of arguments, and result
3678 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3679 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3680 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3681 needs to be created (DECL is a function-decl of a target-builtin).
3682 STMT is the original scalar stmt that we are vectorizing. */
3685 vect_gen_widened_results_half (enum tree_code code
,
3687 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3688 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3694 /* Generate half of the widened result: */
3695 if (code
== CALL_EXPR
)
3697 /* Target specific support */
3698 if (op_type
== binary_op
)
3699 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3701 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3702 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3703 gimple_call_set_lhs (new_stmt
, new_temp
);
3707 /* Generic support */
3708 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3709 if (op_type
!= binary_op
)
3711 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3712 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3713 gimple_assign_set_lhs (new_stmt
, new_temp
);
3715 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3721 /* Get vectorized definitions for loop-based vectorization. For the first
3722 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3723 scalar operand), and for the rest we get a copy with
3724 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3725 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3726 The vectors are collected into VEC_OPRNDS. */
3729 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3730 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3734 /* Get first vector operand. */
3735 /* All the vector operands except the very first one (that is scalar oprnd)
3737 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3738 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3740 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3742 vec_oprnds
->quick_push (vec_oprnd
);
3744 /* Get second vector operand. */
3745 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3746 vec_oprnds
->quick_push (vec_oprnd
);
3750 /* For conversion in multiple steps, continue to get operands
3753 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3757 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3758 For multi-step conversions store the resulting vectors and call the function
3762 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3763 int multi_step_cvt
, gimple
*stmt
,
3765 gimple_stmt_iterator
*gsi
,
3766 slp_tree slp_node
, enum tree_code code
,
3767 stmt_vec_info
*prev_stmt_info
)
3770 tree vop0
, vop1
, new_tmp
, vec_dest
;
3772 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3774 vec_dest
= vec_dsts
.pop ();
3776 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3778 /* Create demotion operation. */
3779 vop0
= (*vec_oprnds
)[i
];
3780 vop1
= (*vec_oprnds
)[i
+ 1];
3781 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3782 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3783 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3784 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3787 /* Store the resulting vector for next recursive call. */
3788 (*vec_oprnds
)[i
/2] = new_tmp
;
3791 /* This is the last step of the conversion sequence. Store the
3792 vectors in SLP_NODE or in vector info of the scalar statement
3793 (or in STMT_VINFO_RELATED_STMT chain). */
3795 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3798 if (!*prev_stmt_info
)
3799 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3801 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3803 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3808 /* For multi-step demotion operations we first generate demotion operations
3809 from the source type to the intermediate types, and then combine the
3810 results (stored in VEC_OPRNDS) in demotion operation to the destination
3814 /* At each level of recursion we have half of the operands we had at the
3816 vec_oprnds
->truncate ((i
+1)/2);
3817 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3818 stmt
, vec_dsts
, gsi
, slp_node
,
3819 VEC_PACK_TRUNC_EXPR
,
3823 vec_dsts
.quick_push (vec_dest
);
3827 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3828 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3829 the resulting vectors and call the function recursively. */
3832 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3833 vec
<tree
> *vec_oprnds1
,
3834 gimple
*stmt
, tree vec_dest
,
3835 gimple_stmt_iterator
*gsi
,
3836 enum tree_code code1
,
3837 enum tree_code code2
, tree decl1
,
3838 tree decl2
, int op_type
)
3841 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3842 gimple
*new_stmt1
, *new_stmt2
;
3843 vec
<tree
> vec_tmp
= vNULL
;
3845 vec_tmp
.create (vec_oprnds0
->length () * 2);
3846 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3848 if (op_type
== binary_op
)
3849 vop1
= (*vec_oprnds1
)[i
];
3853 /* Generate the two halves of promotion operation. */
3854 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3855 op_type
, vec_dest
, gsi
, stmt
);
3856 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3857 op_type
, vec_dest
, gsi
, stmt
);
3858 if (is_gimple_call (new_stmt1
))
3860 new_tmp1
= gimple_call_lhs (new_stmt1
);
3861 new_tmp2
= gimple_call_lhs (new_stmt2
);
3865 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3866 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3869 /* Store the results for the next step. */
3870 vec_tmp
.quick_push (new_tmp1
);
3871 vec_tmp
.quick_push (new_tmp2
);
3874 vec_oprnds0
->release ();
3875 *vec_oprnds0
= vec_tmp
;
3879 /* Check if STMT performs a conversion operation, that can be vectorized.
3880 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3881 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3882 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3885 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3886 gimple
**vec_stmt
, slp_tree slp_node
)
3890 tree op0
, op1
= NULL_TREE
;
3891 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3892 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3893 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3894 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3895 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3896 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3899 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3900 gimple
*new_stmt
= NULL
;
3901 stmt_vec_info prev_stmt_info
;
3904 tree vectype_out
, vectype_in
;
3906 tree lhs_type
, rhs_type
;
3907 enum { NARROW
, NONE
, WIDEN
} modifier
;
3908 vec
<tree
> vec_oprnds0
= vNULL
;
3909 vec
<tree
> vec_oprnds1
= vNULL
;
3911 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3912 vec_info
*vinfo
= stmt_info
->vinfo
;
3913 int multi_step_cvt
= 0;
3914 vec
<tree
> interm_types
= vNULL
;
3915 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3917 machine_mode rhs_mode
;
3918 unsigned short fltsz
;
3920 /* Is STMT a vectorizable conversion? */
3922 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3925 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3929 if (!is_gimple_assign (stmt
))
3932 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3935 code
= gimple_assign_rhs_code (stmt
);
3936 if (!CONVERT_EXPR_CODE_P (code
)
3937 && code
!= FIX_TRUNC_EXPR
3938 && code
!= FLOAT_EXPR
3939 && code
!= WIDEN_MULT_EXPR
3940 && code
!= WIDEN_LSHIFT_EXPR
)
3943 op_type
= TREE_CODE_LENGTH (code
);
3945 /* Check types of lhs and rhs. */
3946 scalar_dest
= gimple_assign_lhs (stmt
);
3947 lhs_type
= TREE_TYPE (scalar_dest
);
3948 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3950 op0
= gimple_assign_rhs1 (stmt
);
3951 rhs_type
= TREE_TYPE (op0
);
3953 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3954 && !((INTEGRAL_TYPE_P (lhs_type
)
3955 && INTEGRAL_TYPE_P (rhs_type
))
3956 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3957 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3960 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3961 && ((INTEGRAL_TYPE_P (lhs_type
)
3962 && (TYPE_PRECISION (lhs_type
)
3963 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3964 || (INTEGRAL_TYPE_P (rhs_type
)
3965 && (TYPE_PRECISION (rhs_type
)
3966 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3968 if (dump_enabled_p ())
3969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3970 "type conversion to/from bit-precision unsupported."
3975 /* Check the operands of the operation. */
3976 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3978 if (dump_enabled_p ())
3979 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3980 "use not simple.\n");
3983 if (op_type
== binary_op
)
3987 op1
= gimple_assign_rhs2 (stmt
);
3988 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3989 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3991 if (CONSTANT_CLASS_P (op0
))
3992 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3994 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3998 if (dump_enabled_p ())
3999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4000 "use not simple.\n");
4005 /* If op0 is an external or constant defs use a vector type of
4006 the same size as the output vector type. */
4008 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4010 gcc_assert (vectype_in
);
4013 if (dump_enabled_p ())
4015 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4016 "no vectype for scalar type ");
4017 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4018 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4024 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4025 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4027 if (dump_enabled_p ())
4029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4030 "can't convert between boolean and non "
4032 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4033 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4039 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4040 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4041 if (nunits_in
< nunits_out
)
4043 else if (nunits_out
== nunits_in
)
4048 /* Multiple types in SLP are handled by creating the appropriate number of
4049 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4053 else if (modifier
== NARROW
)
4054 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
4056 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4058 /* Sanity check: make sure that at least one copy of the vectorized stmt
4059 needs to be generated. */
4060 gcc_assert (ncopies
>= 1);
4062 /* Supportable by target? */
4066 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4068 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4073 if (dump_enabled_p ())
4074 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4075 "conversion not supported by target.\n");
4079 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4080 &code1
, &code2
, &multi_step_cvt
,
4083 /* Binary widening operation can only be supported directly by the
4085 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4089 if (code
!= FLOAT_EXPR
4090 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4091 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4094 rhs_mode
= TYPE_MODE (rhs_type
);
4095 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
4096 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
4097 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
4098 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
4101 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4102 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4103 if (cvt_type
== NULL_TREE
)
4106 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4108 if (!supportable_convert_operation (code
, vectype_out
,
4109 cvt_type
, &decl1
, &codecvt1
))
4112 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4113 cvt_type
, &codecvt1
,
4114 &codecvt2
, &multi_step_cvt
,
4118 gcc_assert (multi_step_cvt
== 0);
4120 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4121 vectype_in
, &code1
, &code2
,
4122 &multi_step_cvt
, &interm_types
))
4126 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
4129 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4130 codecvt2
= ERROR_MARK
;
4134 interm_types
.safe_push (cvt_type
);
4135 cvt_type
= NULL_TREE
;
4140 gcc_assert (op_type
== unary_op
);
4141 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4142 &code1
, &multi_step_cvt
,
4146 if (code
!= FIX_TRUNC_EXPR
4147 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4148 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4151 rhs_mode
= TYPE_MODE (rhs_type
);
4153 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4154 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4155 if (cvt_type
== NULL_TREE
)
4157 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4160 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4161 &code1
, &multi_step_cvt
,
4170 if (!vec_stmt
) /* transformation not required. */
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_NOTE
, vect_location
,
4174 "=== vectorizable_conversion ===\n");
4175 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4177 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4178 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4180 else if (modifier
== NARROW
)
4182 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4183 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4187 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4188 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4190 interm_types
.release ();
4195 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_NOTE
, vect_location
,
4197 "transform conversion. ncopies = %d.\n", ncopies
);
4199 if (op_type
== binary_op
)
4201 if (CONSTANT_CLASS_P (op0
))
4202 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4203 else if (CONSTANT_CLASS_P (op1
))
4204 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4207 /* In case of multi-step conversion, we first generate conversion operations
4208 to the intermediate types, and then from that types to the final one.
4209 We create vector destinations for the intermediate type (TYPES) received
4210 from supportable_*_operation, and store them in the correct order
4211 for future use in vect_create_vectorized_*_stmts (). */
4212 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4213 vec_dest
= vect_create_destination_var (scalar_dest
,
4214 (cvt_type
&& modifier
== WIDEN
)
4215 ? cvt_type
: vectype_out
);
4216 vec_dsts
.quick_push (vec_dest
);
4220 for (i
= interm_types
.length () - 1;
4221 interm_types
.iterate (i
, &intermediate_type
); i
--)
4223 vec_dest
= vect_create_destination_var (scalar_dest
,
4225 vec_dsts
.quick_push (vec_dest
);
4230 vec_dest
= vect_create_destination_var (scalar_dest
,
4232 ? vectype_out
: cvt_type
);
4236 if (modifier
== WIDEN
)
4238 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4239 if (op_type
== binary_op
)
4240 vec_oprnds1
.create (1);
4242 else if (modifier
== NARROW
)
4243 vec_oprnds0
.create (
4244 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4246 else if (code
== WIDEN_LSHIFT_EXPR
)
4247 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4250 prev_stmt_info
= NULL
;
4254 for (j
= 0; j
< ncopies
; j
++)
4257 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
4260 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4262 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4264 /* Arguments are ready, create the new vector stmt. */
4265 if (code1
== CALL_EXPR
)
4267 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4268 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4269 gimple_call_set_lhs (new_stmt
, new_temp
);
4273 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4274 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4275 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4276 gimple_assign_set_lhs (new_stmt
, new_temp
);
4279 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4281 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4284 if (!prev_stmt_info
)
4285 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4287 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4288 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4295 /* In case the vectorization factor (VF) is bigger than the number
4296 of elements that we can fit in a vectype (nunits), we have to
4297 generate more than one vector stmt - i.e - we need to "unroll"
4298 the vector stmt by a factor VF/nunits. */
4299 for (j
= 0; j
< ncopies
; j
++)
4306 if (code
== WIDEN_LSHIFT_EXPR
)
4311 /* Store vec_oprnd1 for every vector stmt to be created
4312 for SLP_NODE. We check during the analysis that all
4313 the shift arguments are the same. */
4314 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4315 vec_oprnds1
.quick_push (vec_oprnd1
);
4317 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4321 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4322 &vec_oprnds1
, slp_node
, -1);
4326 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4327 vec_oprnds0
.quick_push (vec_oprnd0
);
4328 if (op_type
== binary_op
)
4330 if (code
== WIDEN_LSHIFT_EXPR
)
4333 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4334 vec_oprnds1
.quick_push (vec_oprnd1
);
4340 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4341 vec_oprnds0
.truncate (0);
4342 vec_oprnds0
.quick_push (vec_oprnd0
);
4343 if (op_type
== binary_op
)
4345 if (code
== WIDEN_LSHIFT_EXPR
)
4348 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4350 vec_oprnds1
.truncate (0);
4351 vec_oprnds1
.quick_push (vec_oprnd1
);
4355 /* Arguments are ready. Create the new vector stmts. */
4356 for (i
= multi_step_cvt
; i
>= 0; i
--)
4358 tree this_dest
= vec_dsts
[i
];
4359 enum tree_code c1
= code1
, c2
= code2
;
4360 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4365 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4367 stmt
, this_dest
, gsi
,
4368 c1
, c2
, decl1
, decl2
,
4372 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4376 if (codecvt1
== CALL_EXPR
)
4378 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4379 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4380 gimple_call_set_lhs (new_stmt
, new_temp
);
4384 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4385 new_temp
= make_ssa_name (vec_dest
);
4386 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4390 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4393 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4396 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4399 if (!prev_stmt_info
)
4400 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4402 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4403 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4408 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4412 /* In case the vectorization factor (VF) is bigger than the number
4413 of elements that we can fit in a vectype (nunits), we have to
4414 generate more than one vector stmt - i.e - we need to "unroll"
4415 the vector stmt by a factor VF/nunits. */
4416 for (j
= 0; j
< ncopies
; j
++)
4420 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4424 vec_oprnds0
.truncate (0);
4425 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4426 vect_pow2 (multi_step_cvt
) - 1);
4429 /* Arguments are ready. Create the new vector stmts. */
4431 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4433 if (codecvt1
== CALL_EXPR
)
4435 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4436 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4437 gimple_call_set_lhs (new_stmt
, new_temp
);
4441 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4442 new_temp
= make_ssa_name (vec_dest
);
4443 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4447 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4448 vec_oprnds0
[i
] = new_temp
;
4451 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4452 stmt
, vec_dsts
, gsi
,
4457 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4461 vec_oprnds0
.release ();
4462 vec_oprnds1
.release ();
4463 interm_types
.release ();
4469 /* Function vectorizable_assignment.
4471 Check if STMT performs an assignment (copy) that can be vectorized.
4472 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4473 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4474 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this chunk is a fragmentary extraction of GCC's
   tree-vect-stmts.c -- original source lines are split across physical
   lines, the original line numbers are fused into the text, and several
   statements (returns, braces, condition halves) were dropped by the
   extraction.  The code below is preserved byte-for-byte; only comments
   were added.  Recover the pristine text from the GCC repository before
   attempting any code change.  */
/* vectorizable_assignment: check whether STMT is a plain copy (or a
   no-op conversion such as PAREN_EXPR / NOP / VIEW_CONVERT) that can be
   vectorized; when VEC_STMT is non-null, emit the vectorized copy
   statements at GSI and chain them via STMT_VINFO_RELATED_STMT.
   Presumably returns false for non-vectorizable stmts -- the return
   statements were dropped by the extraction; confirm against upstream.  */
4477 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4478 gimple
**vec_stmt
, slp_tree slp_node
)
4483 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4484 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* dt[] caches the def types of up to two operands for the cost model.  */
4487 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4490 vec
<tree
> vec_oprnds
= vNULL
;
4492 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4493 vec_info
*vinfo
= stmt_info
->vinfo
;
4494 gimple
*new_stmt
= NULL
;
4495 stmt_vec_info prev_stmt_info
= NULL
;
4496 enum tree_code code
;
/* Bail out unless the stmt is relevant to loop vectorization or we are
   doing basic-block SLP.  */
4499 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4502 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4506 /* Is vectorizable assignment? */
4507 if (!is_gimple_assign (stmt
))
4510 scalar_dest
= gimple_assign_lhs (stmt
);
4511 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept single operands and the no-op conversion codes only.  */
4514 code
= gimple_assign_rhs_code (stmt
);
4515 if (gimple_assign_single_p (stmt
)
4516 || code
== PAREN_EXPR
4517 || CONVERT_EXPR_CODE_P (code
))
4518 op
= gimple_assign_rhs1 (stmt
);
/* For VIEW_CONVERT_EXPR look through to the converted operand.  */
4522 if (code
== VIEW_CONVERT_EXPR
)
4523 op
= TREE_OPERAND (op
, 0);
4525 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4526 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4528 /* Multiple types in SLP are handled by creating the appropriate number of
4529 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4534 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4536 gcc_assert (ncopies
>= 1);
4538 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4540 if (dump_enabled_p ())
4541 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4542 "use not simple.\n");
4546 /* We can handle NOP_EXPR conversions that do not change the number
4547 of elements or the vector size. */
4548 if ((CONVERT_EXPR_CODE_P (code
)
4549 || code
== VIEW_CONVERT_EXPR
)
4551 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4552 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4553 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4556 /* We do not handle bit-precision changes. */
4557 if ((CONVERT_EXPR_CODE_P (code
)
4558 || code
== VIEW_CONVERT_EXPR
)
4559 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4560 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4561 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4562 || ((TYPE_PRECISION (TREE_TYPE (op
))
4563 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4564 /* But a conversion that does not change the bit-pattern is ok. */
4565 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4566 > TYPE_PRECISION (TREE_TYPE (op
)))
4567 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4568 /* Conversion between boolean types of different sizes is
4569 a simple assignment in case their vectypes are same
4571 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4572 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4574 if (dump_enabled_p ())
4575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4576 "type conversion to/from bit-precision "
/* Analysis phase: record the stmt kind and its cost; no code is
   generated when vec_stmt is null.  */
4581 if (!vec_stmt
) /* transformation not required. */
4583 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4584 if (dump_enabled_p ())
4585 dump_printf_loc (MSG_NOTE
, vect_location
,
4586 "=== vectorizable_assignment ===\n");
4587 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation phase: create NCOPIES vectorized copy stmts.  */
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4596 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* First iteration fetches the vector defs; later iterations fetch the
   defs of the previous copy (vect_get_vec_defs_for_stmt_copy).  */
4599 for (j
= 0; j
< ncopies
; j
++)
4603 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4605 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4607 /* Arguments are ready. create the new vector stmt. */
4608 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4610 if (CONVERT_EXPR_CODE_P (code
)
4611 || code
== VIEW_CONVERT_EXPR
)
4612 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4613 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4614 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4615 gimple_assign_set_lhs (new_stmt
, new_temp
);
4616 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4618 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: first copy goes into STMT_VINFO_VEC_STMT, later
   copies hang off the previous copy's RELATED_STMT.  */
4625 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4627 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4629 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4632 vec_oprnds
.release ();
4637 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4638 either as shift by a scalar or by a vector. */
/* NOTE(review): fragmentary extraction -- the return-type line, several
   `if` bodies and all `return` statements were dropped from this view.
   The visible logic probes optab_scalar first, falls back to
   optab_vector, then checks the instruction code for the vector mode.  */
4641 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4644 machine_mode vec_mode
;
4649 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer a vector-shifted-by-scalar optab ...  */
4653 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4655 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* ... and fall back to vector-shifted-by-vector.  */
4657 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4659 || (optab_handler (optab
, TYPE_MODE (vectype
))
4660 == CODE_FOR_nothing
))
4664 vec_mode
= TYPE_MODE (vectype
);
4665 icode
= (int) optab_handler (optab
, vec_mode
);
4666 if (icode
== CODE_FOR_nothing
)
4673 /* Function vectorizable_shift.
4675 Check if STMT performs a shift operation that can be vectorized.
4676 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4677 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4678 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): fragmentary extraction -- the `static bool` line,
   many `return false;` statements, braces and some condition halves
   were dropped by the extraction.  Code below is byte-identical to the
   fragment; only comments were added.  */
4681 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4682 gimple
**vec_stmt
, slp_tree slp_node
)
4686 tree op0
, op1
= NULL
;
4687 tree vec_oprnd1
= NULL_TREE
;
4688 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4690 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4691 enum tree_code code
;
4692 machine_mode vec_mode
;
4696 machine_mode optab_op2_mode
;
/* dt[0]/dt[1] cache the def types of the shifted value and the shift
   amount respectively.  */
4698 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4699 gimple
*new_stmt
= NULL
;
4700 stmt_vec_info prev_stmt_info
;
4707 vec
<tree
> vec_oprnds0
= vNULL
;
4708 vec
<tree
> vec_oprnds1
= vNULL
;
/* scalar_shift_arg tracks whether operand 1 stays a scalar shift count
   (vector/scalar shift) or must be widened to a vector.  */
4711 bool scalar_shift_arg
= true;
4712 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4713 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Bail out unless the stmt is relevant or we are doing BB SLP.  */
4716 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4719 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4723 /* Is STMT a vectorizable binary/unary operation? */
4724 if (!is_gimple_assign (stmt
))
4727 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only the four shift/rotate codes are handled here.  */
4730 code
= gimple_assign_rhs_code (stmt
);
4732 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4733 || code
== RROTATE_EXPR
))
4736 scalar_dest
= gimple_assign_lhs (stmt
);
4737 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
/* Reject destinations whose precision differs from their mode
   precision (bit-precision shifts).  */
4738 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4739 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4743 "bit-precision shifts not supported.\n");
4747 op0
= gimple_assign_rhs1 (stmt
);
4748 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4750 if (dump_enabled_p ())
4751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4752 "use not simple.\n");
4755 /* If op0 is an external or constant def use a vector type with
4756 the same size as the output vector type. */
4758 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4760 gcc_assert (vectype
);
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4765 "no vectype for scalar type\n");
/* Input and output vectors must hold the same number of elements.  */
4769 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4770 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4771 if (nunits_out
!= nunits_in
)
4774 op1
= gimple_assign_rhs2 (stmt
);
4775 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4777 if (dump_enabled_p ())
4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4779 "use not simple.\n");
4784 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4788 /* Multiple types in SLP are handled by creating the appropriate number of
4789 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4794 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4796 gcc_assert (ncopies
>= 1);
4798 /* Determine whether the shift amount is a vector, or scalar. If the
4799 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4801 if ((dt
[1] == vect_internal_def
4802 || dt
[1] == vect_induction_def
)
4804 scalar_shift_arg
= false;
4805 else if (dt
[1] == vect_constant_def
4806 || dt
[1] == vect_external_def
4807 || dt
[1] == vect_internal_def
)
4809 /* In SLP, need to check whether the shift count is the same,
4810 in loops if it is a constant or invariant, it is always
4814 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
/* Any mismatching count in the SLP group forces a vector shift arg.  */
4817 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4818 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4819 scalar_shift_arg
= false;
4822 /* If the shift amount is computed by a pattern stmt we cannot
4823 use the scalar amount directly thus give up and use a vector
4825 if (dt
[1] == vect_internal_def
)
4827 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4828 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4829 scalar_shift_arg
= false;
4834 if (dump_enabled_p ())
4835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4836 "operand mode requires invariant argument.\n");
4840 /* Vector shifted by vector. */
4841 if (!scalar_shift_arg
)
4843 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4844 if (dump_enabled_p ())
4845 dump_printf_loc (MSG_NOTE
, vect_location
,
4846 "vector/vector shift/rotate found.\n");
/* The shift-amount vector must match the mode of the value vector.  */
4849 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4850 if (op1_vectype
== NULL_TREE
4851 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4855 "unusable type for last operand in"
4856 " vector/vector shift/rotate.\n");
4860 /* See if the machine has a vector shifted by scalar insn and if not
4861 then see if it has a vector shifted by vector insn. */
4864 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4866 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4868 if (dump_enabled_p ())
4869 dump_printf_loc (MSG_NOTE
, vect_location
,
4870 "vector/scalar shift/rotate found.\n");
/* No vector/scalar pattern: retry with the vector/vector optab and
   demote the shift argument to a vector if that succeeds.  */
4874 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4876 && (optab_handler (optab
, TYPE_MODE (vectype
))
4877 != CODE_FOR_nothing
))
4879 scalar_shift_arg
= false;
4881 if (dump_enabled_p ())
4882 dump_printf_loc (MSG_NOTE
, vect_location
,
4883 "vector/vector shift/rotate found.\n");
4885 /* Unlike the other binary operators, shifts/rotates have
4886 the rhs being int, instead of the same type as the lhs,
4887 so make sure the scalar is the right type if we are
4888 dealing with vectors of long long/long/short/char. */
4889 if (dt
[1] == vect_constant_def
)
4890 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4891 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4895 && TYPE_MODE (TREE_TYPE (vectype
))
4896 != TYPE_MODE (TREE_TYPE (op1
)))
4898 if (dump_enabled_p ())
4899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4900 "unusable type for last operand in"
4901 " vector/vector shift/rotate.\n");
/* During transform (non-SLP) materialize the converted shift amount
   as an invariant vector.  */
4904 if (vec_stmt
&& !slp_node
)
4906 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4907 op1
= vect_init_vector (stmt
, op1
,
4908 TREE_TYPE (vectype
), NULL
);
4915 /* Supportable by target? */
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4923 vec_mode
= TYPE_MODE (vectype
);
4924 icode
= (int) optab_handler (optab
, vec_mode
);
4925 if (icode
== CODE_FOR_nothing
)
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4929 "op not supported by target.\n");
4930 /* Check only during analysis. */
4931 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4932 || (vf
< vect_min_worthwhile_factor (code
)
4935 if (dump_enabled_p ())
4936 dump_printf_loc (MSG_NOTE
, vect_location
,
4937 "proceeding using word mode.\n");
4940 /* Worthwhile without SIMD support? Check only during analysis. */
4941 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4942 && vf
< vect_min_worthwhile_factor (code
)
4945 if (dump_enabled_p ())
4946 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4947 "not worthwhile without SIMD support.\n");
/* Analysis phase: record stmt type and cost, no code generation.  */
4951 if (!vec_stmt
) /* transformation not required. */
4953 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_NOTE
, vect_location
,
4956 "=== vectorizable_shift ===\n");
4957 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation phase: emit NCOPIES vectorized shift stmts.  */
4963 if (dump_enabled_p ())
4964 dump_printf_loc (MSG_NOTE
, vect_location
,
4965 "transform binary/unary operation.\n");
4968 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4970 prev_stmt_info
= NULL
;
4971 for (j
= 0; j
< ncopies
; j
++)
4976 if (scalar_shift_arg
)
4978 /* Vector shl and shr insn patterns can be defined with scalar
4979 operand 2 (shift operand). In this case, use constant or loop
4980 invariant op1 directly, without extending it to vector mode
4982 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4983 if (!VECTOR_MODE_P (optab_op2_mode
))
4985 if (dump_enabled_p ())
4986 dump_printf_loc (MSG_NOTE
, vect_location
,
4987 "operand 1 using scalar mode.\n");
4989 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4990 vec_oprnds1
.quick_push (vec_oprnd1
);
4993 /* Store vec_oprnd1 for every vector stmt to be created
4994 for SLP_NODE. We check during the analysis that all
4995 the shift arguments are the same.
4996 TODO: Allow different constants for different vector
4997 stmts generated for an SLP instance. */
4998 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4999 vec_oprnds1
.quick_push (vec_oprnd1
);
5004 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5005 (a special case for certain kind of vector shifts); otherwise,
5006 operand 1 should be of a vector type (the usual case). */
5008 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5011 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5015 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5017 /* Arguments are ready. Create the new vector stmt. */
5018 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5020 vop1
= vec_oprnds1
[i
];
5021 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5022 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5023 gimple_assign_set_lhs (new_stmt
, new_temp
);
5024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5026 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies via STMT_VINFO_RELATED_STMT as in the other
   vectorizable_* routines.  */
5033 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5035 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5036 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5039 vec_oprnds0
.release ();
5040 vec_oprnds1
.release ();
5046 /* Function vectorizable_operation.
5048 Check if STMT performs a binary, unary or ternary operation that can
5050 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5051 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5052 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): fragmentary extraction -- the `static bool` line, many
   `return false;` statements, braces and some condition halves were
   dropped by the extraction.  Code below is byte-identical to the
   fragment; only comments were added.  */
5055 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5056 gimple
**vec_stmt
, slp_tree slp_node
)
5060 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5061 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5063 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5064 enum tree_code code
;
5065 machine_mode vec_mode
;
5069 bool target_support_p
;
/* dt[] caches def types for up to three operands (ternary ops).  */
5071 enum vect_def_type dt
[3]
5072 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5073 gimple
*new_stmt
= NULL
;
5074 stmt_vec_info prev_stmt_info
;
5080 vec
<tree
> vec_oprnds0
= vNULL
;
5081 vec
<tree
> vec_oprnds1
= vNULL
;
5082 vec
<tree
> vec_oprnds2
= vNULL
;
5083 tree vop0
, vop1
, vop2
;
5084 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5085 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Bail out unless the stmt is relevant or we are doing BB SLP.  */
5088 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5091 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5095 /* Is STMT a vectorizable binary/unary operation? */
5096 if (!is_gimple_assign (stmt
))
5099 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5102 code
= gimple_assign_rhs_code (stmt
);
5104 /* For pointer addition, we should use the normal plus for
5105 the vector addition. */
5106 if (code
== POINTER_PLUS_EXPR
)
5109 /* Support only unary or binary operations. */
5110 op_type
= TREE_CODE_LENGTH (code
);
5111 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5115 "num. args = %d (not unary/binary/ternary op).\n",
5120 scalar_dest
= gimple_assign_lhs (stmt
);
5121 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5123 /* Most operations cannot handle bit-precision types without extra
5125 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5126 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5127 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
5128 /* Exception are bitwise binary operations. */
5129 && code
!= BIT_IOR_EXPR
5130 && code
!= BIT_XOR_EXPR
5131 && code
!= BIT_AND_EXPR
)
5133 if (dump_enabled_p ())
5134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5135 "bit-precision arithmetic not supported.\n");
5139 op0
= gimple_assign_rhs1 (stmt
);
5140 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5144 "use not simple.\n");
5147 /* If op0 is an external or constant def use a vector type with
5148 the same size as the output vector type. */
5151 /* For boolean type we cannot determine vectype by
5152 invariant value (don't know whether it is a vector
5153 of booleans or vector of integers). We use output
5154 vectype because operations on boolean don't change
5156 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
5158 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5162 "not supported operation on bool value.\n");
5165 vectype
= vectype_out
;
5168 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5171 gcc_assert (vectype
);
5174 if (dump_enabled_p ())
5176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5177 "no vectype for scalar type ");
5178 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5180 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Input and output vectors must hold the same number of elements.  */
5186 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5187 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5188 if (nunits_out
!= nunits_in
)
/* Validate the remaining operands of binary/ternary operations.  */
5191 if (op_type
== binary_op
|| op_type
== ternary_op
)
5193 op1
= gimple_assign_rhs2 (stmt
);
5194 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5196 if (dump_enabled_p ())
5197 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5198 "use not simple.\n");
5202 if (op_type
== ternary_op
)
5204 op2
= gimple_assign_rhs3 (stmt
);
5205 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5207 if (dump_enabled_p ())
5208 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5209 "use not simple.\n");
5215 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5219 /* Multiple types in SLP are handled by creating the appropriate number of
5220 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5225 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
5227 gcc_assert (ncopies
>= 1);
5229 /* Shifts are handled in vectorizable_shift (). */
5230 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5231 || code
== RROTATE_EXPR
)
5234 /* Supportable by target? */
5236 vec_mode
= TYPE_MODE (vectype
);
/* MULT_HIGHPART has a dedicated query; everything else probes the
   default optab for the vector mode.  */
5237 if (code
== MULT_HIGHPART_EXPR
)
5238 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5241 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5244 if (dump_enabled_p ())
5245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5249 target_support_p
= (optab_handler (optab
, vec_mode
)
5250 != CODE_FOR_nothing
);
5253 if (!target_support_p
)
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5257 "op not supported by target.\n");
5258 /* Check only during analysis. */
5259 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5260 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
5262 if (dump_enabled_p ())
5263 dump_printf_loc (MSG_NOTE
, vect_location
,
5264 "proceeding using word mode.\n");
5267 /* Worthwhile without SIMD support? Check only during analysis. */
5268 if (!VECTOR_MODE_P (vec_mode
)
5270 && vf
< vect_min_worthwhile_factor (code
))
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5274 "not worthwhile without SIMD support.\n");
/* Analysis phase: record stmt type and cost, no code generation.  */
5278 if (!vec_stmt
) /* transformation not required. */
5280 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5281 if (dump_enabled_p ())
5282 dump_printf_loc (MSG_NOTE
, vect_location
,
5283 "=== vectorizable_operation ===\n");
5284 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation phase begins here.  */
5290 if (dump_enabled_p ())
5291 dump_printf_loc (MSG_NOTE
, vect_location
,
5292 "transform binary/unary operation.\n");
5295 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5297 /* In case the vectorization factor (VF) is bigger than the number
5298 of elements that we can fit in a vectype (nunits), we have to generate
5299 more than one vector stmt - i.e - we need to "unroll" the
5300 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5301 from one copy of the vector stmt to the next, in the field
5302 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5303 stages to find the correct vector defs to be used when vectorizing
5304 stmts that use the defs of the current stmt. The example below
5305 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5306 we need to create 4 vectorized stmts):
5308 before vectorization:
5309 RELATED_STMT VEC_STMT
5313 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5315 RELATED_STMT VEC_STMT
5316 VS1_0: vx0 = memref0 VS1_1 -
5317 VS1_1: vx1 = memref1 VS1_2 -
5318 VS1_2: vx2 = memref2 VS1_3 -
5319 VS1_3: vx3 = memref3 - -
5320 S1: x = load - VS1_0
5323 step2: vectorize stmt S2 (done here):
5324 To vectorize stmt S2 we first need to find the relevant vector
5325 def for the first operand 'x'. This is, as usual, obtained from
5326 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5327 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5328 relevant vector def 'vx0'. Having found 'vx0' we can generate
5329 the vector stmt VS2_0, and as usual, record it in the
5330 STMT_VINFO_VEC_STMT of stmt S2.
5331 When creating the second copy (VS2_1), we obtain the relevant vector
5332 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5333 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5334 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5335 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5336 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5337 chain of stmts and pointers:
5338 RELATED_STMT VEC_STMT
5339 VS1_0: vx0 = memref0 VS1_1 -
5340 VS1_1: vx1 = memref1 VS1_2 -
5341 VS1_2: vx2 = memref2 VS1_3 -
5342 VS1_3: vx3 = memref3 - -
5343 S1: x = load - VS1_0
5344 VS2_0: vz0 = vx0 + v1 VS2_1 -
5345 VS2_1: vz1 = vx1 + v1 VS2_2 -
5346 VS2_2: vz2 = vx2 + v1 VS2_3 -
5347 VS2_3: vz3 = vx3 + v1 - -
5348 S2: z = x + 1 - VS2_0 */
5350 prev_stmt_info
= NULL
;
5351 for (j
= 0; j
< ncopies
; j
++)
/* First iteration fetches fresh vector defs; later iterations walk the
   RELATED_STMT chain of the previous copy.  */
5356 if (op_type
== binary_op
|| op_type
== ternary_op
)
5357 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5360 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5362 if (op_type
== ternary_op
)
5363 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5368 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5369 if (op_type
== ternary_op
)
5371 tree vec_oprnd
= vec_oprnds2
.pop ();
5372 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5377 /* Arguments are ready. Create the new vector stmt. */
5378 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5380 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5381 ? vec_oprnds1
[i
] : NULL_TREE
);
5382 vop2
= ((op_type
== ternary_op
)
5383 ? vec_oprnds2
[i
] : NULL_TREE
);
5384 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5385 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5386 gimple_assign_set_lhs (new_stmt
, new_temp
);
5387 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5389 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies via STMT_VINFO_RELATED_STMT (see comment above).  */
5396 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5398 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5399 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5402 vec_oprnds0
.release ();
5403 vec_oprnds1
.release ();
5404 vec_oprnds2
.release ();
5409 /* A helper function to ensure data reference DR's base alignment
/* NOTE(review): fragmentary extraction -- the rest of this comment, the
   return-type line and some body lines were dropped.  Visible behavior:
   if DR's base was recorded as misaligned, raise the base decl's
   alignment to the vectype's alignment (via the symtab for decls with
   symtab entries, via SET_DECL_ALIGN + DECL_USER_ALIGN otherwise) and
   clear the base_misaligned flag so this is done only once.  */
5413 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5418 if (DR_VECT_AUX (dr
)->base_misaligned
)
5420 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5421 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5423 if (decl_in_symtab_p (base_decl
))
5424 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5427 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5428 DECL_USER_ALIGN (base_decl
) = 1;
/* Mark done so the alignment is only increased once per DR.  */
5430 DR_VECT_AUX (dr
)->base_misaligned
= false;
5435 /* Function vectorizable_store.
5437 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5439 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5440 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5441 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5444 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5450 tree vec_oprnd
= NULL_TREE
;
5451 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5452 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5454 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5455 struct loop
*loop
= NULL
;
5456 machine_mode vec_mode
;
5458 enum dr_alignment_support alignment_support_scheme
;
5460 enum vect_def_type dt
;
5461 stmt_vec_info prev_stmt_info
= NULL
;
5462 tree dataref_ptr
= NULL_TREE
;
5463 tree dataref_offset
= NULL_TREE
;
5464 gimple
*ptr_incr
= NULL
;
5467 gimple
*next_stmt
, *first_stmt
;
5469 unsigned int group_size
, i
;
5470 vec
<tree
> oprnds
= vNULL
;
5471 vec
<tree
> result_chain
= vNULL
;
5473 tree offset
= NULL_TREE
;
5474 vec
<tree
> vec_oprnds
= vNULL
;
5475 bool slp
= (slp_node
!= NULL
);
5476 unsigned int vec_num
;
5477 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5478 vec_info
*vinfo
= stmt_info
->vinfo
;
5480 gather_scatter_info gs_info
;
5481 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5484 vec_load_store_type vls_type
;
5486 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5489 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5493 /* Is vectorizable store? */
5495 if (!is_gimple_assign (stmt
))
5498 scalar_dest
= gimple_assign_lhs (stmt
);
5499 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5500 && is_pattern_stmt_p (stmt_info
))
5501 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5502 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5503 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5504 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5505 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5506 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5507 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5508 && TREE_CODE (scalar_dest
) != MEM_REF
)
5511 /* Cannot have hybrid store SLP -- that would mean storing to the
5512 same location twice. */
5513 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5515 gcc_assert (gimple_assign_single_p (stmt
));
5517 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5518 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5522 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5523 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5528 /* Multiple types in SLP are handled by creating the appropriate number of
5529 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5534 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5536 gcc_assert (ncopies
>= 1);
5538 /* FORNOW. This restriction should be relaxed. */
5539 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5541 if (dump_enabled_p ())
5542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5543 "multiple types in nested loop.\n");
5547 op
= gimple_assign_rhs1 (stmt
);
5549 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5553 "use not simple.\n");
5557 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5558 vls_type
= VLS_STORE_INVARIANT
;
5560 vls_type
= VLS_STORE
;
5562 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5565 elem_type
= TREE_TYPE (vectype
);
5566 vec_mode
= TYPE_MODE (vectype
);
5568 /* FORNOW. In some cases can vectorize even if data-type not supported
5569 (e.g. - array initialization with 0). */
5570 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5573 if (!STMT_VINFO_DATA_REF (stmt_info
))
5576 vect_memory_access_type memory_access_type
;
5577 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5578 &memory_access_type
, &gs_info
))
5581 if (!vec_stmt
) /* transformation not required. */
5583 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5584 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5585 /* The SLP costs are calculated during SLP analysis. */
5586 if (!PURE_SLP_STMT (stmt_info
))
5587 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5591 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5595 ensure_base_align (stmt_info
, dr
);
5597 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5599 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5600 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5601 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5602 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5603 edge pe
= loop_preheader_edge (loop
);
5606 enum { NARROW
, NONE
, WIDEN
} modifier
;
5607 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5609 if (nunits
== (unsigned int) scatter_off_nunits
)
5611 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5613 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5616 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5617 sel
[i
] = i
| nunits
;
5619 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5620 gcc_assert (perm_mask
!= NULL_TREE
);
5622 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5624 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5627 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5628 sel
[i
] = i
| scatter_off_nunits
;
5630 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5631 gcc_assert (perm_mask
!= NULL_TREE
);
5637 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5638 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5639 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5640 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5641 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5642 scaletype
= TREE_VALUE (arglist
);
5644 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5645 && TREE_CODE (rettype
) == VOID_TYPE
);
5647 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5648 if (!is_gimple_min_invariant (ptr
))
5650 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5651 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5652 gcc_assert (!new_bb
);
5655 /* Currently we support only unconditional scatter stores,
5656 so mask should be all ones. */
5657 mask
= build_int_cst (masktype
, -1);
5658 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5660 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5662 prev_stmt_info
= NULL
;
5663 for (j
= 0; j
< ncopies
; ++j
)
5668 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5670 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5672 else if (modifier
!= NONE
&& (j
& 1))
5674 if (modifier
== WIDEN
)
5677 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5678 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5681 else if (modifier
== NARROW
)
5683 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5686 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5695 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5697 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5701 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5703 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5704 == TYPE_VECTOR_SUBPARTS (srctype
));
5705 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5706 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5707 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5708 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5712 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5714 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5715 == TYPE_VECTOR_SUBPARTS (idxtype
));
5716 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5717 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5718 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5719 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5724 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5726 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5728 if (prev_stmt_info
== NULL
)
5729 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5731 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5732 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5737 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5740 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5741 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5742 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5744 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5747 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5749 /* We vectorize all the stmts of the interleaving group when we
5750 reach the last stmt in the group. */
5751 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5752 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5761 grouped_store
= false;
5762 /* VEC_NUM is the number of vect stmts to be created for this
5764 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5765 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5766 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5767 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5768 op
= gimple_assign_rhs1 (first_stmt
);
5771 /* VEC_NUM is the number of vect stmts to be created for this
5773 vec_num
= group_size
;
5779 group_size
= vec_num
= 1;
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_NOTE
, vect_location
,
5784 "transform store. ncopies = %d\n", ncopies
);
5786 if (memory_access_type
== VMAT_ELEMENTWISE
5787 || memory_access_type
== VMAT_STRIDED_SLP
)
5789 gimple_stmt_iterator incr_gsi
;
5795 gimple_seq stmts
= NULL
;
5796 tree stride_base
, stride_step
, alias_off
;
5800 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5803 = fold_build_pointer_plus
5804 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5805 size_binop (PLUS_EXPR
,
5806 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5807 convert_to_ptrofftype (DR_INIT(first_dr
))));
5808 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5810 /* For a store with loop-invariant (but other than power-of-2)
5811 stride (i.e. not a grouped access) like so:
5813 for (i = 0; i < n; i += stride)
5816 we generate a new induction variable and new stores from
5817 the components of the (vectorized) rhs:
5819 for (j = 0; ; j += VF*stride)
5824 array[j + stride] = tmp2;
5828 unsigned nstores
= nunits
;
5830 tree ltype
= elem_type
;
5833 if (group_size
< nunits
5834 && nunits
% group_size
== 0)
5836 nstores
= nunits
/ group_size
;
5838 ltype
= build_vector_type (elem_type
, group_size
);
5840 else if (group_size
>= nunits
5841 && group_size
% nunits
== 0)
5847 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5848 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5851 ivstep
= stride_step
;
5852 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5853 build_int_cst (TREE_TYPE (ivstep
), vf
));
5855 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5857 create_iv (stride_base
, ivstep
, NULL
,
5858 loop
, &incr_gsi
, insert_after
,
5860 incr
= gsi_stmt (incr_gsi
);
5861 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5863 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5865 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5867 prev_stmt_info
= NULL
;
5868 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5869 next_stmt
= first_stmt
;
5870 for (g
= 0; g
< group_size
; g
++)
5872 running_off
= offvar
;
5875 tree size
= TYPE_SIZE_UNIT (ltype
);
5876 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5878 tree newoff
= copy_ssa_name (running_off
, NULL
);
5879 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5881 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5882 running_off
= newoff
;
5884 unsigned int group_el
= 0;
5885 unsigned HOST_WIDE_INT
5886 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
5887 for (j
= 0; j
< ncopies
; j
++)
5889 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5890 and first_stmt == stmt. */
5895 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5897 vec_oprnd
= vec_oprnds
[0];
5901 gcc_assert (gimple_assign_single_p (next_stmt
));
5902 op
= gimple_assign_rhs1 (next_stmt
);
5903 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5909 vec_oprnd
= vec_oprnds
[j
];
5912 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5913 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5917 for (i
= 0; i
< nstores
; i
++)
5919 tree newref
, newoff
;
5920 gimple
*incr
, *assign
;
5921 tree size
= TYPE_SIZE (ltype
);
5922 /* Extract the i'th component. */
5923 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5924 bitsize_int (i
), size
);
5925 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5928 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5932 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
5934 newref
= build2 (MEM_REF
, ltype
,
5935 running_off
, this_off
);
5937 /* And store it to *running_off. */
5938 assign
= gimple_build_assign (newref
, elem
);
5939 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5943 || group_el
== group_size
)
5945 newoff
= copy_ssa_name (running_off
, NULL
);
5946 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5947 running_off
, stride_step
);
5948 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5950 running_off
= newoff
;
5953 if (g
== group_size
- 1
5956 if (j
== 0 && i
== 0)
5957 STMT_VINFO_VEC_STMT (stmt_info
)
5958 = *vec_stmt
= assign
;
5960 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5961 prev_stmt_info
= vinfo_for_stmt (assign
);
5965 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5972 auto_vec
<tree
> dr_chain (group_size
);
5973 oprnds
.create (group_size
);
5975 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5976 gcc_assert (alignment_support_scheme
);
5977 /* Targets with store-lane instructions must not require explicit
5979 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
5980 || alignment_support_scheme
== dr_aligned
5981 || alignment_support_scheme
== dr_unaligned_supported
);
5983 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
5984 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
5985 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5987 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
5988 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5990 aggr_type
= vectype
;
5992 /* In case the vectorization factor (VF) is bigger than the number
5993 of elements that we can fit in a vectype (nunits), we have to generate
5994 more than one vector stmt - i.e - we need to "unroll" the
5995 vector stmt by a factor VF/nunits. For more details see documentation in
5996 vect_get_vec_def_for_copy_stmt. */
5998 /* In case of interleaving (non-unit grouped access):
6005 We create vectorized stores starting from base address (the access of the
6006 first stmt in the chain (S2 in the above example), when the last store stmt
6007 of the chain (S4) is reached:
6010 VS2: &base + vec_size*1 = vx0
6011 VS3: &base + vec_size*2 = vx1
6012 VS4: &base + vec_size*3 = vx3
6014 Then permutation statements are generated:
6016 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6017 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6020 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6021 (the order of the data-refs in the output of vect_permute_store_chain
6022 corresponds to the order of scalar stmts in the interleaving chain - see
6023 the documentation of vect_permute_store_chain()).
6025 In case of both multiple types and interleaving, above vector stores and
6026 permutation stmts are created for every copy. The result vector stmts are
6027 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6028 STMT_VINFO_RELATED_STMT for the next copies.
6031 prev_stmt_info
= NULL
;
6032 for (j
= 0; j
< ncopies
; j
++)
6039 /* Get vectorized arguments for SLP_NODE. */
6040 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6041 NULL
, slp_node
, -1);
6043 vec_oprnd
= vec_oprnds
[0];
6047 /* For interleaved stores we collect vectorized defs for all the
6048 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6049 used as an input to vect_permute_store_chain(), and OPRNDS as
6050 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6052 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6053 OPRNDS are of size 1. */
6054 next_stmt
= first_stmt
;
6055 for (i
= 0; i
< group_size
; i
++)
6057 /* Since gaps are not supported for interleaved stores,
6058 GROUP_SIZE is the exact number of stmts in the chain.
6059 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6060 there is no interleaving, GROUP_SIZE is 1, and only one
6061 iteration of the loop will be executed. */
6062 gcc_assert (next_stmt
6063 && gimple_assign_single_p (next_stmt
));
6064 op
= gimple_assign_rhs1 (next_stmt
);
6066 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6067 dr_chain
.quick_push (vec_oprnd
);
6068 oprnds
.quick_push (vec_oprnd
);
6069 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
 6073 /* We should have caught mismatched types earlier. */
6074 gcc_assert (useless_type_conversion_p (vectype
,
6075 TREE_TYPE (vec_oprnd
)));
6076 bool simd_lane_access_p
6077 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6078 if (simd_lane_access_p
6079 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6080 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6081 && integer_zerop (DR_OFFSET (first_dr
))
6082 && integer_zerop (DR_INIT (first_dr
))
6083 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6084 get_alias_set (DR_REF (first_dr
))))
6086 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6087 dataref_offset
= build_int_cst (reference_alias_ptr_type
6088 (DR_REF (first_dr
)), 0);
6093 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6094 simd_lane_access_p
? loop
: NULL
,
6095 offset
, &dummy
, gsi
, &ptr_incr
,
6096 simd_lane_access_p
, &inv_p
);
6097 gcc_assert (bb_vinfo
|| !inv_p
);
6101 /* For interleaved stores we created vectorized defs for all the
6102 defs stored in OPRNDS in the previous iteration (previous copy).
6103 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6104 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6106 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6107 OPRNDS are of size 1. */
6108 for (i
= 0; i
< group_size
; i
++)
6111 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6112 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6113 dr_chain
[i
] = vec_oprnd
;
6114 oprnds
[i
] = vec_oprnd
;
6118 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6119 TYPE_SIZE_UNIT (aggr_type
));
6121 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6122 TYPE_SIZE_UNIT (aggr_type
));
6125 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6129 /* Combine all the vectors into an array. */
6130 vec_array
= create_vector_array (vectype
, vec_num
);
6131 for (i
= 0; i
< vec_num
; i
++)
6133 vec_oprnd
= dr_chain
[i
];
6134 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6138 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6139 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6140 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
6141 gimple_call_set_lhs (new_stmt
, data_ref
);
6142 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6150 result_chain
.create (group_size
);
6152 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6156 next_stmt
= first_stmt
;
6157 for (i
= 0; i
< vec_num
; i
++)
6159 unsigned align
, misalign
;
6162 /* Bump the vector pointer. */
6163 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6167 vec_oprnd
= vec_oprnds
[i
];
6168 else if (grouped_store
)
6169 /* For grouped stores vectorized defs are interleaved in
6170 vect_permute_store_chain(). */
6171 vec_oprnd
= result_chain
[i
];
6173 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
6177 : build_int_cst (reference_alias_ptr_type
6178 (DR_REF (first_dr
)), 0));
6179 align
= TYPE_ALIGN_UNIT (vectype
);
6180 if (aligned_access_p (first_dr
))
6182 else if (DR_MISALIGNMENT (first_dr
) == -1)
6184 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6185 align
= TYPE_ALIGN_UNIT (elem_type
);
6187 align
= get_object_alignment (DR_REF (first_dr
))
6190 TREE_TYPE (data_ref
)
6191 = build_aligned_type (TREE_TYPE (data_ref
),
6192 align
* BITS_PER_UNIT
);
6196 TREE_TYPE (data_ref
)
6197 = build_aligned_type (TREE_TYPE (data_ref
),
6198 TYPE_ALIGN (elem_type
));
6199 misalign
= DR_MISALIGNMENT (first_dr
);
6201 if (dataref_offset
== NULL_TREE
6202 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6203 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6206 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6208 tree perm_mask
= perm_mask_for_reverse (vectype
);
6210 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6212 tree new_temp
= make_ssa_name (perm_dest
);
6214 /* Generate the permute statement. */
6216 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6217 vec_oprnd
, perm_mask
);
6218 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6220 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6221 vec_oprnd
= new_temp
;
6224 /* Arguments are ready. Create the new vector stmt. */
6225 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6226 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6231 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6239 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6241 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6242 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6247 result_chain
.release ();
6248 vec_oprnds
.release ();
6253 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6254 VECTOR_CST mask. No checks are made that the target platform supports the
6255 mask, so callers may wish to test can_vec_perm_p separately, or use
6256 vect_gen_perm_mask_checked. */
6259 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
/* Scratch values: the integer element type of the mask, the mask's
   vector type, the resulting VECTOR_CST, and the per-element constants.
   NOTE(review): surrounding lines (return type, braces, final return)
   were lost when this chunk was extracted.  */
6261 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
/* The mask has one element per element of VECTYPE.  */
6264 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Mask elements are integers in the mode corresponding to VECTYPE's
   element mode (int_mode_for_mode), obtained via the language hook.  */
6266 mask_elt_type
= lang_hooks
.types
.type_for_mode
6267 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6268 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Materialize each selector value SEL[i] as an INTEGER_CST, then
   build the VECTOR_CST mask from the element array.  */
6270 mask_elts
= XALLOCAVEC (tree
, nunits
);
6271 for (i
= nunits
- 1; i
>= 0; i
--)
6272 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
6273 mask_vec
= build_vector (mask_type
, mask_elts
);
6278 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6279 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6282 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
/* Abort if the target cannot permute VECTYPE's mode with SEL for
   arbitrary (non-constant) inputs, then delegate the actual mask
   construction to the unchecked variant.  */
6284 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6285 return vect_gen_perm_mask_any (vectype
, sel
);
6288 /* Given a vector variable X and Y, that was generated for the scalar
6289 STMT, generate instructions to permute the vector elements of X and Y
6290 using permutation mask MASK_VEC, insert them at *GSI and return the
6291 permuted vector variable. */
6294 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6295 gimple_stmt_iterator
*gsi
)
/* The permute result has the same vector type as input X.  */
6297 tree vectype
= TREE_TYPE (x
);
6298 tree perm_dest
, data_ref
;
/* Create a destination variable modelled on STMT's scalar lhs and an
   SSA name to hold the permuted vector.  */
6301 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6302 data_ref
= make_ssa_name (perm_dest
);
6304 /* Generate the permute statement (VEC_PERM_EXPR <x, y, mask_vec>)
   and emit it at *GSI.  */
6305 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6306 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6311 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6312 inserting them on the loops preheader edge. Returns true if we
6313 were successful in doing so (and thus STMT can be moved then),
6314 otherwise returns false. */
6317 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
/* First pass: check that every SSA use of STMT whose definition lies
   inside LOOP can be hoisted without recursing.  */
6323 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6325 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only defs that actually live inside LOOP need hoisting; default
   defs (gimple_nop_p) and defs outside LOOP are already fine.  */
6326 if (!gimple_nop_p (def_stmt
)
6327 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6329 /* Make sure we don't need to recurse. While we could do
6330 so in simple cases when there are more complex use webs
6331 we don't have an easy way to preserve stmt order to fulfil
6332 dependencies within them. */
/* A PHI node cannot be moved onto the preheader edge at all.  */
6335 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Any operand of DEF_STMT that is itself defined inside LOOP would
   require recursive hoisting -- give up in that case.  */
6337 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6339 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6340 if (!gimple_nop_p (def_stmt2
)
6341 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second pass: actually move each in-loop definition onto the
   preheader edge of LOOP.  */
6351 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6353 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6354 if (!gimple_nop_p (def_stmt
)
6355 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Detach DEF_STMT from its current position (gsi_remove with
   remove_permanently == false keeps the stmt alive) and re-insert it
   on the preheader edge.  */
6357 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6358 gsi_remove (&gsi
, false);
6359 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6366 /* vectorizable_load.
6368 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6370 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6371 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6372 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6375 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6376 slp_tree slp_node
, slp_instance slp_node_instance
)
6379 tree vec_dest
= NULL
;
6380 tree data_ref
= NULL
;
6381 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6382 stmt_vec_info prev_stmt_info
;
6383 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6384 struct loop
*loop
= NULL
;
6385 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6386 bool nested_in_vect_loop
= false;
6387 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6391 gimple
*new_stmt
= NULL
;
6393 enum dr_alignment_support alignment_support_scheme
;
6394 tree dataref_ptr
= NULL_TREE
;
6395 tree dataref_offset
= NULL_TREE
;
6396 gimple
*ptr_incr
= NULL
;
6398 int i
, j
, group_size
= -1, group_gap_adj
;
6399 tree msq
= NULL_TREE
, lsq
;
6400 tree offset
= NULL_TREE
;
6401 tree byte_offset
= NULL_TREE
;
6402 tree realignment_token
= NULL_TREE
;
6404 vec
<tree
> dr_chain
= vNULL
;
6405 bool grouped_load
= false;
6407 gimple
*first_stmt_for_drptr
= NULL
;
6409 bool compute_in_loop
= false;
6410 struct loop
*at_loop
;
6412 bool slp
= (slp_node
!= NULL
);
6413 bool slp_perm
= false;
6414 enum tree_code code
;
6415 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6418 gather_scatter_info gs_info
;
6419 vec_info
*vinfo
= stmt_info
->vinfo
;
6421 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6424 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6428 /* Is vectorizable load? */
6429 if (!is_gimple_assign (stmt
))
6432 scalar_dest
= gimple_assign_lhs (stmt
);
6433 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6436 code
= gimple_assign_rhs_code (stmt
);
6437 if (code
!= ARRAY_REF
6438 && code
!= BIT_FIELD_REF
6439 && code
!= INDIRECT_REF
6440 && code
!= COMPONENT_REF
6441 && code
!= IMAGPART_EXPR
6442 && code
!= REALPART_EXPR
6444 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6447 if (!STMT_VINFO_DATA_REF (stmt_info
))
6450 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6451 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6455 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6456 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6457 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6462 /* Multiple types in SLP are handled by creating the appropriate number of
6463 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6468 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6470 gcc_assert (ncopies
>= 1);
6472 /* FORNOW. This restriction should be relaxed. */
6473 if (nested_in_vect_loop
&& ncopies
> 1)
6475 if (dump_enabled_p ())
6476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6477 "multiple types in nested loop.\n");
6481 /* Invalidate assumptions made by dependence analysis when vectorization
6482 on the unrolled body effectively re-orders stmts. */
6484 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6485 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6486 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6490 "cannot perform implicit CSE when unrolling "
6491 "with negative dependence distance\n");
6495 elem_type
= TREE_TYPE (vectype
);
6496 mode
= TYPE_MODE (vectype
);
6498 /* FORNOW. In some cases can vectorize even if data-type not supported
6499 (e.g. - data copies). */
6500 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6502 if (dump_enabled_p ())
6503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6504 "Aligned load, but unsupported type.\n");
6508 /* Check if the load is a part of an interleaving chain. */
6509 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6511 grouped_load
= true;
6513 gcc_assert (!nested_in_vect_loop
);
6514 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6516 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6517 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6519 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6522 /* ??? The following is overly pessimistic (as well as the loop
6523 case above) in the case we can statically determine the excess
6524 elements loaded are within the bounds of a decl that is accessed.
6525 Likewise for BB vectorizations using masked loads is a possibility. */
6526 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6529 "BB vectorization with gaps at the end of a load "
6530 "is not supported\n");
6534 /* Invalidate assumptions made by dependence analysis when vectorization
6535 on the unrolled body effectively re-orders stmts. */
6536 if (!PURE_SLP_STMT (stmt_info
)
6537 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6538 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6539 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6541 if (dump_enabled_p ())
6542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6543 "cannot perform implicit CSE when performing "
6544 "group loads with negative dependence distance\n");
6548 /* Similarly when the stmt is a load that is both part of a SLP
6549 instance and a loop vectorized stmt via the same-dr mechanism
6550 we have to give up. */
6551 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6552 && (STMT_SLP_TYPE (stmt_info
)
6553 != STMT_SLP_TYPE (vinfo_for_stmt
6554 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6556 if (dump_enabled_p ())
6557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6558 "conflicting SLP types for CSEd load\n");
6563 vect_memory_access_type memory_access_type
;
6564 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6565 &memory_access_type
, &gs_info
))
6568 if (!vec_stmt
) /* transformation not required. */
6571 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6572 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6573 /* The SLP costs are calculated during SLP analysis. */
6574 if (!PURE_SLP_STMT (stmt_info
))
6575 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6581 gcc_assert (memory_access_type
6582 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6584 if (dump_enabled_p ())
6585 dump_printf_loc (MSG_NOTE
, vect_location
,
6586 "transform load. ncopies = %d\n", ncopies
);
6590 ensure_base_align (stmt_info
, dr
);
6592 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6594 tree vec_oprnd0
= NULL_TREE
, op
;
6595 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6596 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6597 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6598 edge pe
= loop_preheader_edge (loop
);
6601 enum { NARROW
, NONE
, WIDEN
} modifier
;
6602 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6604 if (nunits
== gather_off_nunits
)
6606 else if (nunits
== gather_off_nunits
/ 2)
6608 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6611 for (i
= 0; i
< gather_off_nunits
; ++i
)
6612 sel
[i
] = i
| nunits
;
6614 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6616 else if (nunits
== gather_off_nunits
* 2)
6618 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6621 for (i
= 0; i
< nunits
; ++i
)
6622 sel
[i
] = i
< gather_off_nunits
6623 ? i
: i
+ nunits
- gather_off_nunits
;
6625 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6631 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6632 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6633 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6634 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6635 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6636 scaletype
= TREE_VALUE (arglist
);
6637 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6639 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6641 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6642 if (!is_gimple_min_invariant (ptr
))
6644 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6645 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6646 gcc_assert (!new_bb
);
6649 /* Currently we support only unconditional gather loads,
6650 so mask should be all ones. */
6651 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6652 mask
= build_int_cst (masktype
, -1);
6653 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6655 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6656 mask
= build_vector_from_val (masktype
, mask
);
6657 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6659 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6663 for (j
= 0; j
< 6; ++j
)
6665 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6666 mask
= build_real (TREE_TYPE (masktype
), r
);
6667 mask
= build_vector_from_val (masktype
, mask
);
6668 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6673 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6675 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6676 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6677 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6681 for (j
= 0; j
< 6; ++j
)
6683 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6684 merge
= build_real (TREE_TYPE (rettype
), r
);
6688 merge
= build_vector_from_val (rettype
, merge
);
6689 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6691 prev_stmt_info
= NULL
;
6692 for (j
= 0; j
< ncopies
; ++j
)
6694 if (modifier
== WIDEN
&& (j
& 1))
6695 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6696 perm_mask
, stmt
, gsi
);
6699 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6702 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6704 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6706 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6707 == TYPE_VECTOR_SUBPARTS (idxtype
));
6708 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6709 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6711 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6712 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6717 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6719 if (!useless_type_conversion_p (vectype
, rettype
))
6721 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6722 == TYPE_VECTOR_SUBPARTS (rettype
));
6723 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6724 gimple_call_set_lhs (new_stmt
, op
);
6725 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6726 var
= make_ssa_name (vec_dest
);
6727 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6729 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6733 var
= make_ssa_name (vec_dest
, new_stmt
);
6734 gimple_call_set_lhs (new_stmt
, var
);
6737 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6739 if (modifier
== NARROW
)
6746 var
= permute_vec_elements (prev_res
, var
,
6747 perm_mask
, stmt
, gsi
);
6748 new_stmt
= SSA_NAME_DEF_STMT (var
);
6751 if (prev_stmt_info
== NULL
)
6752 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6754 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6755 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6760 if (memory_access_type
== VMAT_ELEMENTWISE
6761 || memory_access_type
== VMAT_STRIDED_SLP
)
6763 gimple_stmt_iterator incr_gsi
;
6769 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6770 gimple_seq stmts
= NULL
;
6771 tree stride_base
, stride_step
, alias_off
;
6773 gcc_assert (!nested_in_vect_loop
);
6775 if (slp
&& grouped_load
)
6776 first_dr
= STMT_VINFO_DATA_REF
6777 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6782 = fold_build_pointer_plus
6783 (DR_BASE_ADDRESS (first_dr
),
6784 size_binop (PLUS_EXPR
,
6785 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6786 convert_to_ptrofftype (DR_INIT (first_dr
))));
6787 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6789 /* For a load with loop-invariant (but other than power-of-2)
6790 stride (i.e. not a grouped access) like so:
6792 for (i = 0; i < n; i += stride)
6795 we generate a new induction variable and new accesses to
6796 form a new vector (or vectors, depending on ncopies):
6798 for (j = 0; ; j += VF*stride)
6800 tmp2 = array[j + stride];
6802 vectemp = {tmp1, tmp2, ...}
6805 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6806 build_int_cst (TREE_TYPE (stride_step
), vf
));
6808 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6810 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6811 loop
, &incr_gsi
, insert_after
,
6813 incr
= gsi_stmt (incr_gsi
);
6814 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6816 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6817 &stmts
, true, NULL_TREE
);
6819 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6821 prev_stmt_info
= NULL
;
6822 running_off
= offvar
;
6823 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6824 int nloads
= nunits
;
6826 tree ltype
= TREE_TYPE (vectype
);
6827 auto_vec
<tree
> dr_chain
;
6828 if (memory_access_type
== VMAT_STRIDED_SLP
)
6830 nloads
= nunits
/ group_size
;
6831 if (group_size
< nunits
)
6834 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6841 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6845 /* For SLP permutation support we need to load the whole group,
6846 not only the number of vector stmts the permutation result
6850 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6851 dr_chain
.create (ncopies
);
6854 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6857 unsigned HOST_WIDE_INT
6858 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6859 for (j
= 0; j
< ncopies
; j
++)
6862 vec_alloc (v
, nloads
);
6863 for (i
= 0; i
< nloads
; i
++)
6865 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6867 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6868 build2 (MEM_REF
, ltype
,
6869 running_off
, this_off
));
6870 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6872 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
6873 gimple_assign_lhs (new_stmt
));
6877 || group_el
== group_size
)
6879 tree newoff
= copy_ssa_name (running_off
);
6880 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6881 running_off
, stride_step
);
6882 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6884 running_off
= newoff
;
6890 tree vec_inv
= build_constructor (vectype
, v
);
6891 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6892 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6898 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6900 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6905 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6907 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6908 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6912 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6913 slp_node_instance
, false);
6919 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6920 /* For SLP vectorization we directly vectorize a subchain
6921 without permutation. */
6922 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6923 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6924 /* For BB vectorization always use the first stmt to base
6925 the data ref pointer on. */
6927 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6929 /* Check if the chain of loads is already vectorized. */
6930 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6931 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6932 ??? But we can only do so if there is exactly one
6933 as we have no way to get at the rest. Leave the CSE
6935 ??? With the group load eventually participating
6936 in multiple different permutations (having multiple
6937 slp nodes which refer to the same group) the CSE
6938 is even wrong code. See PR56270. */
6941 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6944 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6945 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6948 /* VEC_NUM is the number of vect stmts to be created for this group. */
6951 grouped_load
= false;
6952 /* For SLP permutation support we need to load the whole group,
6953 not only the number of vector stmts the permutation result
6956 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6958 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6959 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6962 vec_num
= group_size
;
6968 group_size
= vec_num
= 1;
6972 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6973 gcc_assert (alignment_support_scheme
);
6974 /* Targets with load-lane instructions must not require explicit
6976 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6977 || alignment_support_scheme
== dr_aligned
6978 || alignment_support_scheme
== dr_unaligned_supported
);
6980 /* In case the vectorization factor (VF) is bigger than the number
6981 of elements that we can fit in a vectype (nunits), we have to generate
6982 more than one vector stmt - i.e - we need to "unroll" the
6983 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6984 from one copy of the vector stmt to the next, in the field
6985 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6986 stages to find the correct vector defs to be used when vectorizing
6987 stmts that use the defs of the current stmt. The example below
6988 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6989 need to create 4 vectorized stmts):
6991 before vectorization:
6992 RELATED_STMT VEC_STMT
6996 step 1: vectorize stmt S1:
6997 We first create the vector stmt VS1_0, and, as usual, record a
6998 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6999 Next, we create the vector stmt VS1_1, and record a pointer to
7000 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7001 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7003 RELATED_STMT VEC_STMT
7004 VS1_0: vx0 = memref0 VS1_1 -
7005 VS1_1: vx1 = memref1 VS1_2 -
7006 VS1_2: vx2 = memref2 VS1_3 -
7007 VS1_3: vx3 = memref3 - -
7008 S1: x = load - VS1_0
7011 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7012 information we recorded in RELATED_STMT field is used to vectorize
7015 /* In case of interleaving (non-unit grouped access):
7022 Vectorized loads are created in the order of memory accesses
7023 starting from the access of the first stmt of the chain:
7026 VS2: vx1 = &base + vec_size*1
7027 VS3: vx3 = &base + vec_size*2
7028 VS4: vx4 = &base + vec_size*3
7030 Then permutation statements are generated:
7032 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7033 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7036 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7037 (the order of the data-refs in the output of vect_permute_load_chain
7038 corresponds to the order of scalar stmts in the interleaving chain - see
7039 the documentation of vect_permute_load_chain()).
7040 The generation of permutation stmts and recording them in
7041 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7043 In case of both multiple types and interleaving, the vector loads and
7044 permutation stmts above are created for every copy. The result vector
7045 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7046 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7048 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7049 on a target that supports unaligned accesses (dr_unaligned_supported)
7050 we generate the following code:
7054 p = p + indx * vectype_size;
7059 Otherwise, the data reference is potentially unaligned on a target that
7060 does not support unaligned accesses (dr_explicit_realign_optimized) -
7061 then generate the following code, in which the data in each iteration is
7062 obtained by two vector loads, one from the previous iteration, and one
7063 from the current iteration:
7065 msq_init = *(floor(p1))
7066 p2 = initial_addr + VS - 1;
7067 realignment_token = call target_builtin;
7070 p2 = p2 + indx * vectype_size
7072 vec_dest = realign_load (msq, lsq, realignment_token)
7077 /* If the misalignment remains the same throughout the execution of the
7078 loop, we can create the init_addr and permutation mask at the loop
7079 preheader. Otherwise, it needs to be created inside the loop.
7080 This can only occur when vectorizing memory accesses in the inner-loop
7081 nested within an outer-loop that is being vectorized. */
7083 if (nested_in_vect_loop
7084 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7085 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7087 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7088 compute_in_loop
= true;
7091 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7092 || alignment_support_scheme
== dr_explicit_realign
)
7093 && !compute_in_loop
)
7095 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7096 alignment_support_scheme
, NULL_TREE
,
7098 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7100 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7101 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7108 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7109 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7111 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7112 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7114 aggr_type
= vectype
;
7116 prev_stmt_info
= NULL
;
7117 for (j
= 0; j
< ncopies
; j
++)
7119 /* 1. Create the vector or array pointer update chain. */
7122 bool simd_lane_access_p
7123 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7124 if (simd_lane_access_p
7125 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7126 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7127 && integer_zerop (DR_OFFSET (first_dr
))
7128 && integer_zerop (DR_INIT (first_dr
))
7129 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7130 get_alias_set (DR_REF (first_dr
)))
7131 && (alignment_support_scheme
== dr_aligned
7132 || alignment_support_scheme
== dr_unaligned_supported
))
7134 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7135 dataref_offset
= build_int_cst (reference_alias_ptr_type
7136 (DR_REF (first_dr
)), 0);
7139 else if (first_stmt_for_drptr
7140 && first_stmt
!= first_stmt_for_drptr
)
7143 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7144 at_loop
, offset
, &dummy
, gsi
,
7145 &ptr_incr
, simd_lane_access_p
,
7146 &inv_p
, byte_offset
);
7147 /* Adjust the pointer by the difference to first_stmt. */
7148 data_reference_p ptrdr
7149 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7150 tree diff
= fold_convert (sizetype
,
7151 size_binop (MINUS_EXPR
,
7154 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7159 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7160 offset
, &dummy
, gsi
, &ptr_incr
,
7161 simd_lane_access_p
, &inv_p
,
7164 else if (dataref_offset
)
7165 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7166 TYPE_SIZE_UNIT (aggr_type
));
7168 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7169 TYPE_SIZE_UNIT (aggr_type
));
7171 if (grouped_load
|| slp_perm
)
7172 dr_chain
.create (vec_num
);
7174 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7178 vec_array
= create_vector_array (vectype
, vec_num
);
7181 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7182 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
7183 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7184 gimple_call_set_lhs (new_stmt
, vec_array
);
7185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7187 /* Extract each vector into an SSA_NAME. */
7188 for (i
= 0; i
< vec_num
; i
++)
7190 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7192 dr_chain
.quick_push (new_temp
);
7195 /* Record the mapping between SSA_NAMEs and statements. */
7196 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7200 for (i
= 0; i
< vec_num
; i
++)
7203 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7206 /* 2. Create the vector-load in the loop. */
7207 switch (alignment_support_scheme
)
7210 case dr_unaligned_supported
:
7212 unsigned int align
, misalign
;
7215 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7218 : build_int_cst (reference_alias_ptr_type
7219 (DR_REF (first_dr
)), 0));
7220 align
= TYPE_ALIGN_UNIT (vectype
);
7221 if (alignment_support_scheme
== dr_aligned
)
7223 gcc_assert (aligned_access_p (first_dr
));
7226 else if (DR_MISALIGNMENT (first_dr
) == -1)
7228 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7229 align
= TYPE_ALIGN_UNIT (elem_type
);
7231 align
= (get_object_alignment (DR_REF (first_dr
))
7234 TREE_TYPE (data_ref
)
7235 = build_aligned_type (TREE_TYPE (data_ref
),
7236 align
* BITS_PER_UNIT
);
7240 TREE_TYPE (data_ref
)
7241 = build_aligned_type (TREE_TYPE (data_ref
),
7242 TYPE_ALIGN (elem_type
));
7243 misalign
= DR_MISALIGNMENT (first_dr
);
7245 if (dataref_offset
== NULL_TREE
7246 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7247 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7251 case dr_explicit_realign
:
7255 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7257 if (compute_in_loop
)
7258 msq
= vect_setup_realignment (first_stmt
, gsi
,
7260 dr_explicit_realign
,
7263 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7264 ptr
= copy_ssa_name (dataref_ptr
);
7266 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7267 new_stmt
= gimple_build_assign
7268 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7270 (TREE_TYPE (dataref_ptr
),
7271 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7272 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7274 = build2 (MEM_REF
, vectype
, ptr
,
7275 build_int_cst (reference_alias_ptr_type
7276 (DR_REF (first_dr
)), 0));
7277 vec_dest
= vect_create_destination_var (scalar_dest
,
7279 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7280 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7281 gimple_assign_set_lhs (new_stmt
, new_temp
);
7282 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7283 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7284 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7287 bump
= size_binop (MULT_EXPR
, vs
,
7288 TYPE_SIZE_UNIT (elem_type
));
7289 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7290 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7291 new_stmt
= gimple_build_assign
7292 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7295 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7296 ptr
= copy_ssa_name (ptr
, new_stmt
);
7297 gimple_assign_set_lhs (new_stmt
, ptr
);
7298 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7300 = build2 (MEM_REF
, vectype
, ptr
,
7301 build_int_cst (reference_alias_ptr_type
7302 (DR_REF (first_dr
)), 0));
7305 case dr_explicit_realign_optimized
:
7306 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7307 new_temp
= copy_ssa_name (dataref_ptr
);
7309 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7310 new_stmt
= gimple_build_assign
7311 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7313 (TREE_TYPE (dataref_ptr
),
7314 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7315 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7317 = build2 (MEM_REF
, vectype
, new_temp
,
7318 build_int_cst (reference_alias_ptr_type
7319 (DR_REF (first_dr
)), 0));
7324 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7325 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7326 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7327 gimple_assign_set_lhs (new_stmt
, new_temp
);
7328 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7330 /* 3. Handle explicit realignment if necessary/supported.
7332 vec_dest = realign_load (msq, lsq, realignment_token) */
7333 if (alignment_support_scheme
== dr_explicit_realign_optimized
7334 || alignment_support_scheme
== dr_explicit_realign
)
7336 lsq
= gimple_assign_lhs (new_stmt
);
7337 if (!realignment_token
)
7338 realignment_token
= dataref_ptr
;
7339 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7340 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7341 msq
, lsq
, realignment_token
);
7342 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7343 gimple_assign_set_lhs (new_stmt
, new_temp
);
7344 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7346 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7349 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7350 add_phi_arg (phi
, lsq
,
7351 loop_latch_edge (containing_loop
),
7357 /* 4. Handle invariant-load. */
7358 if (inv_p
&& !bb_vinfo
)
7360 gcc_assert (!grouped_load
);
7361 /* If we have versioned for aliasing or the loop doesn't
7362 have any data dependencies that would preclude this,
7363 then we are sure this is a loop invariant load and
7364 thus we can insert it on the preheader edge. */
7365 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7366 && !nested_in_vect_loop
7367 && hoist_defs_of_uses (stmt
, loop
))
7369 if (dump_enabled_p ())
7371 dump_printf_loc (MSG_NOTE
, vect_location
,
7372 "hoisting out of the vectorized "
7374 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7376 tree tem
= copy_ssa_name (scalar_dest
);
7377 gsi_insert_on_edge_immediate
7378 (loop_preheader_edge (loop
),
7379 gimple_build_assign (tem
,
7381 (gimple_assign_rhs1 (stmt
))));
7382 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7383 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7384 set_vinfo_for_stmt (new_stmt
,
7385 new_stmt_vec_info (new_stmt
, vinfo
));
7389 gimple_stmt_iterator gsi2
= *gsi
;
7391 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7393 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7397 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7399 tree perm_mask
= perm_mask_for_reverse (vectype
);
7400 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7401 perm_mask
, stmt
, gsi
);
7402 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7405 /* Collect vector loads and later create their permutation in
7406 vect_transform_grouped_load (). */
7407 if (grouped_load
|| slp_perm
)
7408 dr_chain
.quick_push (new_temp
);
7410 /* Store vector loads in the corresponding SLP_NODE. */
7411 if (slp
&& !slp_perm
)
7412 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7414 /* Bump the vector pointer to account for a gap or for excess
7415 elements loaded for a permuted SLP load. */
7416 if (group_gap_adj
!= 0)
7420 = wide_int_to_tree (sizetype
,
7421 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7422 group_gap_adj
, &ovf
));
7423 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7428 if (slp
&& !slp_perm
)
7433 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7434 slp_node_instance
, false))
7436 dr_chain
.release ();
7444 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7445 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7446 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7451 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7453 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7454 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7457 dr_chain
.release ();
7463 /* Function vect_is_simple_cond.
7466 VINFO - the vec_info of the statement being checked.
7467 COND - Condition that is checked for simple use.
7470 *COMP_VECTYPE - the vector type for the comparison.
7472 Returns whether a COND can be vectorized. Checks whether
7473 condition operands are supportable using vect_is_simple_use. */
7476 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7479 enum vect_def_type dt
;
7480 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7483 if (TREE_CODE (cond
) == SSA_NAME
7484 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7486 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7487 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7490 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7495 if (!COMPARISON_CLASS_P (cond
))
7498 lhs
= TREE_OPERAND (cond
, 0);
7499 rhs
= TREE_OPERAND (cond
, 1);
7501 if (TREE_CODE (lhs
) == SSA_NAME
)
7503 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7504 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7507 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7508 && TREE_CODE (lhs
) != FIXED_CST
)
7511 if (TREE_CODE (rhs
) == SSA_NAME
)
7513 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7514 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7517 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7518 && TREE_CODE (rhs
) != FIXED_CST
)
7521 if (vectype1
&& vectype2
7522 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7525 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7529 /* vectorizable_condition.
7531 Check if STMT is conditional modify expression that can be vectorized.
7532 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7533 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7536 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7537 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7538 else clause if it is 2).
7540 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7543 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7544 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7547 tree scalar_dest
= NULL_TREE
;
7548 tree vec_dest
= NULL_TREE
;
7549 tree cond_expr
, then_clause
, else_clause
;
7550 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7551 tree comp_vectype
= NULL_TREE
;
7552 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7553 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7556 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7557 enum vect_def_type dt
, dts
[4];
7559 enum tree_code code
;
7560 stmt_vec_info prev_stmt_info
= NULL
;
7562 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7563 vec
<tree
> vec_oprnds0
= vNULL
;
7564 vec
<tree
> vec_oprnds1
= vNULL
;
7565 vec
<tree
> vec_oprnds2
= vNULL
;
7566 vec
<tree
> vec_oprnds3
= vNULL
;
7568 bool masked
= false;
7570 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7573 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7575 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7578 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7579 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7583 /* FORNOW: not yet supported. */
7584 if (STMT_VINFO_LIVE_P (stmt_info
))
7586 if (dump_enabled_p ())
7587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7588 "value used after loop.\n");
7593 /* Is vectorizable conditional operation? */
7594 if (!is_gimple_assign (stmt
))
7597 code
= gimple_assign_rhs_code (stmt
);
7599 if (code
!= COND_EXPR
)
7602 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7603 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7604 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7609 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7611 gcc_assert (ncopies
>= 1);
7612 if (reduc_index
&& ncopies
> 1)
7613 return false; /* FORNOW */
7615 cond_expr
= gimple_assign_rhs1 (stmt
);
7616 then_clause
= gimple_assign_rhs2 (stmt
);
7617 else_clause
= gimple_assign_rhs3 (stmt
);
7619 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7624 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7627 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7631 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7634 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7637 masked
= !COMPARISON_CLASS_P (cond_expr
);
7638 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7640 if (vec_cmp_type
== NULL_TREE
)
7645 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7646 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7653 vec_oprnds0
.create (1);
7654 vec_oprnds1
.create (1);
7655 vec_oprnds2
.create (1);
7656 vec_oprnds3
.create (1);
7660 scalar_dest
= gimple_assign_lhs (stmt
);
7661 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7663 /* Handle cond expr. */
7664 for (j
= 0; j
< ncopies
; j
++)
7666 gassign
*new_stmt
= NULL
;
7671 auto_vec
<tree
, 4> ops
;
7672 auto_vec
<vec
<tree
>, 4> vec_defs
;
7675 ops
.safe_push (cond_expr
);
7678 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7679 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7681 ops
.safe_push (then_clause
);
7682 ops
.safe_push (else_clause
);
7683 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7684 vec_oprnds3
= vec_defs
.pop ();
7685 vec_oprnds2
= vec_defs
.pop ();
7687 vec_oprnds1
= vec_defs
.pop ();
7688 vec_oprnds0
= vec_defs
.pop ();
7696 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7698 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7704 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7705 stmt
, comp_vectype
);
7706 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7707 loop_vinfo
, >emp
, &dts
[0]);
7710 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7711 stmt
, comp_vectype
);
7712 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7713 loop_vinfo
, >emp
, &dts
[1]);
7715 if (reduc_index
== 1)
7716 vec_then_clause
= reduc_def
;
7719 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7721 vect_is_simple_use (then_clause
, loop_vinfo
,
7724 if (reduc_index
== 2)
7725 vec_else_clause
= reduc_def
;
7728 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7730 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7737 = vect_get_vec_def_for_stmt_copy (dts
[0],
7738 vec_oprnds0
.pop ());
7741 = vect_get_vec_def_for_stmt_copy (dts
[1],
7742 vec_oprnds1
.pop ());
7744 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7745 vec_oprnds2
.pop ());
7746 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7747 vec_oprnds3
.pop ());
7752 vec_oprnds0
.quick_push (vec_cond_lhs
);
7754 vec_oprnds1
.quick_push (vec_cond_rhs
);
7755 vec_oprnds2
.quick_push (vec_then_clause
);
7756 vec_oprnds3
.quick_push (vec_else_clause
);
7759 /* Arguments are ready. Create the new vector stmt. */
7760 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7762 vec_then_clause
= vec_oprnds2
[i
];
7763 vec_else_clause
= vec_oprnds3
[i
];
7766 vec_compare
= vec_cond_lhs
;
7769 vec_cond_rhs
= vec_oprnds1
[i
];
7770 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7771 vec_cond_lhs
, vec_cond_rhs
);
7773 new_temp
= make_ssa_name (vec_dest
);
7774 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7775 vec_compare
, vec_then_clause
,
7777 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7779 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7786 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7788 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7790 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7793 vec_oprnds0
.release ();
7794 vec_oprnds1
.release ();
7795 vec_oprnds2
.release ();
7796 vec_oprnds3
.release ();
7801 /* vectorizable_comparison.
7803 Check if STMT is comparison expression that can be vectorized.
7804 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7805 comparison, put it in VEC_STMT, and insert it at GSI.
7807 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7810 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7811 gimple
**vec_stmt
, tree reduc_def
,
7814 tree lhs
, rhs1
, rhs2
;
7815 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7816 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7817 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7818 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7820 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7821 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7824 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7825 stmt_vec_info prev_stmt_info
= NULL
;
7827 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7828 vec
<tree
> vec_oprnds0
= vNULL
;
7829 vec
<tree
> vec_oprnds1
= vNULL
;
7834 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7837 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7840 mask_type
= vectype
;
7841 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7846 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7848 gcc_assert (ncopies
>= 1);
7849 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7850 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7854 if (STMT_VINFO_LIVE_P (stmt_info
))
7856 if (dump_enabled_p ())
7857 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7858 "value used after loop.\n");
7862 if (!is_gimple_assign (stmt
))
7865 code
= gimple_assign_rhs_code (stmt
);
7867 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7870 rhs1
= gimple_assign_rhs1 (stmt
);
7871 rhs2
= gimple_assign_rhs2 (stmt
);
7873 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7874 &dts
[0], &vectype1
))
7877 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7878 &dts
[1], &vectype2
))
7881 if (vectype1
&& vectype2
7882 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7885 vectype
= vectype1
? vectype1
: vectype2
;
7887 /* Invariant comparison. */
7890 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7891 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7894 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7897 /* Can't compare mask and non-mask types. */
7898 if (vectype1
&& vectype2
7899 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
7902 /* Boolean values may have another representation in vectors
7903 and therefore we prefer bit operations over comparison for
7904 them (which also works for scalar masks). We store opcodes
7905 to use in bitop1 and bitop2. Statement is vectorized as
7906 BITOP2 (rhs1 BITOP1 rhs2) or
7907 rhs1 BITOP2 (BITOP1 rhs2)
7908 depending on bitop1 and bitop2 arity. */
7909 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
7911 if (code
== GT_EXPR
)
7913 bitop1
= BIT_NOT_EXPR
;
7914 bitop2
= BIT_AND_EXPR
;
7916 else if (code
== GE_EXPR
)
7918 bitop1
= BIT_NOT_EXPR
;
7919 bitop2
= BIT_IOR_EXPR
;
7921 else if (code
== LT_EXPR
)
7923 bitop1
= BIT_NOT_EXPR
;
7924 bitop2
= BIT_AND_EXPR
;
7925 std::swap (rhs1
, rhs2
);
7926 std::swap (dts
[0], dts
[1]);
7928 else if (code
== LE_EXPR
)
7930 bitop1
= BIT_NOT_EXPR
;
7931 bitop2
= BIT_IOR_EXPR
;
7932 std::swap (rhs1
, rhs2
);
7933 std::swap (dts
[0], dts
[1]);
7937 bitop1
= BIT_XOR_EXPR
;
7938 if (code
== EQ_EXPR
)
7939 bitop2
= BIT_NOT_EXPR
;
7945 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7946 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
7948 if (bitop1
== NOP_EXPR
)
7949 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7952 machine_mode mode
= TYPE_MODE (vectype
);
7955 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
7956 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7959 if (bitop2
!= NOP_EXPR
)
7961 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
7962 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7972 vec_oprnds0
.create (1);
7973 vec_oprnds1
.create (1);
7977 lhs
= gimple_assign_lhs (stmt
);
7978 mask
= vect_create_destination_var (lhs
, mask_type
);
7980 /* Handle cmp expr. */
7981 for (j
= 0; j
< ncopies
; j
++)
7983 gassign
*new_stmt
= NULL
;
7988 auto_vec
<tree
, 2> ops
;
7989 auto_vec
<vec
<tree
>, 2> vec_defs
;
7991 ops
.safe_push (rhs1
);
7992 ops
.safe_push (rhs2
);
7993 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7994 vec_oprnds1
= vec_defs
.pop ();
7995 vec_oprnds0
= vec_defs
.pop ();
7999 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8000 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8005 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8006 vec_oprnds0
.pop ());
8007 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8008 vec_oprnds1
.pop ());
8013 vec_oprnds0
.quick_push (vec_rhs1
);
8014 vec_oprnds1
.quick_push (vec_rhs2
);
8017 /* Arguments are ready. Create the new vector stmt. */
8018 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8020 vec_rhs2
= vec_oprnds1
[i
];
8022 new_temp
= make_ssa_name (mask
);
8023 if (bitop1
== NOP_EXPR
)
8025 new_stmt
= gimple_build_assign (new_temp
, code
,
8026 vec_rhs1
, vec_rhs2
);
8027 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8031 if (bitop1
== BIT_NOT_EXPR
)
8032 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8034 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8036 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8037 if (bitop2
!= NOP_EXPR
)
8039 tree res
= make_ssa_name (mask
);
8040 if (bitop2
== BIT_NOT_EXPR
)
8041 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8043 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8045 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8049 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8056 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8058 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8060 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8063 vec_oprnds0
.release ();
8064 vec_oprnds1
.release ();
8069 /* Make sure the statement is vectorizable. */
8072 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
8074 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8075 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8076 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8078 tree scalar_type
, vectype
;
8079 gimple
*pattern_stmt
;
8080 gimple_seq pattern_def_seq
;
8082 if (dump_enabled_p ())
8084 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8085 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8088 if (gimple_has_volatile_ops (stmt
))
8090 if (dump_enabled_p ())
8091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8092 "not vectorized: stmt has volatile operands\n");
8097 /* Skip stmts that do not need to be vectorized. In loops this is expected
8099 - the COND_EXPR which is the loop exit condition
8100 - any LABEL_EXPRs in the loop
8101 - computations that are used only for array indexing or loop control.
8102 In basic blocks we only analyze statements that are a part of some SLP
8103 instance, therefore, all the statements are relevant.
8105 Pattern statement needs to be analyzed instead of the original statement
8106 if the original statement is not relevant. Otherwise, we analyze both
8107 statements. In basic blocks we are called from some SLP instance
8108 traversal, don't analyze pattern stmts instead, the pattern stmts
8109 already will be part of SLP instance. */
8111 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8112 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8113 && !STMT_VINFO_LIVE_P (stmt_info
))
8115 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8117 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8118 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8120 /* Analyze PATTERN_STMT instead of the original stmt. */
8121 stmt
= pattern_stmt
;
8122 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8123 if (dump_enabled_p ())
8125 dump_printf_loc (MSG_NOTE
, vect_location
,
8126 "==> examining pattern statement: ");
8127 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8132 if (dump_enabled_p ())
8133 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8138 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8141 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8142 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8144 /* Analyze PATTERN_STMT too. */
8145 if (dump_enabled_p ())
8147 dump_printf_loc (MSG_NOTE
, vect_location
,
8148 "==> examining pattern statement: ");
8149 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8152 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
8156 if (is_pattern_stmt_p (stmt_info
)
8158 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8160 gimple_stmt_iterator si
;
8162 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8164 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8165 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8166 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8168 /* Analyze def stmt of STMT if it's a pattern stmt. */
8169 if (dump_enabled_p ())
8171 dump_printf_loc (MSG_NOTE
, vect_location
,
8172 "==> examining pattern def statement: ");
8173 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8176 if (!vect_analyze_stmt (pattern_def_stmt
,
8177 need_to_vectorize
, node
))
8183 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8185 case vect_internal_def
:
8188 case vect_reduction_def
:
8189 case vect_nested_cycle
:
8190 gcc_assert (!bb_vinfo
8191 && (relevance
== vect_used_in_outer
8192 || relevance
== vect_used_in_outer_by_reduction
8193 || relevance
== vect_used_by_reduction
8194 || relevance
== vect_unused_in_scope
8195 || relevance
== vect_used_only_live
));
8198 case vect_induction_def
:
8199 case vect_constant_def
:
8200 case vect_external_def
:
8201 case vect_unknown_def_type
:
8208 gcc_assert (PURE_SLP_STMT (stmt_info
));
8210 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8211 if (dump_enabled_p ())
8213 dump_printf_loc (MSG_NOTE
, vect_location
,
8214 "get vectype for scalar type: ");
8215 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8216 dump_printf (MSG_NOTE
, "\n");
8219 vectype
= get_vectype_for_scalar_type (scalar_type
);
8222 if (dump_enabled_p ())
8224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8225 "not SLPed: unsupported data-type ");
8226 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8228 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8233 if (dump_enabled_p ())
8235 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8236 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8237 dump_printf (MSG_NOTE
, "\n");
8240 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8243 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8245 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8246 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8247 || (is_gimple_call (stmt
)
8248 && gimple_call_lhs (stmt
) == NULL_TREE
));
8249 *need_to_vectorize
= true;
8252 if (PURE_SLP_STMT (stmt_info
) && !node
)
8254 dump_printf_loc (MSG_NOTE
, vect_location
,
8255 "handled only by SLP analysis\n");
8261 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8262 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8263 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8264 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8265 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8266 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8267 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8268 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8269 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8270 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8271 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8272 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8273 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8277 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8278 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8279 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8280 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8281 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8282 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8283 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8284 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8285 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8286 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8291 if (dump_enabled_p ())
8293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8294 "not vectorized: relevant stmt not ");
8295 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8296 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8305 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8306 need extra handling, except for vectorizable reductions. */
8307 if (STMT_VINFO_LIVE_P (stmt_info
)
8308 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8309 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
, -1, NULL
);
8313 if (dump_enabled_p ())
8315 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8316 "not vectorized: live stmt not ");
8317 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8318 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8328 /* Function vect_transform_stmt.
8330 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8333 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8334 bool *grouped_store
, slp_tree slp_node
,
8335 slp_instance slp_node_instance
)
8337 bool is_store
= false;
8338 gimple
*vec_stmt
= NULL
;
8339 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8342 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8343 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8345 switch (STMT_VINFO_TYPE (stmt_info
))
8347 case type_demotion_vec_info_type
:
8348 case type_promotion_vec_info_type
:
8349 case type_conversion_vec_info_type
:
8350 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8354 case induc_vec_info_type
:
8355 gcc_assert (!slp_node
);
8356 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8360 case shift_vec_info_type
:
8361 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8365 case op_vec_info_type
:
8366 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8370 case assignment_vec_info_type
:
8371 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8375 case load_vec_info_type
:
8376 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8381 case store_vec_info_type
:
8382 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8384 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8386 /* In case of interleaving, the whole chain is vectorized when the
8387 last store in the chain is reached. Store stmts before the last
8388 one are skipped, and there vec_stmt_info shouldn't be freed
8390 *grouped_store
= true;
8391 if (STMT_VINFO_VEC_STMT (stmt_info
))
8398 case condition_vec_info_type
:
8399 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8403 case comparison_vec_info_type
:
8404 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8408 case call_vec_info_type
:
8409 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8410 stmt
= gsi_stmt (*gsi
);
8411 if (is_gimple_call (stmt
)
8412 && gimple_call_internal_p (stmt
)
8413 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8417 case call_simd_clone_vec_info_type
:
8418 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8419 stmt
= gsi_stmt (*gsi
);
8422 case reduc_vec_info_type
:
8423 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8428 if (!STMT_VINFO_LIVE_P (stmt_info
))
8430 if (dump_enabled_p ())
8431 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8432 "stmt not supported.\n");
8437 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8438 This would break hybrid SLP vectorization. */
8440 gcc_assert (!vec_stmt
8441 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8443 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8444 is being vectorized, but outside the immediately enclosing loop. */
8446 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8447 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8448 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8449 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8450 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8451 || STMT_VINFO_RELEVANT (stmt_info
) ==
8452 vect_used_in_outer_by_reduction
))
8454 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8455 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8456 imm_use_iterator imm_iter
;
8457 use_operand_p use_p
;
8461 if (dump_enabled_p ())
8462 dump_printf_loc (MSG_NOTE
, vect_location
,
8463 "Record the vdef for outer-loop vectorization.\n");
8465 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8466 (to be used when vectorizing outer-loop stmts that use the DEF of
8468 if (gimple_code (stmt
) == GIMPLE_PHI
)
8469 scalar_dest
= PHI_RESULT (stmt
);
8471 scalar_dest
= gimple_assign_lhs (stmt
);
8473 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8475 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8477 exit_phi
= USE_STMT (use_p
);
8478 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8483 /* Handle stmts whose DEF is used outside the loop-nest that is
8484 being vectorized. */
8489 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8491 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8492 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8493 && STMT_VINFO_TYPE (slp_stmt_info
) != reduc_vec_info_type
)
8495 done
= vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8501 else if (STMT_VINFO_LIVE_P (stmt_info
)
8502 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8504 done
= vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, &vec_stmt
);
8509 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8515 /* Remove a group of stores (for SLP or interleaving), free their
8519 vect_remove_stores (gimple
*first_stmt
)
8521 gimple
*next
= first_stmt
;
8523 gimple_stmt_iterator next_si
;
8527 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8529 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8530 if (is_pattern_stmt_p (stmt_info
))
8531 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8532 /* Free the attached stmt_vec_info and remove the stmt. */
8533 next_si
= gsi_for_stmt (next
);
8534 unlink_stmt_vdef (next
);
8535 gsi_remove (&next_si
, true);
8536 release_defs (next
);
8537 free_stmt_vec_info (next
);
8543 /* Function new_stmt_vec_info.
8545 Create and initialize a new stmt_vec_info struct for STMT. */
8548 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8551 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8553 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8554 STMT_VINFO_STMT (res
) = stmt
;
8556 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8557 STMT_VINFO_LIVE_P (res
) = false;
8558 STMT_VINFO_VECTYPE (res
) = NULL
;
8559 STMT_VINFO_VEC_STMT (res
) = NULL
;
8560 STMT_VINFO_VECTORIZABLE (res
) = true;
8561 STMT_VINFO_IN_PATTERN_P (res
) = false;
8562 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8563 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8564 STMT_VINFO_DATA_REF (res
) = NULL
;
8565 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8567 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8568 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8569 STMT_VINFO_DR_INIT (res
) = NULL
;
8570 STMT_VINFO_DR_STEP (res
) = NULL
;
8571 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8573 if (gimple_code (stmt
) == GIMPLE_PHI
8574 && is_loop_header_bb_p (gimple_bb (stmt
)))
8575 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8577 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8579 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8580 STMT_SLP_TYPE (res
) = loop_vect
;
8581 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8583 GROUP_FIRST_ELEMENT (res
) = NULL
;
8584 GROUP_NEXT_ELEMENT (res
) = NULL
;
8585 GROUP_SIZE (res
) = 0;
8586 GROUP_STORE_COUNT (res
) = 0;
8587 GROUP_GAP (res
) = 0;
8588 GROUP_SAME_DR_STMT (res
) = NULL
;
8594 /* Create a hash table for stmt_vec_info. */
8597 init_stmt_vec_info_vec (void)
8599 gcc_assert (!stmt_vec_info_vec
.exists ());
8600 stmt_vec_info_vec
.create (50);
8604 /* Free hash table for stmt_vec_info. */
8607 free_stmt_vec_info_vec (void)
8611 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8613 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8614 gcc_assert (stmt_vec_info_vec
.exists ());
8615 stmt_vec_info_vec
.release ();
8619 /* Free stmt vectorization related info. */
8622 free_stmt_vec_info (gimple
*stmt
)
8624 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8629 /* Check if this statement has a related "pattern stmt"
8630 (introduced by the vectorizer during the pattern recognition
8631 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8633 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8635 stmt_vec_info patt_info
8636 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8639 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8640 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8641 gimple_set_bb (patt_stmt
, NULL
);
8642 tree lhs
= gimple_get_lhs (patt_stmt
);
8643 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8644 release_ssa_name (lhs
);
8647 gimple_stmt_iterator si
;
8648 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8650 gimple
*seq_stmt
= gsi_stmt (si
);
8651 gimple_set_bb (seq_stmt
, NULL
);
8652 lhs
= gimple_get_lhs (seq_stmt
);
8653 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8654 release_ssa_name (lhs
);
8655 free_stmt_vec_info (seq_stmt
);
8658 free_stmt_vec_info (patt_stmt
);
8662 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8663 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8664 set_vinfo_for_stmt (stmt
, NULL
);
8669 /* Function get_vectype_for_scalar_type_and_size.
8671 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8675 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8677 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8678 machine_mode simd_mode
;
8679 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8686 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8687 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8690 /* For vector types of elements whose mode precision doesn't
8691 match their types precision we use a element type of mode
8692 precision. The vectorization routines will have to make sure
8693 they support the proper result truncation/extension.
8694 We also make sure to build vector types with INTEGER_TYPE
8695 component type only. */
8696 if (INTEGRAL_TYPE_P (scalar_type
)
8697 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8698 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8699 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8700 TYPE_UNSIGNED (scalar_type
));
8702 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8703 When the component mode passes the above test simply use a type
8704 corresponding to that mode. The theory is that any use that
8705 would cause problems with this will disable vectorization anyway. */
8706 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8707 && !INTEGRAL_TYPE_P (scalar_type
))
8708 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8710 /* We can't build a vector type of elements with alignment bigger than
8712 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8713 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8714 TYPE_UNSIGNED (scalar_type
));
8716 /* If we felt back to using the mode fail if there was
8717 no scalar type for it. */
8718 if (scalar_type
== NULL_TREE
)
8721 /* If no size was supplied use the mode the target prefers. Otherwise
8722 lookup a vector mode of the specified size. */
8724 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8726 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8727 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8731 vectype
= build_vector_type (scalar_type
, nunits
);
8733 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8734 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
8740 unsigned int current_vector_size
;
8742 /* Function get_vectype_for_scalar_type.
8744 Returns the vector type corresponding to SCALAR_TYPE as supported
8748 get_vectype_for_scalar_type (tree scalar_type
)
8751 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8752 current_vector_size
);
8754 && current_vector_size
== 0)
8755 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8759 /* Function get_mask_type_for_scalar_type.
8761 Returns the mask type corresponding to a result of comparison
8762 of vectors of specified SCALAR_TYPE as supported by target. */
8765 get_mask_type_for_scalar_type (tree scalar_type
)
8767 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8772 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8773 current_vector_size
);
8776 /* Function get_same_sized_vectype
8778 Returns a vector type corresponding to SCALAR_TYPE of size
8779 VECTOR_TYPE if supported by the target. */
8782 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8784 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8785 return build_same_sized_truth_vector_type (vector_type
);
8787 return get_vectype_for_scalar_type_and_size
8788 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8791 /* Function vect_is_simple_use.
8794 VINFO - the vect info of the loop or basic block that is being vectorized.
8795 OPERAND - operand in the loop or bb.
8797 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8798 DT - the type of definition
8800 Returns whether a stmt with OPERAND can be vectorized.
8801 For loops, supportable operands are constants, loop invariants, and operands
8802 that are defined by the current iteration of the loop. Unsupportable
8803 operands are those that are defined by a previous iteration of the loop (as
8804 is the case in reduction/induction computations).
8805 For basic blocks, supportable operands are constants and bb invariants.
8806 For now, operands defined outside the basic block are not supported. */
8809 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8810 gimple
**def_stmt
, enum vect_def_type
*dt
)
8813 *dt
= vect_unknown_def_type
;
8815 if (dump_enabled_p ())
8817 dump_printf_loc (MSG_NOTE
, vect_location
,
8818 "vect_is_simple_use: operand ");
8819 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8820 dump_printf (MSG_NOTE
, "\n");
8823 if (CONSTANT_CLASS_P (operand
))
8825 *dt
= vect_constant_def
;
8829 if (is_gimple_min_invariant (operand
))
8831 *dt
= vect_external_def
;
8835 if (TREE_CODE (operand
) != SSA_NAME
)
8837 if (dump_enabled_p ())
8838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8843 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8845 *dt
= vect_external_def
;
8849 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8850 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8853 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8856 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8857 *dt
= vect_external_def
;
8860 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8861 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8864 if (dump_enabled_p ())
8866 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8869 case vect_uninitialized_def
:
8870 dump_printf (MSG_NOTE
, "uninitialized\n");
8872 case vect_constant_def
:
8873 dump_printf (MSG_NOTE
, "constant\n");
8875 case vect_external_def
:
8876 dump_printf (MSG_NOTE
, "external\n");
8878 case vect_internal_def
:
8879 dump_printf (MSG_NOTE
, "internal\n");
8881 case vect_induction_def
:
8882 dump_printf (MSG_NOTE
, "induction\n");
8884 case vect_reduction_def
:
8885 dump_printf (MSG_NOTE
, "reduction\n");
8887 case vect_double_reduction_def
:
8888 dump_printf (MSG_NOTE
, "double reduction\n");
8890 case vect_nested_cycle
:
8891 dump_printf (MSG_NOTE
, "nested cycle\n");
8893 case vect_unknown_def_type
:
8894 dump_printf (MSG_NOTE
, "unknown\n");
8899 if (*dt
== vect_unknown_def_type
)
8901 if (dump_enabled_p ())
8902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8903 "Unsupported pattern.\n");
8907 switch (gimple_code (*def_stmt
))
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8916 "unsupported defining stmt:\n");
8923 /* Function vect_is_simple_use.
8925 Same as vect_is_simple_use but also determines the vector operand
8926 type of OPERAND and stores it to *VECTYPE. If the definition of
8927 OPERAND is vect_uninitialized_def, vect_constant_def or
8928 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8929 is responsible to compute the best suited vector type for the
8933 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8934 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8936 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8939 /* Now get a vector type if the def is internal, otherwise supply
8940 NULL_TREE and leave it up to the caller to figure out a proper
8941 type for the use stmt. */
8942 if (*dt
== vect_internal_def
8943 || *dt
== vect_induction_def
8944 || *dt
== vect_reduction_def
8945 || *dt
== vect_double_reduction_def
8946 || *dt
== vect_nested_cycle
)
8948 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8950 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8951 && !STMT_VINFO_RELEVANT (stmt_info
)
8952 && !STMT_VINFO_LIVE_P (stmt_info
))
8953 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8955 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8956 gcc_assert (*vectype
!= NULL_TREE
);
8958 else if (*dt
== vect_uninitialized_def
8959 || *dt
== vect_constant_def
8960 || *dt
== vect_external_def
)
8961 *vectype
= NULL_TREE
;
8969 /* Function supportable_widening_operation
8971 Check whether an operation represented by the code CODE is a
8972 widening operation that is supported by the target platform in
8973 vector form (i.e., when operating on arguments of type VECTYPE_IN
8974 producing a result of type VECTYPE_OUT).
8976 Widening operations we currently support are NOP (CONVERT), FLOAT
8977 and WIDEN_MULT. This function checks if these operations are supported
8978 by the target platform either directly (via vector tree-codes), or via
8982 - CODE1 and CODE2 are codes of vector operations to be used when
8983 vectorizing the operation, if available.
8984 - MULTI_STEP_CVT determines the number of required intermediate steps in
8985 case of multi-step conversion (like char->short->int - in that case
8986 MULTI_STEP_CVT will be 1).
8987 - INTERM_TYPES contains the intermediate type required to perform the
8988 widening operation (short in the above example). */
8991 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8992 tree vectype_out
, tree vectype_in
,
8993 enum tree_code
*code1
, enum tree_code
*code2
,
8994 int *multi_step_cvt
,
8995 vec
<tree
> *interm_types
)
8997 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8998 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8999 struct loop
*vect_loop
= NULL
;
9000 machine_mode vec_mode
;
9001 enum insn_code icode1
, icode2
;
9002 optab optab1
, optab2
;
9003 tree vectype
= vectype_in
;
9004 tree wide_vectype
= vectype_out
;
9005 enum tree_code c1
, c2
;
9007 tree prev_type
, intermediate_type
;
9008 machine_mode intermediate_mode
, prev_mode
;
9009 optab optab3
, optab4
;
9011 *multi_step_cvt
= 0;
9013 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9017 case WIDEN_MULT_EXPR
:
9018 /* The result of a vectorized widening operation usually requires
9019 two vectors (because the widened results do not fit into one vector).
9020 The generated vector results would normally be expected to be
9021 generated in the same order as in the original scalar computation,
9022 i.e. if 8 results are generated in each vector iteration, they are
9023 to be organized as follows:
9024 vect1: [res1,res2,res3,res4],
9025 vect2: [res5,res6,res7,res8].
9027 However, in the special case that the result of the widening
9028 operation is used in a reduction computation only, the order doesn't
9029 matter (because when vectorizing a reduction we change the order of
9030 the computation). Some targets can take advantage of this and
9031 generate more efficient code. For example, targets like Altivec,
9032 that support widen_mult using a sequence of {mult_even,mult_odd}
9033 generate the following vectors:
9034 vect1: [res1,res3,res5,res7],
9035 vect2: [res2,res4,res6,res8].
9037 When vectorizing outer-loops, we execute the inner-loop sequentially
9038 (each vectorized inner-loop iteration contributes to VF outer-loop
9039 iterations in parallel). We therefore don't allow to change the
9040 order of the computation in the inner-loop during outer-loop
9042 /* TODO: Another case in which order doesn't *really* matter is when we
9043 widen and then contract again, e.g. (short)((int)x * y >> 8).
9044 Normally, pack_trunc performs an even/odd permute, whereas the
9045 repack from an even/odd expansion would be an interleave, which
9046 would be significantly simpler for e.g. AVX2. */
9047 /* In any case, in order to avoid duplicating the code below, recurse
9048 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9049 are properly set up for the caller. If we fail, we'll continue with
9050 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9052 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9053 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9054 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9055 stmt
, vectype_out
, vectype_in
,
9056 code1
, code2
, multi_step_cvt
,
9059 /* Elements in a vector with vect_used_by_reduction property cannot
9060 be reordered if the use chain with this property does not have the
9061 same operation. One such an example is s += a * b, where elements
9062 in a and b cannot be reordered. Here we check if the vector defined
9063 by STMT is only directly used in the reduction statement. */
9064 tree lhs
= gimple_assign_lhs (stmt
);
9065 use_operand_p dummy
;
9067 stmt_vec_info use_stmt_info
= NULL
;
9068 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9069 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9070 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9073 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9074 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9087 case VEC_WIDEN_MULT_EVEN_EXPR
:
9088 /* Support the recursion induced just above. */
9089 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9090 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9093 case WIDEN_LSHIFT_EXPR
:
9094 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9095 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9099 c1
= VEC_UNPACK_LO_EXPR
;
9100 c2
= VEC_UNPACK_HI_EXPR
;
9104 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9105 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9108 case FIX_TRUNC_EXPR
:
9109 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9110 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9111 computing the operation. */
9118 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9121 if (code
== FIX_TRUNC_EXPR
)
9123 /* The signedness is determined from output operand. */
9124 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9125 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9129 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9130 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9133 if (!optab1
|| !optab2
)
9136 vec_mode
= TYPE_MODE (vectype
);
9137 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9138 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9144 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9145 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9146 /* For scalar masks we may have different boolean
9147 vector types having the same QImode. Thus we
9148 add additional check for elements number. */
9149 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9150 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9151 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9153 /* Check if it's a multi-step conversion that can be done using intermediate
9156 prev_type
= vectype
;
9157 prev_mode
= vec_mode
;
9159 if (!CONVERT_EXPR_CODE_P (code
))
9162 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9163 intermediate steps in promotion sequence. We try
9164 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9166 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9167 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9169 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9170 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9173 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9174 current_vector_size
);
9175 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9180 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9181 TYPE_UNSIGNED (prev_type
));
9183 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9184 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9186 if (!optab3
|| !optab4
9187 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9188 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9189 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9190 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9191 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9192 == CODE_FOR_nothing
)
9193 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9194 == CODE_FOR_nothing
))
9197 interm_types
->quick_push (intermediate_type
);
9198 (*multi_step_cvt
)++;
9200 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9201 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9202 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9203 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9204 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9206 prev_type
= intermediate_type
;
9207 prev_mode
= intermediate_mode
;
9210 interm_types
->release ();
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).   */
9236 supportable_narrowing_operation (enum tree_code code
,
9237 tree vectype_out
, tree vectype_in
,
9238 enum tree_code
*code1
, int *multi_step_cvt
,
9239 vec
<tree
> *interm_types
)
9241 machine_mode vec_mode
;
9242 enum insn_code icode1
;
9243 optab optab1
, interm_optab
;
9244 tree vectype
= vectype_in
;
9245 tree narrow_vectype
= vectype_out
;
9247 tree intermediate_type
, prev_type
;
9248 machine_mode intermediate_mode
, prev_mode
;
9252 *multi_step_cvt
= 0;
9256 c1
= VEC_PACK_TRUNC_EXPR
;
9259 case FIX_TRUNC_EXPR
:
9260 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9264 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9265 tree code and optabs used for computing the operation. */
9272 if (code
== FIX_TRUNC_EXPR
)
9273 /* The signedness is determined from output operand. */
9274 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9276 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9281 vec_mode
= TYPE_MODE (vectype
);
9282 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9287 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9288 /* For scalar masks we may have different boolean
9289 vector types having the same QImode. Thus we
9290 add additional check for elements number. */
9291 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9292 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9293 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9295 /* Check if it's a multi-step conversion that can be done using intermediate
9297 prev_mode
= vec_mode
;
9298 prev_type
= vectype
;
9299 if (code
== FIX_TRUNC_EXPR
)
9300 uns
= TYPE_UNSIGNED (vectype_out
);
9302 uns
= TYPE_UNSIGNED (vectype
);
9304 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9305 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9306 costly than signed. */
9307 if (code
== FIX_TRUNC_EXPR
&& uns
)
9309 enum insn_code icode2
;
9312 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9314 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9315 if (interm_optab
!= unknown_optab
9316 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9317 && insn_data
[icode1
].operand
[0].mode
9318 == insn_data
[icode2
].operand
[0].mode
)
9321 optab1
= interm_optab
;
9326 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9327 intermediate steps in promotion sequence. We try
9328 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9329 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9330 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9332 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9333 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9336 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9337 current_vector_size
);
9338 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9343 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9345 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9348 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9349 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9350 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9351 == CODE_FOR_nothing
))
9354 interm_types
->quick_push (intermediate_type
);
9355 (*multi_step_cvt
)++;
9357 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9358 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9359 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9360 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9362 prev_mode
= intermediate_mode
;
9363 prev_type
= intermediate_type
;
9364 optab1
= interm_optab
;
9367 interm_types
->release ();